mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-14 14:32:37 +02:00
kernels are now consistent with pep8 and common reason
This commit is contained in:
parent
bbca026a21
commit
6ee8732cf4
29 changed files with 47 additions and 75 deletions
65
GPy/kern/parts/Brownian.py
Normal file
65
GPy/kern/parts/Brownian.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
def theta(x):
    """Heaviside step function: elementwise 1.0 where x >= 0, else 0.0."""
    return np.where(x >= 0., 1., 0.)
|
||||
|
||||
class Brownian(Kernpart):
    """
    Brownian Motion kernel: k(x, x') = variance * min(x, x').

    :param input_dim: the number of input dimensions (must be 1)
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    def __init__(self, input_dim, variance=1.):
        self.input_dim = input_dim
        assert self.input_dim == 1, "Brownian motion in 1D only"
        self.num_params = 1
        self.name = 'Brownian'
        self._set_params(np.array([variance]).flatten())

    def _get_params(self):
        """Return the current parameter vector (the variance)."""
        return self.variance

    def _set_params(self, x):
        """Set the parameters from a flat array of size 1."""
        assert x.shape == (1,)
        self.variance = x

    def _get_param_names(self):
        """Return parameter names."""
        return ['variance']

    def K(self, X, X2, target):
        """Add the covariance between X and X2 to target (in place)."""
        if X2 is None:
            X2 = X
        target += self.variance * np.fmin(X, X2.T)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance of X to target (in place)."""
        # k(x, x) = variance * min(x, x) = variance * x
        target += self.variance * X.flatten()

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Accumulate the derivative of the objective wrt the variance."""
        if X2 is None:
            X2 = X
        target += np.sum(np.fmin(X, X2.T) * dL_dK)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Accumulate the derivative of the diagonal wrt the variance."""
        target += np.dot(X.flatten(), dL_dKdiag)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance wrt X (not implemented)."""
        # Bugfix: `raise NotImplementedError, "TODO"` is Python-2-only syntax;
        # the call form below is valid in both Python 2 and 3.
        raise NotImplementedError("TODO")

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Accumulate the derivative of the diagonal wrt X."""
        target += self.variance * dL_dKdiag[:, None]
|
||||
|
||||
135
GPy/kern/parts/Matern32.py
Normal file
135
GPy/kern/parts/Matern32.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from scipy import integrate
|
||||
|
||||
class Matern32(Kernpart):
    """
    Matern 3/2 kernel:

    .. math::

       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input\\_dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\\sigma^2`
    :type variance: float
    :param lengthscale: the vector of lengthscale :math:`\\ell_i`
    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
    :param ARD: Auto Relevance Determination. If False the kernel is isotropic (one single lengthscale parameter), otherwise there is one lengthscale parameter per dimension.
    :type ARD: Boolean
    :rtype: kernel object
    """

    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        self.name = 'Mat32'
        # one lengthscale if isotropic, one per input dimension if ARD
        n_lengthscales = self.input_dim if ARD else 1
        self.num_params = n_lengthscales + 1
        if lengthscale is not None:
            lengthscale = np.asarray(lengthscale)
            assert lengthscale.size == n_lengthscales, "bad number of lengthscales"
        else:
            lengthscale = np.ones(n_lengthscales)
        self._set_params(np.hstack((variance, lengthscale.flatten())))

    def _get_params(self):
        """Return the value of the parameters."""
        return np.hstack((self.variance, self.lengthscale))

    def _set_params(self, x):
        """Set the value of the parameters."""
        assert x.size == self.num_params
        self.variance = x[0]
        self.lengthscale = x[1:]

    def _get_param_names(self):
        """Return parameter names."""
        if self.num_params == 2:
            return ['variance', 'lengthscale']
        return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]

    def K(self, X, X2, target):
        """Add the covariance matrix between X and X2 to target (in place)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
        np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance matrix of X to target (in place)."""
        # k(x, x) = variance for a stationary kernel
        np.add(target, self.variance, target)

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to the parameters."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
        dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
        # guard against division by zero where dist == 0 (the diagonal)
        invdist = 1. / np.where(dist != 0., dist, np.inf)
        dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
        target[0] += np.sum(dvar * dL_dK)
        if self.ARD:
            dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis]
            target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
        else:
            dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist)) * dist2M.sum(-1) * invdist
            target[1] += np.sum(dl * dL_dK)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Derivative of the diagonal of the covariance matrix wrt the parameters."""
        # the diagonal is constant (= variance), so only the variance gradient is non-zero
        target[0] += np.sum(dL_dKdiag)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
        dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        # the diagonal does not depend on X
        pass

    def Gram_matrix(self, F, F1, F2, lower, upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.input_dim == 1
        def L(x, i):
            # differential operator whose Green's function is the Matern 3/2 kernel
            return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
        n = F.shape[0]
        G = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                G[i, j] = G[j, i] = integrate.quad(lambda x: L(x, i) * L(x, j), lower, upper)[0]
        Flower = np.array([f(lower) for f in F])[:, None]
        F1lower = np.array([f(lower) for f in F1])[:, None]
        return(self.lengthscale ** 3 / (12. * np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3. * self.variance) * np.dot(F1lower, F1lower.T))
|
||||
142
GPy/kern/parts/Matern52.py
Normal file
142
GPy/kern/parts/Matern52.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
from scipy import integrate
|
||||
|
||||
class Matern52(Kernpart):
    """
    Matern 5/2 kernel:

    .. math::

       k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac53 r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input\\_dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\\sigma^2`
    :type variance: float
    :param lengthscale: the vector of lengthscale :math:`\\ell_i`
    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
    :param ARD: Auto Relevance Determination. If False the kernel is isotropic (one single lengthscale parameter), otherwise there is one lengthscale parameter per dimension.
    :type ARD: Boolean
    :rtype: kernel object
    """

    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        self.name = 'Mat52'
        # one lengthscale if isotropic, one per input dimension if ARD
        n_lengthscales = self.input_dim if ARD else 1
        self.num_params = n_lengthscales + 1
        if lengthscale is not None:
            lengthscale = np.asarray(lengthscale)
            assert lengthscale.size == n_lengthscales, "bad number of lengthscales"
        else:
            lengthscale = np.ones(n_lengthscales)
        self._set_params(np.hstack((variance, lengthscale.flatten())))

    def _get_params(self):
        """Return the value of the parameters."""
        return np.hstack((self.variance, self.lengthscale))

    def _set_params(self, x):
        """Set the value of the parameters."""
        assert x.size == self.num_params
        self.variance = x[0]
        self.lengthscale = x[1:]

    def _get_param_names(self):
        """Return parameter names."""
        if self.num_params == 2:
            return ['variance', 'lengthscale']
        return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]

    def K(self, X, X2, target):
        """Add the covariance matrix between X and X2 to target (in place)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
        np.add(self.variance * (1 + np.sqrt(5.) * dist + 5. / 3 * dist ** 2) * np.exp(-np.sqrt(5.) * dist), target, target)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance matrix of X to target (in place)."""
        # k(x, x) = variance for a stationary kernel
        np.add(target, self.variance, target)

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to the parameters."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
        # guard against division by zero where dist == 0 (the diagonal)
        invdist = 1. / np.where(dist != 0., dist, np.inf)
        dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
        dvar = (1 + np.sqrt(5.) * dist + 5. / 3 * dist ** 2) * np.exp(-np.sqrt(5.) * dist)
        target[0] += np.sum(dvar * dL_dK)
        # (the original computed `dl` once here and then unconditionally
        # recomputed it in both branches below; the dead computation is removed)
        if self.ARD:
            dl = (self.variance * 5. / 3 * dist * (1 + np.sqrt(5.) * dist) * np.exp(-np.sqrt(5.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis]
            target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
        else:
            dl = (self.variance * 5. / 3 * dist * (1 + np.sqrt(5.) * dist) * np.exp(-np.sqrt(5.) * dist)) * dist2M.sum(-1) * invdist
            target[1] += np.sum(dl * dL_dK)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Derivative of the diagonal of the covariance matrix wrt the parameters."""
        # the diagonal is constant (= variance), so only the variance gradient is non-zero
        target[0] += np.sum(dL_dKdiag)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
        dK_dX = -np.transpose(self.variance * 5. / 3 * dist * (1 + np.sqrt(5) * dist) * np.exp(-np.sqrt(5) * dist) * ddist_dX, (1, 0, 2))
        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        # the diagonal does not depend on X
        pass

    def Gram_matrix(self, F, F1, F2, F3, lower, upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param F3: vector of third derivatives of F
        :type F3: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.input_dim == 1
        def L(x, i):
            # differential operator whose Green's function is the Matern 5/2 kernel
            return(5 * np.sqrt(5) / self.lengthscale ** 3 * F[i](x) + 15. / self.lengthscale ** 2 * F1[i](x) + 3 * np.sqrt(5) / self.lengthscale * F2[i](x) + F3[i](x))
        n = F.shape[0]
        G = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                G[i, j] = G[j, i] = integrate.quad(lambda x: L(x, i) * L(x, j), lower, upper)[0]
        G_coef = 3. * self.lengthscale ** 5 / (400 * np.sqrt(5))
        Flower = np.array([f(lower) for f in F])[:, None]
        F1lower = np.array([f(lower) for f in F1])[:, None]
        F2lower = np.array([f(lower) for f in F2])[:, None]
        orig = 9. / 8 * np.dot(Flower, Flower.T) + 9. * self.lengthscale ** 4 / 200 * np.dot(F2lower, F2lower.T)
        orig2 = 3. / 5 * self.lengthscale ** 2 * (np.dot(F1lower, F1lower.T) + 1. / 8 * np.dot(Flower, F2lower.T) + 1. / 8 * np.dot(F2lower, Flower.T))
        return(1. / self.variance * (G_coef * G + orig + orig2))
|
||||
|
||||
|
||||
|
||||
89
GPy/kern/parts/bias.py
Normal file
89
GPy/kern/parts/bias.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
|
||||
class Bias(Kernpart):
    def __init__(self, input_dim, variance=1.):
        """
        :param input_dim: the number of input dimensions
        :type input_dim: int
        :param variance: the variance of the kernel
        :type variance: float
        """
        self.input_dim = input_dim
        self.num_params = 1
        self.name = 'bias'
        self._set_params(np.array([variance]).flatten())

    def _get_params(self):
        # the variance is the only parameter
        return self.variance

    def _set_params(self, x):
        assert x.shape == (1,)
        self.variance = x

    def _get_param_names(self):
        return ['variance']

    def K(self, X, X2, target):
        # constant covariance everywhere
        target += self.variance

    def Kdiag(self, X, target):
        target += self.variance

    def dK_dtheta(self, dL_dKdiag, X, X2, target):
        # dK/dvariance is 1 everywhere, so the gradient is the sum of partials
        target += dL_dKdiag.sum()

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        target += dL_dKdiag.sum()

    def dK_dX(self, dL_dK, X, X2, target):
        # the kernel does not depend on X
        pass

    def dKdiag_dX(self, dL_dKdiag, X, target):
        pass

    # ------------------------------------- #
    #             PSI statistics            #
    # ------------------------------------- #

    def psi0(self, Z, mu, S, target):
        target += self.variance

    def psi1(self, Z, mu, S, target):
        # cache the value for later reuse
        self._psi1 = self.variance
        target += self._psi1

    def psi2(self, Z, mu, S, target):
        target += self.variance ** 2

    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
        target += dL_dpsi0.sum()

    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
        target += dL_dpsi1.sum()

    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
        # psi2 = variance**2, so d(psi2)/d(variance) = 2 * variance
        target += 2. * self.variance * dL_dpsi2.sum()

    def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
        pass

    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
        pass

    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
        pass

    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
        pass

    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
        pass

    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
        pass
|
||||
142
GPy/kern/parts/coregionalise.py
Normal file
142
GPy/kern/parts/coregionalise.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
# Copyright (c) 2012, James Hensman and Ricardo Andrade
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from GPy.util.linalg import mdot, pdinv
|
||||
import pdb
|
||||
from scipy import weave
|
||||
|
||||
class Coregionalise(Kernpart):
    """
    Kernel for Intrinsic Corregionalization Models.

    The coregionalization matrix is B = W W^T + diag(kappa); inputs are
    integer output-indices (the last column of X in the wrapping kernel).

    :param Nout: number of outputs
    :param R: rank of the W factor
    :param W: (Nout, R) mixing matrix, defaults to ones
    :param kappa: (Nout,) independent output variances, defaults to ones
    """
    def __init__(self, Nout, R=1, W=None, kappa=None):
        self.input_dim = 1
        self.name = 'coregion'
        self.Nout = Nout
        self.R = R
        if W is None:
            self.W = np.ones((self.Nout, self.R))
        else:
            assert W.shape == (self.Nout, self.R)
            self.W = W
        if kappa is None:
            kappa = np.ones(self.Nout)
        else:
            assert kappa.shape == (self.Nout,)
        self.kappa = kappa
        self.num_params = self.Nout * (self.R + 1)
        self._set_params(np.hstack([self.W.flatten(), self.kappa]))

    def _get_params(self):
        """Return the parameter vector: W (flattened) followed by kappa."""
        return np.hstack([self.W.flatten(), self.kappa])

    def _set_params(self, x):
        """Set W and kappa from a flat vector and refresh the cached B."""
        assert x.size == self.num_params
        self.kappa = x[-self.Nout:]
        self.W = x[:-self.Nout].reshape(self.Nout, self.R)
        self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)

    def _get_param_names(self):
        """Return parameter names (W entries row by row, then kappa entries)."""
        return sum([['W%i_%i' % (i, j) for j in range(self.R)] for i in range(self.Nout)], []) + ['kappa_%i' % i for i in range(self.Nout)]

    def K(self, index, index2, target):
        """Add B[index, index2] to target (in place), via inlined C for speed."""
        # `np.int` was an alias of the builtin int and is removed in NumPy >= 1.24
        index = np.asarray(index, dtype=int)

        if index2 is None:
            # symmetric case: fill the diagonal and mirror the lower triangle
            code = """
            for(int i=0;i<N; i++){
              target[i+i*N] += B[index[i]+Nout*index[i]];
              for(int j=0; j<i; j++){
                  target[j+i*N] += B[index[i]+Nout*index[j]];
                  target[i+j*N] += target[j+i*N];
                }
              }
            """
            N, B, Nout = index.size, self.B, self.Nout
            weave.inline(code, ['target', 'index', 'N', 'B', 'Nout'])
        else:
            index2 = np.asarray(index2, dtype=int)
            code = """
            for(int i=0;i<num_inducing; i++){
              for(int j=0; j<N; j++){
                  target[i+j*num_inducing] += B[Nout*index[j]+index2[i]];
                }
              }
            """
            N, num_inducing, B, Nout = index.size, index2.size, self.B, self.Nout
            weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'Nout'])

    def Kdiag(self, index, target):
        """Add the diagonal entries B[i, i] for each index to target."""
        target += np.diag(self.B)[np.asarray(index, dtype=int).flatten()]

    def dK_dtheta(self, dL_dK, index, index2, target):
        """Accumulate the gradient wrt W and kappa (weave-accelerated)."""
        index = np.asarray(index, dtype=int)
        dL_dK_small = np.zeros_like(self.B)
        if index2 is None:
            index2 = index
        else:
            index2 = np.asarray(index2, dtype=int)

        # scatter-sum dL_dK into the (Nout x Nout) pairwise-output matrix
        code = """
        for(int i=0; i<num_inducing; i++){
          for(int j=0; j<N; j++){
            dL_dK_small[index[j] + Nout*index2[i]] += dL_dK[i+j*num_inducing];
          }
        }
        """
        N, num_inducing, Nout = index.size, index2.size, self.Nout
        weave.inline(code, ['N', 'num_inducing', 'Nout', 'dL_dK', 'dL_dK_small', 'index', 'index2'])

        dkappa = np.diag(dL_dK_small)
        dL_dK_small += dL_dK_small.T
        dW = (self.W[:, None, :] * dL_dK_small[:, :, None]).sum(0)

        target += np.hstack([dW.flatten(), dkappa])

    def dK_dtheta_old(self, dL_dK, index, index2, target):
        """Pure-numpy reference implementation of dK_dtheta (slow)."""
        if index2 is None:
            index2 = index
        else:
            index2 = np.asarray(index2, dtype=int)
        ii, jj = np.meshgrid(index, index2)
        ii, jj = ii.T, jj.T

        dL_dK_small = np.zeros_like(self.B)
        for i in range(self.Nout):
            for j in range(self.Nout):
                tmp = np.sum(dL_dK[(ii == i) * (jj == j)])
                dL_dK_small[i, j] = tmp

        dkappa = np.diag(dL_dK_small)
        dL_dK_small += dL_dK_small.T
        dW = (self.W[:, None, :] * dL_dK_small[:, :, None]).sum(0)

        target += np.hstack([dW.flatten(), dkappa])

    def dKdiag_dtheta(self, dL_dKdiag, index, target):
        """Accumulate the gradient of the diagonal wrt W and kappa."""
        index = np.asarray(index, dtype=int).flatten()
        dL_dKdiag_small = np.zeros(self.Nout)
        for i in range(self.Nout):
            dL_dKdiag_small[i] += np.sum(dL_dKdiag[index == i])
        # diag(B)_i = sum_r W_ir^2 + kappa_i
        dW = 2. * self.W * dL_dKdiag_small[:, None]
        dkappa = dL_dKdiag_small
        target += np.hstack([dW.flatten(), dkappa])

    def dK_dX(self, dL_dK, X, X2, target):
        # the inputs are discrete indices; no gradient wrt X
        pass
|
||||
|
||||
|
||||
|
||||
127
GPy/kern/parts/exponential.py
Normal file
127
GPy/kern/parts/exponential.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from scipy import integrate
|
||||
|
||||
class Exponential(Kernpart):
    """
    Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)

    .. math::

       k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input\\_dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\\sigma^2`
    :type variance: float
    :param lengthscale: the vector of lengthscale :math:`\\ell_i`
    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
    :param ARD: Auto Relevance Determination. If False the kernel is isotropic (one single lengthscale parameter), otherwise there is one lengthscale parameter per dimension.
    :type ARD: Boolean
    :rtype: kernel object
    """
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        self.name = 'exp'
        # one lengthscale if isotropic, one per input dimension if ARD
        n_lengthscales = self.input_dim if ARD else 1
        self.num_params = n_lengthscales + 1
        if lengthscale is not None:
            lengthscale = np.asarray(lengthscale)
            assert lengthscale.size == n_lengthscales, "bad number of lengthscales"
        else:
            lengthscale = np.ones(n_lengthscales)
        self._set_params(np.hstack((variance, lengthscale.flatten())))

    def _get_params(self):
        """Return the value of the parameters."""
        return np.hstack((self.variance, self.lengthscale))

    def _set_params(self, x):
        """Set the value of the parameters."""
        assert x.size == self.num_params
        self.variance = x[0]
        self.lengthscale = x[1:]

    def _get_param_names(self):
        """Return parameter names."""
        if self.num_params == 2:
            return ['variance', 'lengthscale']
        return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]

    def K(self, X, X2, target):
        """Add the covariance matrix between X and X2 to target (in place)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
        np.add(self.variance * np.exp(-dist), target, target)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance matrix of X to target (in place)."""
        # k(x, x) = variance for a stationary kernel
        np.add(target, self.variance, target)

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to the parameters."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
        # guard against division by zero where dist == 0 (the diagonal)
        invdist = 1. / np.where(dist != 0., dist, np.inf)
        dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
        dvar = np.exp(-dist)
        target[0] += np.sum(dvar * dL_dK)
        if self.ARD:
            dl = self.variance * dvar[:, :, None] * dist2M * invdist[:, :, None]
            target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
        else:
            dl = self.variance * dvar * dist2M.sum(-1) * invdist
            target[1] += np.sum(dl * dL_dK)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Derivative of the diagonal of the covariance matrix wrt the parameters."""
        # NB: derivative of diagonal elements wrt lengthscale is 0
        target[0] += np.sum(dL_dKdiag)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
        ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
        dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        # the diagonal does not depend on X
        pass

    def Gram_matrix(self, F, F1, lower, upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.input_dim == 1
        def L(x, i):
            # differential operator whose Green's function is the exponential kernel
            return(1. / self.lengthscale * F[i](x) + F1[i](x))
        n = F.shape[0]
        G = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                G[i, j] = G[j, i] = integrate.quad(lambda x: L(x, i) * L(x, j), lower, upper)[0]
        Flower = np.array([f(lower) for f in F])[:, None]
        return(self.lengthscale / 2. / self.variance * G + 1. / self.variance * np.dot(Flower, Flower.T))
|
||||
74
GPy/kern/parts/finite_dimensional.py
Normal file
74
GPy/kern/parts/finite_dimensional.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from ...util.linalg import pdinv,mdot
|
||||
|
||||
class FiniteDimensional(Kernpart):
    def __init__(self, input_dim, F, G, variance=1., weights=None):
        """
        Arguments
        ----------
        input_dim: int - the number of input dimensions
        F: np.array of functions with shape (n,) - the n basis functions
        G: np.array with shape (n,n) - the Gram matrix associated to F
        variance: float - overall scale of the kernel
        weights : np.ndarray with shape (n,)
        """
        self.input_dim = input_dim
        self.F = F
        self.G = G
        # pdinv returns (inverse, Cholesky, inverse-Cholesky, log-determinant)
        self.G_1, L, Li, logdet = pdinv(G)
        self.n = F.shape[0]
        if weights is not None:
            assert weights.shape == (self.n,)
        else:
            weights = np.ones(self.n)
        self.num_params = self.n + 1
        self.name = 'finite_dim'
        self._set_params(np.hstack((variance, weights)))

    def _get_params(self):
        """Return the parameter vector: variance followed by the weights."""
        return np.hstack((self.variance, self.weights))

    def _set_params(self, x):
        """Set variance and weights from a flat vector."""
        assert x.size == self.num_params
        self.variance = x[0]
        self.weights = x[1:]

    def _get_param_names(self):
        """Return parameter names."""
        if self.n == 1:
            return ['variance', 'weight']
        return ['variance'] + ['weight_%i' % i for i in range(self.weights.size)]

    def K(self, X, X2, target):
        """Add the covariance matrix between X and X2 to target (in place)."""
        if X2 is None: X2 = X
        FX = np.column_stack([f(X) for f in self.F])
        FX2 = np.column_stack([f(X2) for f in self.F])
        sqrtW = np.diag(np.sqrt(self.weights))
        product = self.variance * mdot(FX, sqrtW, self.G_1, sqrtW, FX2.T)
        np.add(product, target, target)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance of X to target (in place)."""
        # Bugfix: K() accumulates into a target array and returns None, so the
        # original `np.diag(self.K(X, X))` passed X as `target` and then
        # called np.diag(None). Build the full matrix explicitly instead.
        K_XX = np.zeros((X.shape[0], X.shape[0]))
        self.K(X, None, K_XX)
        np.add(target, np.diag(K_XX), target)

    def dK_dtheta(self, X, X2, target):
        """Return shape is NxMx(Ntheta)"""
        # NOTE(review): unlike the other kernel parts this signature takes no
        # dL_dK, and the 3-axis transposes below assume dv/dw are 3-d while
        # mdot of 2-d factors yields 2-d -- looks unfinished; confirm against
        # the Kernpart interface before relying on these gradients.
        if X2 is None: X2 = X
        FX = np.column_stack([f(X) for f in self.F])
        FX2 = np.column_stack([f(X2) for f in self.F])
        DER = np.zeros((self.n, self.n, self.n))
        for i in range(self.n):
            DER[i, i, i] = np.sqrt(self.weights[i])
        dw = self.variance * mdot(FX, DER, self.G_1, np.diag(np.sqrt(self.weights)), FX2.T)
        dv = mdot(FX, np.diag(np.sqrt(self.weights)), self.G_1, np.diag(np.sqrt(self.weights)), FX2.T)
        np.add(target[:, :, 0], np.transpose(dv, (0, 2, 1)), target[:, :, 0])
        np.add(target[:, :, 1:], np.transpose(dw, (0, 2, 1)), target[:, :, 1:])

    def dKdiag_dtheta(self, X, target):
        # NOTE(review): only the variance column is touched; presumably the
        # weight gradients are omitted deliberately -- confirm.
        np.add(target[:, 0], 1., target[:, 0])
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
41
GPy/kern/parts/fixed.py
Normal file
41
GPy/kern/parts/fixed.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class Fixed(Kernpart):
    def __init__(self, input_dim, K, variance=1.):
        """
        :param input_dim: the number of input dimensions
        :type input_dim: int
        :param variance: the variance of the kernel
        :type variance: float
        """
        self.input_dim = input_dim
        self.fixed_K = K
        self.num_params = 1
        self.name = 'fixed'
        self._set_params(np.array([variance]).flatten())

    def _get_params(self):
        # the variance is the only parameter
        return self.variance

    def _set_params(self, x):
        assert x.shape == (1,)
        self.variance = x

    def _get_param_names(self):
        return ['variance']

    def K(self, X, X2, target):
        # the covariance is a fixed matrix scaled by the variance
        target += self.variance * self.fixed_K

    def dK_dtheta(self, partial, X, X2, target):
        # gradient wrt the variance: sum of partials weighted by the fixed matrix
        target += (partial * self.fixed_K).sum()

    def dK_dX(self, partial, X, X2, target):
        # the fixed matrix does not depend on X
        pass

    def dKdiag_dX(self, partial, X, target):
        pass
|
||||
97
GPy/kern/parts/independent_outputs.py
Normal file
97
GPy/kern/parts/independent_outputs.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# Copyright (c) 2012, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
def index_to_slices(index):
    """
    take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.

    e.g.
    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
    returns
    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]

    or, a more complicated example
    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
    returns
    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
    """
    labels = np.asarray(index, dtype=np.int64)

    # one (initially empty) list of slices per distinct label
    out = [[] for _ in range(labels.max() + 1)]

    # Append a sentinel that cannot equal the final entry, so the last run
    # terminates like every other one.  Comparing each element against its
    # predecessor (via np.roll) marks the first position of every run.
    padded = np.hstack((labels, labels[0] + labels[-1] + 1))
    run_starts = np.nonzero(padded - np.roll(padded, +1))[0]

    # each consecutive pair of run starts delimits one slice, owned by the
    # label found at the run's first element
    for label, start, stop in zip(labels[run_starts[:-1]], run_starts[:-1], run_starts[1:]):
        out[label].append(slice(start, stop))
    return out
|
||||
|
||||
class IndependentOutputs(Kernpart):
    """
    A kernel part which can represent several independent functions.
    This kernel 'switches off' parts of the matrix where the output indexes
    are different.

    The index of the functions is given by the last column in the input X;
    the rest of the columns of X are passed to the wrapped kernel for
    computation (in blocks).

    :param k: the kernpart to wrap; it operates independently on each output
    """
    def __init__(self,k):
        # +1 input dimension for the output-index column
        self.input_dim = k.input_dim + 1
        self.num_params = k.num_params
        self.name = 'iops('+ k.name + ')'
        self.k = k

    def _get_params(self):
        """Parameters are those of the wrapped kernel."""
        return self.k._get_params()

    def _set_params(self,x):
        self.k._set_params(x)
        self.params = x

    def _get_param_names(self):
        return self.k._get_param_names()

    def K(self,X,X2,target):
        """Accumulate the block-diagonal covariance onto target."""
        #Sort out the slices from the input data
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
        else:
            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
        # only blocks with matching output index get a (non-zero) covariance
        [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]

    def Kdiag(self,X,target):
        """Accumulate the diagonal of the covariance onto target."""
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]

    def dK_dtheta(self,dL_dK,X,X2,target):
        """Accumulate the gradient wrt the wrapped kernel's parameters."""
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
        else:
            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
        [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]

    def dK_dX(self,dL_dK,X,X2,target):
        """Accumulate the gradient wrt X (excluding the index column)."""
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
        else:
            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]

    def dKdiag_dX(self,dL_dKdiag,X,target):
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        # Bug fix: this previously delegated to self.k.dKdiag_dX, which
        # computes the gradient wrt X (with an incompatible target shape),
        # not the gradient wrt the parameters.
        [[self.k.dKdiag_dtheta(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]
|
||||
56
GPy/kern/parts/kernpart.py
Normal file
56
GPy/kern/parts/kernpart.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
class Kernpart(object):
    def __init__(self, input_dim):
        """
        The base class for a kernpart: a positive definite function which forms part of a kernel

        :param input_dim: the number of input dimensions to the function
        :type input_dim: int

        Do not instantiate.
        """
        self.input_dim = input_dim
        self.num_params = 1
        self.name = 'unnamed'

    # --- parameter handling: subclasses must implement these ------------
    def _get_params(self):
        """Return the current parameter vector."""
        raise NotImplementedError

    def _set_params(self, x):
        """Accept a new parameter vector."""
        raise NotImplementedError

    def _get_param_names(self):
        """Return one name per parameter."""
        raise NotImplementedError

    # --- covariance interface (all accumulate onto `target`) ------------
    def K(self, X, X2, target):
        """Accumulate the covariance between X and X2 onto target."""
        raise NotImplementedError

    def Kdiag(self, X, target):
        """Accumulate the diagonal of the covariance of X onto target."""
        raise NotImplementedError

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Accumulate the gradient of the objective wrt the parameters."""
        raise NotImplementedError

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of the covariance diagonal wrt the parameters."""
        raise NotImplementedError

    # --- psi statistics (used by sparse/variational approximations) -----
    def psi0(self, Z, mu, S, target):
        raise NotImplementedError

    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
        raise NotImplementedError

    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
        raise NotImplementedError

    def psi1(self, Z, mu, S, target):
        raise NotImplementedError

    def dpsi1_dtheta(self, Z, mu, S, target):
        raise NotImplementedError

    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
        raise NotImplementedError

    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
        raise NotImplementedError

    def psi2(self, Z, mu, S, target):
        raise NotImplementedError

    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
        raise NotImplementedError

    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
        raise NotImplementedError

    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
        raise NotImplementedError

    def dK_dX(self, X, X2, target):
        """Gradient of the covariance wrt the inputs X."""
        raise NotImplementedError
|
||||
298
GPy/kern/parts/linear.py
Normal file
298
GPy/kern/parts/linear.py
Normal file
|
|
@ -0,0 +1,298 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from ...util.linalg import tdot
|
||||
from scipy import weave
|
||||
|
||||
class Linear(Kernpart):
    """
    Linear kernel

    .. math::

       k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variances: the vector of variances :math:`\sigma^2_i`
    :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
    :type ARD: Boolean
    :rtype: kernel object
    """

    def __init__(self, input_dim, variances=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        if ARD == False:
            self.num_params = 1
            self.name = 'linear'
            if variances is not None:
                variances = np.asarray(variances)
                assert variances.size == 1, "Only one variance needed for non-ARD kernel"
            else:
                variances = np.ones(1)
            # cache keys for _K_computations (only used on the non-ARD path)
            self._Xcache, self._X2cache = np.empty(shape=(2,))
        else:
            self.num_params = self.input_dim
            self.name = 'linear'
            if variances is not None:
                variances = np.asarray(variances)
                assert variances.size == self.input_dim, "bad number of lengthscales"
            else:
                variances = np.ones(self.input_dim)
        self._set_params(variances.flatten())

        # initialize cache
        # NOTE(review): _psi_computations below compares against
        # self._variances, but only self._params is initialised here -- the
        # first psi call looks like it would raise AttributeError; confirm.
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))

    def _get_params(self):
        # the parameter vector is exactly the variances
        return self.variances

    def _set_params(self, x):
        assert x.size == (self.num_params)
        self.variances = x
        self.variances2 = np.square(self.variances)

    def _get_param_names(self):
        if self.num_params == 1:
            return ['variance']
        else:
            return ['variance_%i' % i for i in range(self.variances.size)]

    def K(self, X, X2, target):
        # Accumulate the covariance between X and X2 onto target.
        if self.ARD:
            # fold the per-dimension variances into the inputs
            XX = X * np.sqrt(self.variances)
            if X2 is None:
                target += tdot(XX)
            else:
                XX2 = X2 * np.sqrt(self.variances)
                target += np.dot(XX, XX2.T)
        else:
            # non-ARD: cache the raw dot product, scale by the single variance
            self._K_computations(X, X2)
            target += self.variances * self._dot_product

    def Kdiag(self, X, target):
        # diagonal of the covariance: sum_i sigma2_i * x_i^2
        np.add(target, np.sum(self.variances * np.square(X), -1), target)

    def dK_dtheta(self, dL_dK, X, X2, target):
        # Gradient of the objective wrt the variance parameter(s).
        if self.ARD:
            if X2 is None:
                [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
            else:
                product = X[:, None, :] * X2[None, :, :]
                target += (dL_dK[:, :, None] * product).sum(0).sum(0)
        else:
            self._K_computations(X, X2)
            target += np.sum(self._dot_product * dL_dK)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        tmp = dL_dKdiag[:, None] * X ** 2
        if self.ARD:
            target += tmp.sum(0)
        else:
            target += tmp.sum()

    def dK_dX(self, dL_dK, X, X2, target):
        # gradient wrt the first input set X
        target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)

    def dKdiag_dX(self,dL_dKdiag,X,target):
        target += 2.*self.variances*dL_dKdiag[:,None]*X

    #---------------------------------------#
    #             PSI statistics            #
    #---------------------------------------#

    def psi0(self, Z, mu, S, target):
        # E[k(x,x)] under q(x) = N(mu, S): sum_i sigma2_i*(mu_i^2 + S_i)
        self._psi_computations(Z, mu, S)
        target += np.sum(self.variances * self.mu2_S, 1)

    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        tmp = dL_dpsi0[:, None] * self.mu2_S
        if self.ARD:
            target += tmp.sum(0)
        else:
            target += tmp.sum()

    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
        target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
        target_S += dL_dpsi0[:, None] * self.variances

    def psi1(self, Z, mu, S, target):
        """the variance, it does nothing"""
        # For the linear kernel psi1 equals K(mu, Z); K accumulates onto
        # target and returns None, so self._psi1 is set to None here.
        self._psi1 = self.K(mu, Z, target)

    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
        """the variance, it does nothing"""
        self.dK_dtheta(dL_dpsi1, mu, Z, target)

    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
        """Do nothing for S, it does not affect psi1"""
        self._psi_computations(Z, mu, S)
        target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)

    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
        self.dK_dX(dL_dpsi1.T, Z, mu, target)

    def psi2(self, Z, mu, S, target):
        """
        returns N,num_inducing,num_inducing matrix
        """
        self._psi_computations(Z, mu, S)
        # psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :]
        # target += psi2.sum(-1)
        # slow way of doing it, but right
        # psi2_real = rm np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
        # for n in range(mu.shape[0]):
        #     for m_prime in range(Z.shape[0]):
        #         for m in range(Z.shape[0]):
        #             tmp = self._Z[m:m + 1] * self.variances
        #             tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n])))
        #             psi2_real[n, m, m_prime] = np.dot(tmp, (
        #                 self._Z[m_prime:m_prime + 1] * self.variances).T)
        # mu2_S = (self._mu[:, None, :] * self._mu[:, :, None])
        # mu2_S[:, np.arange(self.input_dim), np.arange(self.input_dim)] += self._S
        # psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
        # psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
        # psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T
        # the vectorised result is cached by _psi_computations
        target += self._psi2

    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
        if self.ARD:
            target += tmp.sum(0).sum(0).sum(0)
        else:
            target += tmp.sum()

    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
        """Think N,num_inducing,num_inducing,input_dim """
        self._psi_computations(Z, mu, S)
        AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
        AZZA = AZZA + AZZA.swapaxes(1, 2)
        AZZA_2 = AZZA/2.
        #muAZZA = np.tensordot(mu,AZZA,(-1,0))
        #target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
        #target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
        #target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)

        #Using weave, we can exploit the symmetry of this problem:
        code = """
        int n, m, mm,q,qq;
        double factor,tmp;
        #pragma omp parallel for private(m,mm,q,qq,factor,tmp)
        for(n=0;n<N;n++){
            for(m=0;m<num_inducing;m++){
                for(mm=0;mm<=m;mm++){
                    //add in a factor of 2 for the off-diagonal terms (and then count them only once)
                    if(m==mm)
                        factor = dL_dpsi2(n,m,mm);
                    else
                        factor = 2.0*dL_dpsi2(n,m,mm);

                    for(q=0;q<input_dim;q++){

                        //take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
                        tmp = 0.0;
                        for(qq=0;qq<input_dim;qq++){
                            tmp += mu(n,qq)*AZZA(qq,m,mm,q);
                        }

                        target_mu(n,q) += factor*tmp;
                        target_S(n,q) += factor*AZZA_2(q,m,mm,q);
                    }
                }
            }
        }
        """
        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave_options = {'headers'           : ['<omp.h>'],
                         'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
                         'extra_link_args'   : ['-lgomp']}

        N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
                     type_converters=weave.converters.blitz,**weave_options)

    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        #psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
        #dummy_target = np.zeros_like(target)
        #dummy_target += psi2_dZ.sum(0).sum(0)

        AZA = self.variances*self.ZAinner
        code="""
        int n,m,mm,q;
        #pragma omp parallel for private(n,mm,q)
        for(m=0;m<num_inducing;m++){
            for(q=0;q<input_dim;q++){
                for(mm=0;mm<num_inducing;mm++){
                    for(n=0;n<N;n++){
                        target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
                    }
                }
            }
        }
        """
        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave_options = {'headers'           : ['<omp.h>'],
                         'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
                         'extra_link_args'   : ['-lgomp']}

        N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
                     type_converters=weave.converters.blitz,**weave_options)

    #---------------------------------------#
    #            Precomputations            #
    #---------------------------------------#

    def _K_computations(self, X, X2):
        # Recompute the (scale-free) dot product only when the inputs differ
        # from the cached copies.
        if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
            self._Xcache = X.copy()
            if X2 is None:
                self._dot_product = tdot(X)
                self._X2cache = None
            else:
                self._X2cache = X2.copy()
                self._dot_product = np.dot(X, X2.T)

    def _psi_computations(self, Z, mu, S):
        # here are the "statistics" for psi1 and psi2
        Zv_changed = not (np.array_equal(Z, self._Z) and np.array_equal(self.variances, self._variances))
        muS_changed = not (np.array_equal(mu, self._mu) and np.array_equal(S, self._S))
        if Zv_changed:
            # Z has changed, compute Z specific stuff
            # self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim
            # self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
            # [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
            self.ZA = Z * self.variances
            self._Z = Z.copy()
            self._variances = self.variances.copy()
        if muS_changed:
            # second moment of q(x): mu^2 + S (diagonal S)
            self.mu2_S = np.square(mu) + S
            self.inner = (mu[:, None, :] * mu[:, :, None])
            diag_indices = np.diag_indices(mu.shape[1], 2)
            self.inner[:, diag_indices[0], diag_indices[1]] += S
            self._mu, self._S = mu.copy(), S.copy()
        if Zv_changed or muS_changed:
            self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1)  # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
            self._psi2 = np.dot(self.ZAinner, self.ZA.T)
|
||||
248
GPy/kern/parts/periodic_Matern32.py
Normal file
248
GPy/kern/parts/periodic_Matern32.py
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from GPy.util.linalg import mdot
|
||||
from GPy.util.decorators import silence_errors
|
||||
|
||||
class PeriodicMatern32(Kernpart):
|
||||
"""
|
||||
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance of the Matern kernel
|
||||
:type variance: float
|
||||
:param lengthscale: the lengthscale of the Matern kernel
|
||||
:type lengthscale: np.ndarray of size (input_dim,)
|
||||
:param period: the period
|
||||
:type period: float
|
||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||
:type n_freq: int
|
||||
:rtype: kernel object
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
    """Build the periodic Matern-3/2 part; see the class docstring for the parameters."""
    assert input_dim == 1, "Periodic kernels are only defined for input_dim=1"
    self.name = 'periodic_Mat32'
    self.input_dim = input_dim
    if lengthscale is None:
        lengthscale = np.ones(1)
    else:
        lengthscale = np.asarray(lengthscale)
        assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
    # integration bounds for the RKHS inner product
    self.lower, self.upper = lower, upper
    # parameters: variance, lengthscale, period
    self.num_params = 3
    self.n_freq = n_freq
    # one sine/cosine pair per frequency
    self.n_basis = 2 * n_freq
    self._set_params(np.hstack((variance, lengthscale, period)))
|
||||
|
||||
def _cos(self, alpha, omega, phase):
    """Return a callable evaluating alpha * cos(omega * x + phase)."""
    return lambda x: alpha * np.cos(omega * x + phase)
|
||||
|
||||
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
    # Collapse each row's sum of cosines sharing a frequency into a single
    # cosine: sum_j alpha_j*cos(omega*x + phase_j) = r*cos(omega*x + psi).
    # r1/r2 are the net cosine/sine amplitudes per row.
    r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
    r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
    r = np.sqrt(r1**2 + r2**2)
    # recover the phase; the (r1<0) term picks the correct arctan quadrant.
    # NOTE(review): the r1 == 0 branch uses np.arcsin(r2), not
    # np.arcsin(r2/r) -- confirm r2 is already normalised in that case.
    psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
    return r,omega[:,0:1], psi
|
||||
|
||||
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
    # Integral over [lower, upper] of pairwise products of cosines given in
    # factorised (r, omega, phi) form.  Gint1 is the generic antiderivative;
    # it yields NaN where omega1 == omega2 (division by zero), and the
    # resonant form Gint2 is substituted there via np.where.
    Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
    Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
    #Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
    Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
    return Gint
|
||||
|
||||
def _get_params(self):
    """Pack (variance, lengthscale, period) into one flat vector."""
    params = (self.variance, self.lengthscale, self.period)
    return np.hstack(params)
|
||||
|
||||
def _set_params(self,x):
    """set the value of the parameters."""
    assert x.size==3
    self.variance = x[0]
    self.lengthscale = x[1]
    self.period = x[2]

    # coefficient lists used to form the differential operator (self.a)
    # and the boundary weighting (self.b) in the Gram matrix
    self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
    self.b = [1,self.lengthscale**2/3]

    # Fourier basis: a (sin, cos) pair for each of the n_freq harmonics of
    # the fundamental frequency 2*pi/period
    self.basis_alpha = np.ones((self.n_basis,))
    self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
    self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))

    # Gram matrix of the basis under the RKHS inner product, and its
    # inverse (recomputed on every parameter update)
    self.G = self.Gram_matrix()
    self.Gi = np.linalg.inv(self.G)
|
||||
|
||||
def _get_param_names(self):
    """Names matching the ordering used by _get_params/_set_params."""
    return ['variance', 'lengthscale', 'period']
|
||||
|
||||
def Gram_matrix(self):
    # Each row of (La, Lo, Lp) expands the differential operator applied to
    # one basis function as a sum of three cosines (the function and its
    # first and second derivatives, weighted by self.a).
    La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
    Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
    Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
    r,omega,phi = self._cos_factorization(La,Lo,Lp)
    # integral part of the RKHS inner product over [lower, upper]
    Gint = self._int_computation( r,omega,phi, r,omega,phi)

    # boundary terms: the basis and its first derivative at `lower`
    Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
    F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
    return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
|
||||
|
||||
def K(self, X, X2, target):
    """Compute the covariance matrix between X and X2."""
    # evaluate the Fourier basis at the inputs and project through the
    # inverse Gram matrix
    basis = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])
    FX = basis(X)
    FX2 = FX if X2 is None else basis(X2)
    np.add(mdot(FX, self.Gi, FX2.T), target, target)
|
||||
|
||||
def Kdiag(self, X, target):
    """Compute the diagonal of the covariance matrix associated to X."""
    feats = self._cos(self.basis_alpha[None, :], self.basis_omega[None, :], self.basis_phi[None, :])(X)
    target += np.diag(mdot(feats, self.Gi, feats.T))
|
||||
|
||||
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
    """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
    # Accumulates sum(dK_dparam * dL_dK) into target[0:3] for
    # (variance, lengthscale, period) respectively.
    if X2 is None: X2 = X
    FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
    FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

    # operator expansion and its factorised form (as in Gram_matrix)
    La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
    Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
    Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
    r,omega,phi = self._cos_factorization(La,Lo,Lp)
    Gint = self._int_computation( r,omega,phi, r,omega,phi)

    Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
    F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]

    #dK_dvar
    dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)

    #dK_dlen: differentiate the operator coefficients wrt the lengthscale
    da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
    db_dlen = [0.,2*self.lengthscale/3.]
    dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
    r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
    dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
    dGint_dlen = dGint_dlen + dGint_dlen.T
    dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
    # chain rule through the inverse Gram matrix: d(G^-1) = -G^-1 dG G^-1
    dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)

    #dK_dper: the basis frequencies depend on the period
    dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
    dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)

    dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
    dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
    r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)

    # IPPprim/IPPint: pieces of integral-by-parts for d(Gint)/d(period);
    # the *1 forms NaN at resonant frequencies and the *2 forms replace them.
    IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
    IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
    IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
    IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
    #IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
    IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

    IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
    IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
    IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
    IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
    #IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
    IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

    dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
    dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
    r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)

    dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
    dGint_dper = dGint_dper + dGint_dper.T

    # derivatives of the boundary terms wrt the period
    dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
    dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]

    dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))

    dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)

    # contract each parameter gradient with dL_dK and accumulate
    # np.add(target[:,:,0],dK_dvar, target[:,:,0])
    target[0] += np.sum(dK_dvar*dL_dK)
    #np.add(target[:,:,1],dK_dlen, target[:,:,1])
    target[1] += np.sum(dK_dlen*dL_dK)
    #np.add(target[:,:,2],dK_dper, target[:,:,2])
    target[2] += np.sum(dK_dper*dL_dK)
|
||||
|
||||
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
    """derivative of the diagonal covariance matrix with respect to the parameters"""
    # Basis functions evaluated at X; the diagonal of K is diag(FX * Gi * FX.T).
    FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

    # Collapse the weighted sum of cosines (weights self.a applied to the basis and
    # its derivatives) into single cosines r*cos(omega*x + phi), so that products of
    # basis functions can be integrated in closed form by _int_computation.
    La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2))
    Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
    Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
    r,omega,phi = self._cos_factorization(La,Lo,Lp)
    Gint = self._int_computation( r,omega,phi, r,omega,phi)

    # Basis functions (and, shifted by pi/2, their scaled derivatives) evaluated at
    # the lower end of the integration domain — boundary terms of the Gram matrix.
    Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
    F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]

    #dK_dvar
    # K is linear in the variance (Gi scales with 1/variance), so dK/dvar = K/variance.
    dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)

    #dK_dlen
    # Differentiate the Gram matrix G w.r.t. the lengthscale, then apply
    # d(G^-1)/dl = -G^-1 (dG/dl) G^-1.
    da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
    db_dlen = [0.,2*self.lengthscale/3.]
    dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
    r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
    dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
    # product rule on the symmetric integral gives two transposed contributions
    dGint_dlen = dGint_dlen + dGint_dlen.T
    dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
    dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)

    #dK_dper
    # Both the basis functions (through omega = 2*pi*k/period) and the Gram matrix
    # depend on the period; d/dper cos(omega*x+phi) = -(omega/per)*x*cos(omega*x+phi+pi/2).
    dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)

    dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
    dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
    r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)

    # Closed-form primitive of x*cos(...)*cos(...) terms. IPPprim2/IPPint2 are the
    # omega == omega1 limits, selected wherever the generic expression produced a
    # NaN (division by zero in 1/(omega-omega1)); see the np.where below.
    IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
    IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
    IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
    IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
    IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

    IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
    IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
    IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
    IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
    IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

    dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
    dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
    # NOTE: only omega[:,0:1] is used by _cos_factorization, so passing Lo[:,0:2]
    # alongside the factorized amplitudes is harmless here.
    r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)

    dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
    dGint_dper = dGint_dper + dGint_dper.T

    # Derivatives of the boundary terms w.r.t. the period (lower bound evaluations).
    dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
    dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]

    dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))

    # Factor 2: FX appears on both sides of FX*Gi*FX.T (symmetric in X here).
    dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)

    # Chain rule: fold dL/dKdiag into each parameter's gradient (order: var, len, per).
    target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
    target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
    target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
|
||||
266
GPy/kern/parts/periodic_Matern52.py
Normal file
266
GPy/kern/parts/periodic_Matern52.py
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from GPy.util.linalg import mdot
|
||||
from GPy.util.decorators import silence_errors
|
||||
|
||||
class PeriodicMatern52(Kernpart):
    """
    Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance of the Matern kernel
    :type variance: float
    :param lengthscale: the lengthscale of the Matern kernel
    :type lengthscale: np.ndarray of size (input_dim,)
    :param period: the period
    :type period: float
    :param n_freq: the number of frequencies considered for the periodic subspace
    :type n_freq: int
    :rtype: kernel object

    """

    def __init__(self,input_dim=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
        assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
        self.name = 'periodic_Mat52'
        self.input_dim = input_dim
        if lengthscale is not None:
            lengthscale = np.asarray(lengthscale)
            assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
        else:
            lengthscale = np.ones(1)
        # [lower, upper] is the interval over which the RKHS inner-product
        # integrals of the Gram matrix are computed.
        self.lower,self.upper = lower, upper
        self.num_params = 3
        self.n_freq = n_freq
        # two basis functions (a sine and a cosine) per retained frequency
        self.n_basis = 2*n_freq
        self._set_params(np.hstack((variance,lengthscale,period)))

    def _cos(self,alpha,omega,phase):
        """Return the function x -> alpha*cos(omega*x + phase)."""
        def f(x):
            return alpha*np.cos(omega*x+phase)
        return f

    @silence_errors
    def _cos_factorization(self,alpha,omega,phase):
        """Collapse a row-wise sum of cosines sharing one frequency per row into a
        single cosine r*cos(omega*x + psi); returns (r, omega, psi) as columns."""
        r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
        r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
        r = np.sqrt(r1**2 + r2**2)
        # arctan needs a pi shift when the cosine weight is negative;
        # the arcsin branch covers r1 == 0 (pure sine component).
        psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
        return r,omega[:,0:1], psi

    @silence_errors
    def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
        """Closed-form integral over [lower, upper] of products of cosines.
        Gint2 is the omega1 == omega2 limit, substituted wherever the generic
        expression Gint1 produced NaN (division by omega1 - omega2 == 0)."""
        Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
        Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
        Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
        return Gint

    def _get_params(self):
        """return the value of the parameters."""
        return np.hstack((self.variance,self.lengthscale,self.period))

    def _set_params(self,x):
        """set the value of the parameters."""
        assert x.size==3
        self.variance = x[0]
        self.lengthscale = x[1]
        self.period = x[2]

        # a: coefficients of the Matern 5/2 differential operator expansion;
        # b: weights of the boundary terms in the RKHS inner product.
        self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.]
        self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]

        # Truncated Fourier basis: for each frequency k, a sine (phase -pi/2)
        # and a cosine (phase 0) at omega = 2*pi*k/period.
        self.basis_alpha = np.ones((2*self.n_freq,))
        self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
        self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))

        # The Gram matrix and its inverse depend only on the parameters, so cache them.
        self.G = self.Gram_matrix()
        self.Gi = np.linalg.inv(self.G)

    def _get_param_names(self):
        """return parameter names."""
        return ['variance','lengthscale','period']

    def Gram_matrix(self):
        """Gram matrix of the Fourier basis under the Matern 5/2 RKHS inner product:
        an integral term over [lower, upper] plus boundary terms at the lower bound."""
        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
        Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
        Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
        r,omega,phi = self._cos_factorization(La,Lo,Lp)
        Gint = self._int_computation( r,omega,phi, r,omega,phi)

        # Basis and its first/second derivatives (phase-shifted cosines) at the lower bound.
        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
        F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
        lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T)
        return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms)

    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2."""
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        if X2 is None:
            FX2 = FX
        else:
            FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
        # K(X, X2) = FX * Gi * FX2.T, accumulated into target in place.
        np.add(mdot(FX,self.Gi,FX2.T), target,target)

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)

    @silence_errors
    def dK_dtheta(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
        if X2 is None: X2 = X
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
        FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

        # Same factorization / integral machinery as Gram_matrix.
        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
        Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
        Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
        r,omega,phi = self._cos_factorization(La,Lo,Lp)
        Gint = self._int_computation( r,omega,phi, r,omega,phi)

        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
        F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]

        #dK_dvar
        # K is linear in the variance (Gi carries a 1/variance factor).
        dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)

        #dK_dlen
        # Differentiate G w.r.t. the lengthscale; d(G^-1)/dl = -G^-1 (dG/dl) G^-1.
        da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
        db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
        # product rule on the symmetric integral gives two transposed contributions
        dGint_dlen = dGint_dlen + dGint_dlen.T
        dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
        dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)

        #dK_dper
        # Both the basis (omega = 2*pi*k/period) and the Gram matrix depend on the period.
        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
        dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)

        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)

        # Closed-form primitive of x*cos(.)*cos(.); the *2 variants are the
        # omega == omega1 limits, selected where the generic expression is NaN.
        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
        # NOTE: _cos_factorization only reads omega[:,0:1] and all columns of Lo are
        # identical, so the column count of Lo[:,0:2] is immaterial here.
        r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)

        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
        dGint_dper = dGint_dper + dGint_dper.T

        # Period-derivatives of the lower-bound boundary evaluations.
        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
        dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
        dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]

        # FIX: the b[0] term previously used np.dot(Flower.T, dFlower_dper), a (1,1)
        # inner product that silently broadcast against the (n,n) outer product.
        # The symmetric outer-product form below matches the other boundary terms.
        dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower,dFlower_dper.T))
        dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
        dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
        dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
        dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)

        dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
        dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)

        # Chain rule: fold dL/dK into each parameter's gradient (order: var, len, per).
        target[0] += np.sum(dK_dvar*dL_dK)
        target[1] += np.sum(dK_dlen*dL_dK)
        target[2] += np.sum(dK_dper*dL_dK)

    @silence_errors
    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters"""
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

        # Same factorization / integral machinery as Gram_matrix.
        La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
        Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
        Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
        r,omega,phi = self._cos_factorization(La,Lo,Lp)
        Gint = self._int_computation( r,omega,phi, r,omega,phi)

        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
        F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
        F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]

        #dK_dvar
        dK_dvar = 1. / self.variance * mdot(FX, self.Gi, FX.T)

        #dK_dlen
        da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
        db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
        dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
        r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
        dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
        dGint_dlen = dGint_dlen + dGint_dlen.T
        dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
        dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
        dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)

        #dK_dper
        dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)

        dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
        dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
        r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)

        # Closed-form primitive of x*cos(.)*cos(.); NaN entries in the generic
        # expression mark the omega == omega1 case and are replaced by its limit.
        IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
        IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
        IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
        IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
        IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

        IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
        IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
        IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + .5*self.upper**2*np.cos(phi-phi1.T)
        IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + .5*self.lower**2*np.cos(phi-phi1.T)
        IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

        dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
        dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
        r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)

        dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
        dGint_dper = dGint_dper + dGint_dper.T

        dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
        dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
        dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]

        # FIX: same correction as in dK_dtheta — the b[0] term previously used
        # np.dot(Flower.T, dFlower_dper), a (1,1) inner product broadcast over (n,n).
        dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower,dFlower_dper.T))
        dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
        dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
        dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
        dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)

        dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
        # Factor 2: FX appears on both sides of FX*Gi*FX.T.
        dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)

        # Chain rule on the diagonal only (order: var, len, per).
        target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
        target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
        target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
|
||||
237
GPy/kern/parts/periodic_exponential.py
Normal file
237
GPy/kern/parts/periodic_exponential.py
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from GPy.util.linalg import mdot
|
||||
from GPy.util.decorators import silence_errors
|
||||
|
||||
class PeriodicExponential(Kernpart):
|
||||
"""
|
||||
Kernel of the periodic subspace (up to a given frequency) of a exponential (Matern 1/2) RKHS. Only defined for input_dim=1.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance of the Matern kernel
|
||||
:type variance: float
|
||||
:param lengthscale: the lengthscale of the Matern kernel
|
||||
:type lengthscale: np.ndarray of size (input_dim,)
|
||||
:param period: the period
|
||||
:type period: float
|
||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||
:type n_freq: int
|
||||
:rtype: kernel object
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
||||
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
|
||||
self.name = 'periodic_exp'
|
||||
self.input_dim = input_dim
|
||||
if lengthscale is not None:
|
||||
lengthscale = np.asarray(lengthscale)
|
||||
assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
|
||||
else:
|
||||
lengthscale = np.ones(1)
|
||||
self.lower,self.upper = lower, upper
|
||||
self.num_params = 3
|
||||
self.n_freq = n_freq
|
||||
self.n_basis = 2*n_freq
|
||||
self._set_params(np.hstack((variance,lengthscale,period)))
|
||||
|
||||
def _cos(self,alpha,omega,phase):
|
||||
def f(x):
|
||||
return alpha*np.cos(omega*x+phase)
|
||||
return f
|
||||
|
||||
@silence_errors
|
||||
def _cos_factorization(self,alpha,omega,phase):
|
||||
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
|
||||
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
|
||||
r = np.sqrt(r1**2 + r2**2)
|
||||
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
|
||||
return r,omega[:,0:1], psi
|
||||
|
||||
@silence_errors
|
||||
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
|
||||
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
|
||||
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
|
||||
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
|
||||
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
|
||||
return Gint
|
||||
|
||||
def _get_params(self):
|
||||
"""return the value of the parameters."""
|
||||
return np.hstack((self.variance,self.lengthscale,self.period))
|
||||
|
||||
def _set_params(self,x):
|
||||
"""set the value of the parameters."""
|
||||
assert x.size==3
|
||||
self.variance = x[0]
|
||||
self.lengthscale = x[1]
|
||||
self.period = x[2]
|
||||
|
||||
self.a = [1./self.lengthscale, 1.]
|
||||
self.b = [1]
|
||||
|
||||
self.basis_alpha = np.ones((self.n_basis,))
|
||||
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
|
||||
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
|
||||
|
||||
self.G = self.Gram_matrix()
|
||||
self.Gi = np.linalg.inv(self.G)
|
||||
|
||||
def _get_param_names(self):
|
||||
"""return parameter names."""
|
||||
return ['variance','lengthscale','period']
|
||||
|
||||
def Gram_matrix(self):
|
||||
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
|
||||
Lo = np.column_stack((self.basis_omega,self.basis_omega))
|
||||
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
|
||||
r,omega,phi = self._cos_factorization(La,Lo,Lp)
|
||||
Gint = self._int_computation( r,omega,phi, r,omega,phi)
|
||||
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
|
||||
return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
|
||||
|
||||
def K(self,X,X2,target):
|
||||
"""Compute the covariance matrix between X and X2."""
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
if X2 is None:
|
||||
FX2 = FX
|
||||
else:
|
||||
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
||||
np.add(mdot(FX,self.Gi,FX2.T), target,target)
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||
|
||||
@silence_errors
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
|
||||
|
||||
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
|
||||
Lo = np.column_stack((self.basis_omega,self.basis_omega))
|
||||
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
|
||||
r,omega,phi = self._cos_factorization(La,Lo,Lp)
|
||||
Gint = self._int_computation( r,omega,phi, r,omega,phi)
|
||||
|
||||
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
|
||||
|
||||
#dK_dvar
|
||||
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
|
||||
|
||||
#dK_dlen
|
||||
da_dlen = [-1./self.lengthscale**2,0.]
|
||||
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
|
||||
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
|
||||
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
|
||||
dGint_dlen = dGint_dlen + dGint_dlen.T
|
||||
dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
|
||||
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
|
||||
|
||||
#dK_dper
|
||||
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
|
||||
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
|
||||
|
||||
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
|
||||
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
|
||||
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
|
||||
|
||||
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
|
||||
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
|
||||
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
|
||||
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
|
||||
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
|
||||
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
|
||||
|
||||
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
|
||||
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
|
||||
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
|
||||
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
|
||||
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
|
||||
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
|
||||
|
||||
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
|
||||
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
|
||||
r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
|
||||
|
||||
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
|
||||
dGint_dper = dGint_dper + dGint_dper.T
|
||||
|
||||
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
|
||||
|
||||
dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
|
||||
|
||||
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
|
||||
|
||||
target[0] += np.sum(dK_dvar*dL_dK)
|
||||
target[1] += np.sum(dK_dlen*dL_dK)
|
||||
target[2] += np.sum(dK_dper*dL_dK)
|
||||
|
||||
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
    """derivative of the diagonal of the covariance matrix with respect to the parameters

    Accumulates into ``target`` (length >= 3) the gradients of
    ``sum(dL_dKdiag * Kdiag(X))`` with respect to, in order:
    variance (target[0]), lengthscale (target[1]) and period (target[2]).

    :param dL_dKdiag: gradient of the objective w.r.t. the diagonal of K
    :param X: inputs, presumably of shape (num_data, 1) -- TODO confirm
    :param target: gradient accumulator, modified in place
    """
    # Basis functions evaluated at the inputs X.
    FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

    # Factorise the action of the (presumed differential) operator on the cosine
    # basis as amplitudes (La), frequencies (Lo) and phases (Lp).
    La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
    Lo = np.column_stack((self.basis_omega,self.basis_omega))
    Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
    r,omega,phi = self._cos_factorization(La,Lo,Lp)
    # Gram-like integral of the factorised basis against itself.
    Gint = self._int_computation( r,omega,phi, r,omega,phi)

    # Basis evaluated at the lower bound of the integration domain.
    Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]

    #dK_dvar: K is proportional to variance, so dK/dvar = K/variance.
    dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)

    #dK_dlen: differentiate the basis amplitudes w.r.t. the lengthscale,
    # then propagate through G via the product rule.
    da_dlen = [-1./self.lengthscale**2,0.]
    dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
    r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
    dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
    dGint_dlen = dGint_dlen + dGint_dlen.T # symmetrise: derivative acts on both factors
    dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
    # K = F Gi F.T with G proportional to variance, hence the -Gi dG Gi sandwich.
    dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)

    #dK_dper: derivative w.r.t. the period enters through the basis frequencies.
    # d/dper cos(omega x/per + phi) = (omega x/per^2) cos(omega x/per + phi + pi/2)
    dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)

    dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
    dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
    r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)

    # Primitive of the product of cosines, evaluated between lower and upper.
    # The first form diverges when omega == omega1 (division by omega-omega1);
    # the second form is the analytic limit used in that case (selected via np.where).
    IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
    IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
    IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
    IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
    # NaNs in IPPprim1 mark the omega == omega1 resonant entries.
    IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)

    # Same two-case treatment for the integral term of the integration by parts.
    IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
    IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
    IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
    IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
    IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)

    # Extra term coming from the derivative of the second-order coefficient.
    dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
    dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
    r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T

    dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
    dGint_dper = dGint_dper + dGint_dper.T # symmetrise as above

    # Boundary term at the lower limit also depends on the period.
    dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]

    dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))

    # Chain rule: d(F Gi F.T) = dF Gi F.T + F Gi dF.T - F Gi dG Gi F.T.
    # The first two terms are equal by symmetry here, hence the factor 2.
    dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)

    target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
    target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
    target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
|
||||
111
GPy/kern/parts/prod.py
Normal file
111
GPy/kern/parts/prod.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
|
||||
class Prod(Kernpart):
    """
    Computes the product of 2 kernels

    :param k1, k2: the kernels to multiply
    :type k1, k2: Kernpart
    :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
    :type tensor: Boolean
    :rtype: kernel object

    """
    def __init__(self,k1,k2,tensor=False):
        self.num_params = k1.num_params + k2.num_params
        self.name = k1.name + '<times>' + k2.name
        self.k1 = k1
        self.k2 = k2
        if tensor:
            # product of the input spaces: each factor sees its own column slice
            self.input_dim = k1.input_dim + k2.input_dim
            self.slice1 = slice(0,self.k1.input_dim)
            self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim)
        else:
            # same input space: both factors see all columns
            assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to sum don't have the same dimension."
            self.input_dim = k1.input_dim
            self.slice1 = slice(0,self.input_dim)
            self.slice2 = slice(0,self.input_dim)

        # cache placeholders, filled in by _K_computations
        self._X, self._X2, self._params = np.empty(shape=(3,1))
        self._set_params(np.hstack((k1._get_params(),k2._get_params())))

    def _get_params(self):
        """return the value of the parameters."""
        return np.hstack((self.k1._get_params(), self.k2._get_params()))

    def _set_params(self,x):
        """set the value of the parameters."""
        self.k1._set_params(x[:self.k1.num_params])
        self.k2._set_params(x[self.k1.num_params:])

    def _get_param_names(self):
        """return parameter names."""
        return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]

    def K(self,X,X2,target):
        """Compute the covariance matrix K1*K2 (elementwise) and add it to target."""
        self._K_computations(X,X2)
        target += self._K1 * self._K2

    def dK_dtheta(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters.

        Product rule: d(K1*K2)/dtheta1 = K2 * dK1/dtheta1 (and symmetrically for k2).
        """
        self._K_computations(X,X2)
        if X2 is None:
            self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
        else:
            self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        target1 = np.zeros(X.shape[0])
        target2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],target1)
        self.k2.Kdiag(X[:,self.slice2],target2)
        target += target1 * target2

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters."""
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],K1)
        self.k2.Kdiag(X[:,self.slice2],K2)
        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])

    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        self._K_computations(X,X2)
        self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
        self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """derivative of the diagonal of the covariance matrix with respect to X."""
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],K1)
        self.k2.Kdiag(X[:,self.slice2],K2)

        # Product rule on the diagonal. The original code called dK_dX here with a
        # missing X2 argument, which raised a TypeError; dKdiag_dX is the intended call.
        self.k1.dKdiag_dX(dL_dKdiag*K2, X[:,self.slice1], target)
        self.k2.dKdiag_dX(dL_dKdiag*K1, X[:,self.slice2], target)

    def _K_computations(self,X,X2):
        """Compute and cache K1 and K2; recompute only when X, X2 or the parameters change."""
        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
            self._X = X.copy()
            # store the cache key; the original used '==' (a no-op comparison), so the
            # cache was never valid and K1/K2 were recomputed on every call
            self._params = self._get_params().copy()
            if X2 is None:
                self._X2 = None
                self._K1 = np.zeros((X.shape[0],X.shape[0]))
                self._K2 = np.zeros((X.shape[0],X.shape[0]))
                self.k1.K(X[:,self.slice1],None,self._K1)
                self.k2.K(X[:,self.slice2],None,self._K2)
            else:
                self._X2 = X2.copy()
                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
                self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
                self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
||||
|
||||
101
GPy/kern/parts/prod_orthogonal.py
Normal file
101
GPy/kern/parts/prod_orthogonal.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
|
||||
|
||||
class prod_orthogonal(Kernpart):
    """
    Computes the product of 2 kernels defined on orthogonal (concatenated) input spaces.

    :param k1, k2: the kernels to multiply
    :type k1, k2: Kernpart
    :rtype: kernel object

    """
    def __init__(self,k1,k2):
        # k1 acts on the first k1.input_dim columns, k2 on the remaining ones
        self.input_dim = k1.input_dim + k2.input_dim
        self.num_params = k1.num_params + k2.num_params
        self.name = k1.name + '<times>' + k2.name
        self.k1 = k1
        self.k2 = k2
        # cache placeholders, filled in by _K_computations
        self._X, self._X2, self._params = np.empty(shape=(3,1))
        self._set_params(np.hstack((k1._get_params(),k2._get_params())))

    def _get_params(self):
        """return the value of the parameters."""
        return np.hstack((self.k1._get_params(), self.k2._get_params()))

    def _set_params(self,x):
        """set the value of the parameters."""
        self.k1._set_params(x[:self.k1.num_params])
        self.k2._set_params(x[self.k1.num_params:])

    def _get_param_names(self):
        """return parameter names."""
        return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]

    def K(self,X,X2,target):
        """Compute the covariance matrix K1*K2 (elementwise) and add it to target."""
        self._K_computations(X,X2)
        target += self._K1 * self._K2

    def dK_dtheta(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters.

        Product rule: d(K1*K2)/dtheta1 = K2 * dK1/dtheta1 (and symmetrically for k2).
        """
        self._K_computations(X,X2)
        if X2 is None:
            self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
        else:
            self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        target1 = np.zeros(X.shape[0])
        target2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,:self.k1.input_dim],target1)
        self.k2.Kdiag(X[:,self.k1.input_dim:],target2)
        target += target1 * target2

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters."""
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,:self.k1.input_dim],K1)
        self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])

    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        self._K_computations(X,X2)
        self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
        self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """derivative of the diagonal of the covariance matrix with respect to X."""
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
        self.k2.Kdiag(X[:,self.k1.input_dim:],K2)

        # Product rule on the diagonal. The original code called dK_dX here with a
        # missing X2 argument, which raised a TypeError; dKdiag_dX is the intended call.
        self.k1.dKdiag_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
        self.k2.dKdiag_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)

    def _K_computations(self,X,X2):
        """Compute and cache K1 and K2; recompute only when X, X2 or the parameters change."""
        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
            self._X = X.copy()
            # store the cache key; the original used '==' (a no-op comparison), so the
            # cache was never valid and K1/K2 were recomputed on every call
            self._params = self._get_params().copy()
            if X2 is None:
                self._X2 = None
                self._K1 = np.zeros((X.shape[0],X.shape[0]))
                self._K2 = np.zeros((X.shape[0],X.shape[0]))
                self.k1.K(X[:,:self.k1.input_dim],None,self._K1)
                self.k2.K(X[:,self.k1.input_dim:],None,self._K2)
            else:
                self._X2 = X2.copy()
                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
                self.k1.K(X[:,:self.k1.input_dim],X2[:,:self.k1.input_dim],self._K1)
                self.k2.K(X[:,self.k1.input_dim:],X2[:,self.k1.input_dim:],self._K2)
|
||||
80
GPy/kern/parts/rational_quadratic.py
Normal file
80
GPy/kern/parts/rational_quadratic.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class RationalQuadratic(Kernpart):
    """
    rational quadratic kernel

    .. math::

       k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2 \ell^2} \\bigg)^{- \\alpha} \ \ \ \ \  \\text{ where  } r^2 = (x-y)^2

    :param input_dim: the number of input dimensions
    :type input_dim: int (input_dim=1 is the only value currently supported)
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscale: the lengthscale :math:`\ell`
    :type lengthscale: float
    :param power: the power :math:`\\alpha`
    :type power: float
    :rtype: Kernpart object

    """
    def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.):
        assert input_dim == 1, "For this kernel we assume input_dim=1"
        self.input_dim = input_dim
        self.num_params = 3
        self.name = 'rat_quad'
        self.variance = variance
        self.lengthscale = lengthscale
        self.power = power

    def _get_params(self):
        """Return the parameters as a flat array (variance, lengthscale, power)."""
        return np.hstack((self.variance,self.lengthscale,self.power))

    def _set_params(self,x):
        """Set the parameters from a flat array (variance, lengthscale, power)."""
        self.variance = x[0]
        self.lengthscale = x[1]
        self.power = x[2]

    def _get_param_names(self):
        """Return the parameter names, in the order used by _get_params."""
        return ['variance','lengthscale','power']

    def K(self,X,X2,target):
        """Add the covariance matrix k(X, X2) to target."""
        if X2 is None: X2 = X
        # dist2 = r^2 / lengthscale^2
        dist2 = np.square((X-X2.T)/self.lengthscale)
        target += self.variance*(1 + dist2/2.)**(-self.power)

    def Kdiag(self,X,target):
        """Add the diagonal of k(X, X) to target; k(x, x) = variance."""
        target += self.variance

    def dK_dtheta(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters.

        With K = sigma^2 (1 + dist2/2)^(-alpha) and dist2 = r^2/l^2:
          dK/dsigma^2-direction: (1 + dist2/2)^(-alpha)
          dK/dl     : sigma^2 alpha dist2/l (1 + dist2/2)^(-alpha-1)
          dK/dalpha : -sigma^2 log(1 + dist2/2) (1 + dist2/2)^(-alpha)
        """
        if X2 is None: X2 = X
        dist2 = np.square((X-X2.T)/self.lengthscale)

        dvar = (1 + dist2/2.)**(-self.power)
        # Fixed: the previous expression used lengthscale**(-3) and an extra
        # /self.power inside the base, which is inconsistent with K above.
        # Since d(dist2)/dl = -2*dist2/l, dK/dl = var*power*dist2/l*(1+dist2/2)^(-power-1).
        dl = self.power * self.variance * dist2 / self.lengthscale * (1 + dist2/2.)**(-self.power-1)
        dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)

        target[0] += np.sum(dvar*dL_dK)
        target[1] += np.sum(dl*dL_dK)
        target[2] += np.sum(dp*dL_dK)

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        """derivative of the diagonal with respect to the parameters."""
        target[0] += np.sum(dL_dKdiag)
        # here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged

    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        if X2 is None: X2 = X
        dist2 = np.square((X-X2.T)/self.lengthscale)

        # Fixed: the base previously contained dist2/2./self.lengthscale, which is
        # inconsistent with K. Differentiating K w.r.t. X gives
        # dK/dX = -var*power*(X-X2.T)/l^2 * (1 + dist2/2)^(-power-1).
        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2.)**(-self.power-1)
        target += np.sum(dL_dK*dX,1)[:,np.newaxis]

    def dKdiag_dX(self,dL_dKdiag,X,target):
        # k(x, x) = variance does not depend on X
        pass
|
||||
330
GPy/kern/parts/rbf.py
Normal file
330
GPy/kern/parts/rbf.py
Normal file
|
|
@ -0,0 +1,330 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
from scipy import weave
|
||||
from ...util.linalg import tdot
|
||||
|
||||
class RBF(Kernpart):
    """
    Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:

    .. math::

       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \  \\text{ where  } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}

    where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscale: the vector of lengthscale of the kernel
    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
    :type ARD: Boolean
    :rtype: kernel object

    .. Note: this object implements both the ARD and 'spherical' version of the function
    """

    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
        self.input_dim = input_dim
        self.name = 'rbf'
        self.ARD = ARD
        if not ARD:
            self.num_params = 2
            if lengthscale is not None:
                lengthscale = np.asarray(lengthscale)
                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
            else:
                lengthscale = np.ones(1)
        else:
            self.num_params = self.input_dim + 1
            if lengthscale is not None:
                lengthscale = np.asarray(lengthscale)
                assert lengthscale.size == self.input_dim, "bad number of lengthscales"
            else:
                lengthscale = np.ones(self.input_dim)

        self._set_params(np.hstack((variance, lengthscale.flatten())))

        # initialize cache
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))

        # a set of optional args to pass to weave
        self.weave_options = {'headers': ['<omp.h>'],
                              'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
                              'extra_link_args': ['-lgomp']}

    def _get_params(self):
        """Return the parameters as a flat array (variance, lengthscale(s))."""
        return np.hstack((self.variance, self.lengthscale))

    def _set_params(self, x):
        """Set the parameters from a flat array and invalidate all caches."""
        assert x.size == (self.num_params)
        self.variance = x[0]
        self.lengthscale = x[1:]
        self.lengthscale2 = np.square(self.lengthscale)
        # reset cached results
        self._X, self._X2, self._params = np.empty(shape=(3, 1))
        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

    def _get_param_names(self):
        """Return the parameter names, in the order used by _get_params."""
        if self.num_params == 2:
            return ['variance', 'lengthscale']
        else:
            return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]

    def K(self, X, X2, target):
        """Add the covariance matrix k(X, X2) to target (X2=None means k(X, X))."""
        self._K_computations(X, X2)
        target += self.variance * self._K_dvar

    def Kdiag(self, X, target):
        """Add the diagonal of k(X, X) to target; k(x, x) = variance."""
        np.add(target, self.variance, target)

    def dK_dtheta(self, dL_dK, X, X2, target):
        """derivative of the covariance matrix with respect to the parameters.

        target[0] accumulates the variance gradient, target[1:] the lengthscale
        gradient(s). The ARD branch runs the per-dimension sums in C via weave.
        """
        self._K_computations(X, X2)
        target[0] += np.sum(self._K_dvar * dL_dK)
        if self.ARD:
            dvardLdK = self._K_dvar * dL_dK
            var_len3 = self.variance / np.power(self.lengthscale, 3)
            if X2 is None:
                # save computation for the symmetrical case
                dvardLdK = dvardLdK + dvardLdK.T
                # weave receives X2 through arg_names below; bind it to X so the
                # blitz converter is not handed None (the symmetric C code never reads it)
                X2 = X
                code = """
                int q,i,j;
                double tmp;
                for(q=0; q<input_dim; q++){
                    tmp = 0;
                    for(i=0; i<num_data; i++){
                        for(j=0; j<i; j++){
                            tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
                        }
                    }
                    target(q+1) += var_len3(q)*tmp;
                }
                """
                num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
                weave.inline(code, arg_names=['num_data','num_inducing','input_dim','X','X2','target','dvardLdK','var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
            else:
                code = """
                int q,i,j;
                double tmp;
                for(q=0; q<input_dim; q++){
                    tmp = 0;
                    for(i=0; i<num_data; i++){
                        for(j=0; j<num_inducing; j++){
                            tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
                        }
                    }
                    target(q+1) += var_len3(q)*tmp;
                }
                """
                num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
                #[np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
                weave.inline(code, arg_names=['num_data','num_inducing','input_dim','X','X2','target','dvardLdK','var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
        else:
            target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        # NB: derivative of diagonal elements wrt lengthscale is 0
        target[0] += np.sum(dL_dKdiag)

    def dK_dX(self, dL_dK, X, X2, target):
        """derivative of the covariance matrix with respect to X.

        NOTE(review): X2 must not be None here -- X2[None, :, :] below would fail.
        """
        self._K_computations(X, X2)
        _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
        dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        # k(x, x) = variance does not depend on X
        pass

    #---------------------------------------#
    #             PSI statistics            #
    #---------------------------------------#

    def psi0(self, Z, mu, S, target):
        target += self.variance

    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
        target[0] += np.sum(dL_dpsi0)

    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
        # psi0 = variance, independent of mu and S
        pass

    def psi1(self, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        target += self._psi1

    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        denom_deriv = S[:, None, :] / (self.lengthscale ** 3 + self.lengthscale * S[:, None, :])
        d_length = self._psi1[:, :, None] * (self.lengthscale * np.square(self._psi1_dist / (self.lengthscale2 + S[:, None, :])) + denom_deriv)
        target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
        dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
        if not self.ARD:
            target[1] += dpsi1_dlength.sum()
        else:
            target[1:] += dpsi1_dlength.sum(0).sum(0)

    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        denominator = (self.lengthscale2 * (self._psi1_denom))
        dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
        target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)

    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
        self._psi_computations(Z, mu, S)
        tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
        target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
        target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)

    def psi2(self, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        target += self._psi2

    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
        """Shape N,num_inducing,num_inducing,Ntheta"""
        self._psi_computations(Z, mu, S)
        d_var = 2.*self._psi2 / self.variance
        d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)

        target[0] += np.sum(dL_dpsi2 * d_var)
        dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
        if not self.ARD:
            target[1] += dpsi2_dlength.sum()
        else:
            target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)

    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
        self._psi_computations(Z, mu, S)
        term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
        term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
        dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
        target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)

    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
        """Think N,num_inducing,num_inducing,input_dim """
        self._psi_computations(Z, mu, S)
        tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
        target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
        target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)

    #---------------------------------------#
    #            Precomputations            #
    #---------------------------------------#

    def _K_computations(self, X, X2):
        """Cache the scaled squared distances and exp term; recompute only on change."""
        if not (np.array_equal(X, self._X) and np.array_equal(X2, self._X2) and np.array_equal(self._params , self._get_params())):
            self._X = X.copy()
            # store the cache key; the original used '==' (a no-op comparison), so
            # the parameter check never matched and everything was always recomputed
            self._params = self._get_params().copy()
            if X2 is None:
                self._X2 = None
                X = X / self.lengthscale
                Xsquare = np.sum(np.square(X), 1)
                self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
            else:
                self._X2 = X2.copy()
                X = X / self.lengthscale
                X2 = X2 / self.lengthscale
                self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
            self._K_dvar = np.exp(-0.5 * self._K_dist2)

    def _psi_computations(self, Z, mu, S):
        # here are the "statistics" for psi1 and psi2
        if not np.array_equal(Z, self._Z):
            #Z has changed, compute Z specific stuff
            self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # num_inducing,num_inducing,input_dim
            self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # num_inducing,num_inducing,input_dim
            self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # num_inducing,num_inducing,input_dim
            self._Z = Z

        if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
            #something's changed. recompute EVERYTHING

            #psi1
            self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
            self._psi1_dist = Z[None,:,:]-mu[:,None,:]
            self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscale2/self._psi1_denom
            self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
            self._psi1 = self.variance*np.exp(self._psi1_exponent)

            #psi2
            self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,num_inducing,num_inducing,input_dim
            self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat)
            #self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,num_inducing,num_inducing,input_dim
            #self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
            #self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,num_inducing,num_inducing
            self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,num_inducing,num_inducing

            #store matrices for caching
            self._Z, self._mu, self._S = Z, mu,S

    def weave_psi2(self,mu,Zhat):
        """Compute mudist, mudist_sq and psi2_exponent with an OpenMP weave kernel.

        The returned psi2 array is allocated but filled by the caller via np.exp
        (see the commented-out lines in the C code).
        """
        N,input_dim = mu.shape
        num_inducing = Zhat.shape[0]

        mudist = np.empty((N,num_inducing,num_inducing,input_dim))
        mudist_sq = np.empty((N,num_inducing,num_inducing,input_dim))
        psi2_exponent = np.zeros((N,num_inducing,num_inducing))
        psi2 = np.empty((N,num_inducing,num_inducing))

        psi2_Zdist_sq = self._psi2_Zdist_sq
        _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
        half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
        variance_sq = float(np.square(self.variance))
        if self.ARD:
            lengthscale2 = self.lengthscale2
        else:
            lengthscale2 = np.ones(input_dim) * self.lengthscale2
        code = """
        double tmp;

        #pragma omp parallel for private(tmp)
        for (int n=0; n<N; n++){
            for (int m=0; m<num_inducing; m++){
                for (int mm=0; mm<(m+1); mm++){
                    for (int q=0; q<input_dim; q++){
                        //compute mudist
                        tmp = mu(n,q) - Zhat(m,mm,q);
                        mudist(n,m,mm,q) = tmp;
                        mudist(n,mm,m,q) = tmp;

                        //now mudist_sq
                        tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
                        mudist_sq(n,m,mm,q) = tmp;
                        mudist_sq(n,mm,m,q) = tmp;

                        //now psi2_exponent
                        tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
                        psi2_exponent(n,mm,m) += tmp;
                        if (m !=mm){
                            psi2_exponent(n,m,mm) += tmp;
                        }
                        //psi2 would be computed like this, but np is faster
                        //tmp = variance_sq*exp(psi2_exponent(n,m,mm));
                        //psi2(n,m,mm) = tmp;
                        //psi2(n,mm,m) = tmp;
                    }
                }
            }
        }

        """

        support_code = """
        #include <omp.h>
        #include <math.h>
        """
        weave.inline(code, support_code=support_code, libraries=['gomp'],
                     arg_names=['N','num_inducing','input_dim','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'],
                     type_converters=weave.converters.blitz, **self.weave_options)

        return mudist, mudist_sq, psi2_exponent, psi2
|
||||
117
GPy/kern/parts/rbfcos.py
Normal file
117
GPy/kern/parts/rbfcos.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
|
||||
# Copyright (c) 2012, James Hensman and Andrew Gordon Wilson
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class RBFCos(Kernpart):
|
||||
def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False):
|
||||
self.input_dim = input_dim
|
||||
self.name = 'rbfcos'
|
||||
if self.input_dim>10:
|
||||
print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs"
|
||||
self.ARD = ARD
|
||||
|
||||
#set the default frequencies and bandwidths, appropriate num_params
|
||||
if ARD:
|
||||
self.num_params = 2*self.input_dim + 1
|
||||
if frequencies is not None:
|
||||
frequencies = np.asarray(frequencies)
|
||||
assert frequencies.size == self.input_dim, "bad number of frequencies"
|
||||
else:
|
||||
frequencies = np.ones(self.input_dim)
|
||||
if bandwidths is not None:
|
||||
bandwidths = np.asarray(bandwidths)
|
||||
assert bandwidths.size == self.input_dim, "bad number of bandwidths"
|
||||
else:
|
||||
bandwidths = np.ones(self.input_dim)
|
||||
else:
|
||||
self.num_params = 3
|
||||
if frequencies is not None:
|
||||
frequencies = np.asarray(frequencies)
|
||||
assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel"
|
||||
else:
|
||||
frequencies = np.ones(1)
|
||||
|
||||
if bandwidths is not None:
|
||||
bandwidths = np.asarray(bandwidths)
|
||||
assert bandwidths.size == 1, "Exactly one bandwidth needed for non-ARD kernel"
|
||||
else:
|
||||
bandwidths = np.ones(1)
|
||||
|
||||
#initialise cache
|
||||
self._X, self._X2, self._params = np.empty(shape=(3,1))
|
||||
|
||||
self._set_params(np.hstack((variance,frequencies.flatten(),bandwidths.flatten())))
|
||||
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.variance,self.frequencies, self.bandwidths))
|
||||
|
||||
def _set_params(self,x):
|
||||
assert x.size==(self.num_params)
|
||||
if self.ARD:
|
||||
self.variance = x[0]
|
||||
self.frequencies = x[1:1+self.input_dim]
|
||||
self.bandwidths = x[1+self.input_dim:]
|
||||
else:
|
||||
self.variance, self.frequencies, self.bandwidths = x
|
||||
|
||||
def _get_param_names(self):
|
||||
if self.num_params == 3:
|
||||
return ['variance','frequency','bandwidth']
|
||||
else:
|
||||
return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)]
|
||||
|
||||
def K(self,X,X2,target):
|
||||
self._K_computations(X,X2)
|
||||
target += self.variance*self._dvar
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
np.add(target,self.variance,target)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
self._K_computations(X,X2)
|
||||
target[0] += np.sum(dL_dK*self._dvar)
|
||||
if self.ARD:
|
||||
for q in xrange(self.input_dim):
|
||||
target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self._dist[:,:,q]*self.frequencies[q])*self._dist[:,:,q])
|
||||
target[q+1+self.input_dim] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2[:,:,q])
|
||||
else:
|
||||
target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self._dist*self.frequencies)*self._dist,-1))
|
||||
target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2.sum(-1))
|
||||
|
||||
|
||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
#TODO!!!
|
||||
raise NotImplementedError
|
||||
|
||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
||||
pass
|
||||
|
||||
def _K_computations(self,X,X2):
|
||||
if not (np.all(X==self._X) and np.all(X2==self._X2)):
|
||||
if X2 is None: X2 = X
|
||||
self._X = X.copy()
|
||||
self._X2 = X2.copy()
|
||||
|
||||
#do the distances: this will be high memory for large input_dim
|
||||
#NB: we don't take the abs of the dist because cos is symmetric
|
||||
self._dist = X[:,None,:] - X2[None,:,:]
|
||||
self._dist2 = np.square(self._dist)
|
||||
|
||||
#ensure the next section is computed:
|
||||
self._params = np.empty(self.num_params)
|
||||
|
||||
if not np.all(self._params == self._get_params()):
|
||||
self._params == self._get_params().copy()
|
||||
|
||||
self._rbf_part = np.exp(-2.*np.pi**2*np.sum(self._dist2*self.bandwidths,-1))
|
||||
self._cos_part = np.prod(np.cos(2.*np.pi*self._dist*self.frequencies),-1)
|
||||
self._dvar = self._rbf_part*self._cos_part
|
||||
|
||||
58
GPy/kern/parts/spline.py
Normal file
58
GPy/kern/parts/spline.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
def theta(x):
    """Heaviside step function: 1.0 where x >= 0, else 0.0 (elementwise)."""
    return np.where(x < 0., 0., 1.)
|
||||
|
||||
class Spline(Kernpart):
    """
    Spline kernel

    :param input_dim: the number of input dimensions (fixed to 1 right now TODO)
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float

    NOTE(review): the ``lengthscale`` argument is accepted but never used;
    kept for interface compatibility. The gradient methods here take
    ``(X, [X2,] target)`` rather than the ``(dL_dK, X, ...)`` convention
    used by the other kernel parts -- confirm against callers.
    """

    def __init__(self,input_dim,variance=1.,lengthscale=1.):
        self.input_dim = input_dim
        assert self.input_dim==1
        self.num_params = 1
        self.name = 'spline'
        self._set_params(np.squeeze(variance))

    def _get_params(self):
        """Return the variance (the only parameter)."""
        return self.variance

    def _set_params(self,x):
        """Set the variance."""
        self.variance = x

    def _get_param_names(self):
        return ['variance']

    def K(self,X,X2,target):
        """Add the spline covariance between X and X2 to target."""
        assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        t = X
        s = X2.T
        s_t = s-t # broadcasted subtraction
        target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.)

    def Kdiag(self,X,target):
        """Add the diagonal of the covariance to target."""
        target += self.variance*X.flatten()**3/3.

    def dK_dtheta(self,X,X2,target):
        """
        Add dK/d(variance) to target.

        BUGFIX: this previously referenced ``t``, ``s`` and ``s_t`` without
        defining them (NameError on any call); they are now computed exactly
        as in K.
        """
        t = X
        s = X2.T
        s_t = s - t # broadcasted subtraction, as in K
        target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.

    def dKdiag_dtheta(self,X,target):
        """Add d(Kdiag)/d(variance) to target."""
        target += X.flatten()**3/3.

    def dKdiag_dX(self,X,target):
        """Add d(Kdiag)/dX to target."""
        target += self.variance*X**2
|
||||
|
||||
92
GPy/kern/parts/symmetric.py
Normal file
92
GPy/kern/parts/symmetric.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# Copyright (c) 2012 James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class Symmetric(Kernpart):
    """
    Symmetrical kernels

    Wraps another kernel part k and symmetrises it under the linear map
    given by ``transform`` (default: negation, i.e. symmetry about the
    origin):

        K_symm(x, x') = k(x, x') + k(Ax, x') + k(x, Ax') + k(Ax, Ax')

    :param k: the kernel to symmetrify
    :type k: Kernpart
    :param transform: the transform to use in symmetrification (allows symmetry on specified axes)
    :type transform: A numpy array (input_dim x input_dim) specifiying the transform
    :rtype: Kernpart
    """
    def __init__(self,k,transform=None):
        if transform is None:
            transform = np.eye(k.input_dim)*-1.
        assert transform.shape == (k.input_dim, k.input_dim)
        self.transform = transform
        self.input_dim = k.input_dim
        self.num_params = k.num_params
        self.name = k.name + '_symm'
        self.k = k
        self._set_params(k._get_params())

    def _get_params(self):
        """return the value of the parameters."""
        return self.k._get_params()

    def _set_params(self,x):
        """set the value of the parameters."""
        self.k._set_params(x)

    def _get_param_names(self):
        """return parameter names."""
        return self.k._get_param_names()

    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2."""
        AX = np.dot(X,self.transform)
        if X2 is None:
            X2 = X
            AX2 = AX
        else:
            AX2 = np.dot(X2, self.transform)
        # sum over the four image pairs of (X, X2) under the transform
        self.k.K(X,X2,target)
        self.k.K(AX,X2,target)
        self.k.K(X,AX2,target)
        self.k.K(AX,AX2,target)

    def dK_dtheta(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters."""
        AX = np.dot(X,self.transform)
        if X2 is None:
            X2 = X
            # BUGFIX: this branch previously assigned ``ZX2 = AX``, leaving
            # AX2 undefined and raising NameError whenever X2 was None.
            AX2 = AX
        else:
            AX2 = np.dot(X2, self.transform)
        self.k.dK_dtheta(dL_dK,X,X2,target)
        self.k.dK_dtheta(dL_dK,AX,X2,target)
        self.k.dK_dtheta(dL_dK,X,AX2,target)
        self.k.dK_dtheta(dL_dK,AX,AX2,target)


    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        AX = np.dot(X,self.transform)
        if X2 is None:
            X2 = X
            # BUGFIX: same ``ZX2``/``AX2`` typo as in dK_dtheta.
            AX2 = AX
        else:
            AX2 = np.dot(X2, self.transform)
        self.k.dK_dX(dL_dK, X, X2, target)
        self.k.dK_dX(dL_dK, AX, X2, target)
        self.k.dK_dX(dL_dK, X, AX2, target)
        self.k.dK_dX(dL_dK, AX ,AX2, target)

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        # no shortcut available: build the full matrix and take its diagonal
        foo = np.zeros((X.shape[0],X.shape[0]))
        self.K(X,X,foo)
        target += np.diag(foo)

    def dKdiag_dX(self,dL_dKdiag,X,target):
        raise NotImplementedError

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        raise NotImplementedError
|
||||
10
GPy/kern/parts/sympy_helpers.cpp
Normal file
10
GPy/kern/parts/sympy_helpers.cpp
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
#include <math.h>
|
||||
// Numerical stand-in for the Dirac delta used by sympy-generated code:
// 1.0 when x lies within (-1e-6, 1e-6), 0.0 everywhere else.
double DiracDelta(double x)
{
    const double tol = 0.000001;
    return (x < tol && x > -tol) ? 1.0 : 0.0;
}
|
||||
// Derivative overload emitted by the code generator: the derivative of the
// (regularised) delta is taken to be identically zero.
double DiracDelta(double x, int foo)
{
    (void) x;
    (void) foo;
    return 0.0;
}
|
||||
3
GPy/kern/parts/sympy_helpers.h
Normal file
3
GPy/kern/parts/sympy_helpers.h
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
#include <math.h>
|
||||
double DiracDelta(double x);
|
||||
double DiracDelta(double x, int foo);
|
||||
258
GPy/kern/parts/sympykern.py
Normal file
258
GPy/kern/parts/sympykern.py
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
import numpy as np
|
||||
import sympy as sp
|
||||
from sympy.utilities.codegen import codegen
|
||||
from sympy.core.cache import clear_cache
|
||||
from scipy import weave
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
||||
import tempfile
|
||||
import pdb
|
||||
from kernpart import Kernpart
|
||||
|
||||
class spkern(Kernpart):
|
||||
"""
|
||||
A kernel object, where all the hard work in done by sympy.
|
||||
|
||||
:param k: the covariance function
|
||||
:type k: a positive definite sympy function of x1, z1, x2, z2...
|
||||
|
||||
To construct a new sympy kernel, you'll need to define:
|
||||
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
||||
- that's it! we'll extract the variables from the function k.
|
||||
|
||||
Note:
|
||||
- to handle multiple inputs, call them x1, z1, etc
|
||||
- to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
|
||||
"""
|
||||
def __init__(self,input_dim,k,param=None):
|
||||
self.name='sympykern'
|
||||
self._sp_k = k
|
||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||
self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
|
||||
self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
|
||||
assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
|
||||
assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
|
||||
assert len(self._sp_x)==len(self._sp_z)
|
||||
self.input_dim = len(self._sp_x)
|
||||
assert self.input_dim == input_dim
|
||||
self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
|
||||
self.num_params = len(self._sp_theta)
|
||||
|
||||
#deal with param
|
||||
if param is None:
|
||||
param = np.ones(self.num_params)
|
||||
assert param.size==self.num_params
|
||||
self._set_params(param)
|
||||
|
||||
#Differentiate!
|
||||
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
||||
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
||||
#self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]
|
||||
|
||||
#self.compute_psi_stats()
|
||||
self._gen_code()
|
||||
|
||||
self.weave_kwargs = {\
|
||||
'support_code':self._function_code,\
|
||||
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\
|
||||
'headers':['"sympy_helpers.h"'],\
|
||||
'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\
|
||||
#'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
|
||||
'extra_compile_args':[],\
|
||||
'extra_link_args':['-lgomp'],\
|
||||
'verbose':True}
|
||||
|
||||
def __add__(self,other):
|
||||
return spkern(self._sp_k+other._sp_k)
|
||||
|
||||
def compute_psi_stats(self):
|
||||
#define some normal distributions
|
||||
mus = [sp.var('mu%i'%i,real=True) for i in range(self.input_dim)]
|
||||
Ss = [sp.var('S%i'%i,positive=True) for i in range(self.input_dim)]
|
||||
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
||||
|
||||
#do some integration!
|
||||
#self._sp_psi0 = ??
|
||||
self._sp_psi1 = self._sp_k
|
||||
for i in range(self.input_dim):
|
||||
print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
|
||||
sys.stdout.flush()
|
||||
self._sp_psi1 *= normals[i]
|
||||
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
|
||||
clear_cache()
|
||||
self._sp_psi1 = self._sp_psi1.simplify()
|
||||
|
||||
#and here's psi2 (eek!)
|
||||
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
|
||||
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
|
||||
for i in range(self.input_dim):
|
||||
print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
|
||||
sys.stdout.flush()
|
||||
self._sp_psi2 *= normals[i]
|
||||
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
|
||||
clear_cache()
|
||||
self._sp_psi2 = self._sp_psi2.simplify()
|
||||
|
||||
|
||||
def _gen_code(self):
|
||||
#generate c functions from sympy objects
|
||||
(foo_c,self._function_code),(foo_h,self._function_header) = \
|
||||
codegen([('k',self._sp_k)] \
|
||||
+ [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\
|
||||
#+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\
|
||||
+ [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\
|
||||
,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta)
|
||||
#put the header file where we can find it
|
||||
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
|
||||
f.write(self._function_header)
|
||||
f.close()
|
||||
|
||||
#get rid of derivatives of DiracDelta
|
||||
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
||||
|
||||
#Here's some code to do the looping for K
|
||||
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\
|
||||
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\
|
||||
+ ["param[%i]"%i for i in range(self.num_params)])
|
||||
|
||||
self._K_code =\
|
||||
"""
|
||||
int i;
|
||||
int j;
|
||||
int N = target_array->dimensions[0];
|
||||
int num_inducing = target_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<N;i++){
|
||||
for (j=0;j<num_inducing;j++){
|
||||
target[i*num_inducing+j] = k(%s);
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
diag_arglist = re.sub('Z','X',arglist)
|
||||
diag_arglist = re.sub('j','i',diag_arglist)
|
||||
#Here's some code to do the looping for Kdiag
|
||||
self._Kdiag_code =\
|
||||
"""
|
||||
int i;
|
||||
int N = target_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for
|
||||
for (i=0;i<N;i++){
|
||||
target[i] = k(%s);
|
||||
}
|
||||
%s
|
||||
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
#here's some code to compute gradients
|
||||
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
|
||||
self._dK_dtheta_code =\
|
||||
"""
|
||||
int i;
|
||||
int j;
|
||||
int N = partial_array->dimensions[0];
|
||||
int num_inducing = partial_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<N;i++){
|
||||
for (j=0;j<num_inducing;j++){
|
||||
%s
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
#here's some code to compute gradients for Kdiag TODO: thius is yucky.
|
||||
diag_funclist = re.sub('Z','X',funclist,count=0)
|
||||
diag_funclist = re.sub('j','i',diag_funclist)
|
||||
diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
|
||||
self._dKdiag_dtheta_code =\
|
||||
"""
|
||||
int i;
|
||||
int N = partial_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
for (i=0;i<N;i++){
|
||||
%s
|
||||
}
|
||||
%s
|
||||
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
#Here's some code to do gradients wrt x
|
||||
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
|
||||
self._dK_dX_code = \
|
||||
"""
|
||||
int i;
|
||||
int j;
|
||||
int N = partial_array->dimensions[0];
|
||||
int num_inducing = partial_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<N; i++){
|
||||
for (j=0; j<num_inducing; j++){
|
||||
%s
|
||||
//if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
|
||||
//if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}
|
||||
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
#now for gradients of Kdiag wrt X
|
||||
self._dKdiag_dX_code= \
|
||||
"""
|
||||
int i;
|
||||
int j;
|
||||
int N = partial_array->dimensions[0];
|
||||
int num_inducing = 0;
|
||||
int input_dim = X_array->dimensions[1];
|
||||
for (i=0;i<N; i++){
|
||||
j = i;
|
||||
%s
|
||||
}
|
||||
%s
|
||||
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
|
||||
#TODO: insert multiple functions here via string manipulation
|
||||
#TODO: similar functions for psi_stats
|
||||
|
||||
def K(self,X,Z,target):
|
||||
param = self._param
|
||||
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
param = self._param
|
||||
weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
|
||||
|
||||
def dK_dtheta(self,partial,X,Z,target):
|
||||
param = self._param
|
||||
weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||
|
||||
def dKdiag_dtheta(self,partial,X,target):
|
||||
param = self._param
|
||||
Z = X
|
||||
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||
|
||||
def dK_dX(self,partial,X,Z,target):
|
||||
param = self._param
|
||||
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||
|
||||
def dKdiag_dX(self,partial,X,target):
|
||||
param = self._param
|
||||
Z = X
|
||||
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||
|
||||
def _set_params(self,param):
|
||||
#print param.flags['C_CONTIGUOUS']
|
||||
self._param = param.copy()
|
||||
|
||||
def _get_params(self):
|
||||
return self._param
|
||||
|
||||
def _get_param_names(self):
|
||||
return [x.name for x in self._sp_theta]
|
||||
84
GPy/kern/parts/white.py
Normal file
84
GPy/kern/parts/white.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class White(Kernpart):
    """
    White (uncorrelated) noise kernel.

    The covariance is ``variance`` on the diagonal and zero everywhere else,
    so only the symmetric case (``X2 is None``) and the diagonal contribute;
    all psi-statistics except psi0 vanish.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the noise variance
    :type variance: float
    """
    def __init__(self, input_dim, variance=1.):
        self.input_dim = input_dim
        self.num_params = 1
        self.name = 'white'
        self._set_params(np.array([variance]).flatten())
        # TODO: more elegance here
        self._psi1 = 0

    def _get_params(self):
        """Return the variance (the only parameter)."""
        return self.variance

    def _set_params(self, x):
        """Set the variance from a length-1 array."""
        assert x.shape == (1,)
        self.variance = x

    def _get_param_names(self):
        return ['variance']

    def K(self, X, X2, target):
        """Add the noise covariance: diagonal only, zero cross-covariance."""
        if X2 is None:
            target += self.variance * np.eye(X.shape[0])

    def Kdiag(self, X, target):
        """Add the variance along the diagonal."""
        target += self.variance

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Gradient w.r.t. variance: only the diagonal entries contribute."""
        if X2 is None:
            target += np.trace(dL_dK)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of the diagonal w.r.t. variance."""
        target += np.sum(dL_dKdiag)

    def dK_dX(self, dL_dK, X, X2, target):
        # constant in X: zero gradient
        pass

    def dKdiag_dX(self, dL_dKdiag, X, target):
        # constant in X: zero gradient
        pass

    # --- psi statistics: only psi0 is non-zero for white noise ---

    def psi0(self, Z, mu, S, target):
        target += self.variance

    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
        target += dL_dpsi0.sum()

    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
        pass

    def psi1(self, Z, mu, S, target):
        pass

    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
        pass

    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
        pass

    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
        pass

    def psi2(self, Z, mu, S, target):
        pass

    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
        pass

    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
        pass

    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
        pass
|
||||
Loading…
Add table
Add a link
Reference in a new issue