adding and multiplying stationary kernels is now stationary

This commit is contained in:
James Hensman 2014-02-24 13:51:03 +00:00
parent efd262965e
commit 1766db89fe
6 changed files with 154 additions and 91 deletions

View file

@ -1,11 +1,12 @@
from _src.rbf import RBF
from _src.kern import Kern from _src.kern import Kern
from _src.rbf import RBF
from _src.linear import Linear from _src.linear import Linear
from _src.static import Bias, White from _src.static import Bias, White
from _src.brownian import Brownian from _src.brownian import Brownian
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs
#import coregionalize #import coregionalize
#import eq_ode1 #import eq_ode1
#import finite_dimensional #import finite_dimensional
@ -13,7 +14,6 @@ from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern5
#import gibbs #import gibbs
#import hetero #import hetero
#import hierarchical #import hierarchical
#import independent_outputs
#import ODE_1 #import ODE_1
#import periodic_exponential #import periodic_exponential
#import periodic_Matern32 #import periodic_Matern32

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart from kern import Kern
import numpy as np import numpy as np
def index_to_slices(index): def index_to_slices(index):
@ -31,67 +31,89 @@ def index_to_slices(index):
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret return ret
class IndependentOutputs(Kernpart): class IndependentOutputs(Kern):
""" """
A kernel part which can represent several independent functions. A kernel which can represent several independent functions.
this kernel 'switches off' parts of the matrix where the output indexes are different. this kernel 'switches off' parts of the matrix where the output indexes are different.
The index of the functions is given by the last column in the input X The index of the functions is given by the last column in the input X
the rest of the columns of X are passed to the kernel for computation (in blocks). the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
""" """
def __init__(self,k): def __init__(self, kern, name='independ'):
self.input_dim = k.input_dim + 1 super(IndependentOutputs, self).__init__(kern.input_dim+1, name)
self.num_params = k.num_params self.kern = kern
self.name = 'iops('+ k.name + ')' self.add_parameters(self.kern)
self.k = k
def _get_params(self): def K(self,X ,X2=None):
return self.k._get_params() X, slices = X[:,:-1], index_to_slices(X[:,-1])
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[[np.copyto(target[s,s], self.kern.K(X[s], None)) for s in slices_i] for slices_i in slices]
else:
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
target = np.zeros((X.shape[0], X2.shape[0]))
[[[np.copyto(target[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
return target
def _set_params(self,x): def Kdiag(self,X):
self.k._set_params(x) X, slices = X[:,:-1], index_to_slices(X[:,-1])
self.params = x target = np.zeros(X.shape[0])
[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices]
return target
def _get_param_names(self): def update_gradients_full(self,dL_dK,X,X2=None):
return self.k._get_param_names() target = np.zeros(self.kern.size)
def collate_grads(dL, X, X2):
self.kern.update_gradients_full(dL,X,X2)
self.kern._collect_gradient(target)
def K(self,X,X2,target):
#Sort out the slices from the input data
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: if X2 is None:
X2,slices2 = X,slices [[collate_grads(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices]
else: else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
[[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
[[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] self.kern._set_gradient(target)
def Kdiag(self,X,target): def gradients_X(self,dL_dK, X, X2=None):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) target = np.zeros_like(X)
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices] X, slices = X[:,:-1],index_to_slices(X[:,-1])
def _param_grad_helper(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: if X2 is None:
X2,slices2 = X,slices [[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s],X[s],None)) for s in slices_i] for slices_i in slices]
else: else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
return target
def gradients_X_diag(self, dL_dKdiag, X):
X, slices = X[:,:-1], index_to_slices(X[:,-1])
target = np.zeros(X.shape)
[[np.copyto(target[s,:-1], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
return target
def gradients_X(self,dL_dK,X,X2,target): def update_gradients_diag(self,dL_dKdiag,X,target):
target = np.zeros(self.kern.size)
def collate_grads(dL, X):
self.kern.update_gradients_diag(dL,X)
self.kern._collect_gradient(target)
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices]
X2,slices2 = X,slices self.kern._set_gradient(target)
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dKdiag_dX(self,dL_dKdiag,X,target): def Hierarchical(kern_f, kern_g, name='hierarchy'):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) """
[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices] A kernel which can reopresent a simple hierarchical model.
See Hensman et al 2013, "Hierarchical Bayesian modelling of gene expression time
series across irregularly sampled replicates and clusters"
http://www.biomedcentral.com/1471-2105/14/252
The index of the functions is given by the last column in the input X
the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
"""
assert kern_f.input_dim == kern_g.input_dim
return kern_f + IndependentOutputs(kern_g)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]

View file

@ -101,7 +101,7 @@ class Kern(Parameterized):
""" Here we overload the '*' operator. See self.prod for more information""" """ Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other) return self.prod(other)
def __pow__(self, other, tensor=False): def __pow__(self, other):
""" """
Shortcut for tensor `prod`. Shortcut for tensor `prod`.
""" """

View file

@ -8,6 +8,16 @@ from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
class Static(Kern): class Static(Kern):
def __init__(self, input_dim, variance, name):
    """Shared constructor for static kernels (White, Bias).

    Registers a single positively-constrained 'variance' parameter on
    the kernel, so subclasses no longer declare it themselves.
    """
    super(Static, self).__init__(input_dim, name)
    variance_param = Param('variance', variance, Logexp())
    self.variance = variance_param
    self.add_parameters(variance_param)
def Kdiag(self, X):
    """Return the diagonal of the kernel matrix: a constant vector
    filled with self.variance, one entry per row of X."""
    diag = np.empty(X.shape[0], dtype=np.float64)
    diag[:] = self.variance
    return diag
def gradients_X(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
return np.zeros(X.shape) return np.zeros(X.shape)
@ -34,9 +44,6 @@ class Static(Kern):
class White(Static): class White(Static):
def __init__(self, input_dim, variance=1., name='white'): def __init__(self, input_dim, variance=1., name='white'):
super(White, self).__init__(input_dim, name) super(White, self).__init__(input_dim, name)
self.input_dim = input_dim
self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance)
def K(self, X, X2=None): def K(self, X, X2=None):
if X2 is None: if X2 is None:
@ -44,11 +51,6 @@ class White(Static):
else: else:
return np.zeros((X.shape[0], X2.shape[0])) return np.zeros((X.shape[0], X2.shape[0]))
def Kdiag(self, X):
ret = np.ones(X.shape[0])
ret[:] = self.variance
return ret
def psi2(self, Z, mu, S, target): def psi2(self, Z, mu, S, target):
return np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) return np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
@ -63,10 +65,8 @@ class White(Static):
class Bias(Static): class Bias(Static):
def __init__(self, input_dim, variance=1., name=None): def __init__(self, input_dim, variance=1., name='bias'):
super(Bias, self).__init__(input_dim, name) super(Bias, self).__init__(input_dim, name)
self.variance = Param("variance", variance, Logexp())
self.add_parameter(self.variance)
def K(self, X, X2=None): def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0]) shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
@ -74,11 +74,6 @@ class Bias(Static):
ret[:] = self.variance ret[:] = self.variance
return ret return ret
def Kdiag(self, X):
ret = np.empty((X.shape[0],), dtype=np.float64)
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum() self.variance.gradient = dL_dK.sum()

View file

@ -32,6 +32,13 @@ class Stationary(Kern):
assert self.variance.size==1 assert self.variance.size==1
self.add_parameters(self.variance, self.lengthscale) self.add_parameters(self.variance, self.lengthscale)
def K_of_r(self, r):
    """Compute the covariance as a function of the scaled distance r.

    Subclasses of Stationary must override this; K(X, X2) is then
    derived automatically from the pairwise scaled distances.

    Raises
    ------
    NotImplementedError
        Always, in this base class.
    """
    # Fixed: use the call form of raise (the `raise Exc, msg` comma
    # syntax is deprecated Python 2 only) and correct the typos in the
    # message ("covaraiance functino").
    raise NotImplementedError("implement the covariance function as a function of r to use this class")
def K(self, X, X2=None):
    """Kernel matrix between X and X2 (or X with itself when X2 is None).

    A stationary kernel depends on its inputs only through the scaled
    distance, so this simply delegates to K_of_r.
    """
    return self.K_of_r(self._scaled_dist(X, X2))
def _dist(self, X, X2): def _dist(self, X, X2):
if X2 is None: if X2 is None:
X2 = X X2 = X
@ -50,11 +57,11 @@ class Stationary(Kern):
self.lengthscale.gradient = 0. self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
K = self.K(X, X2) r = self._scaled_dist(X, X2)
self.variance.gradient = np.sum(K * dL_dK)/self.variance K = self.K_of_r(r)
rinv = self._inv_dist(X, X2) rinv = self._inv_dist(X, X2)
dL_dr = self.dK_dr(X, X2) * dL_dK dL_dr = self.dK_dr(r) * dL_dK
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3 x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
if self.ARD: if self.ARD:
@ -62,6 +69,8 @@ class Stationary(Kern):
else: else:
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum() self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum()
self.variance.gradient = np.sum(K * dL_dK)/self.variance
def _inv_dist(self, X, X2=None): def _inv_dist(self, X, X2=None):
dist = self._scaled_dist(X, X2) dist = self._scaled_dist(X, X2)
if X2 is None: if X2 is None:
@ -72,7 +81,8 @@ class Stationary(Kern):
return 1./np.where(dist != 0., dist, np.inf) return 1./np.where(dist != 0., dist, np.inf)
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
dL_dr = self.dK_dr(X, X2) * dL_dK r = self._scaled_dist(X, X2)
dL_dr = self.dK_dr(r) * dL_dK
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2 ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2
if X2 is None: if X2 is None:
@ -82,19 +92,65 @@ class Stationary(Kern):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
def add(self, other, tensor=False):
if not tensor:
return StatAdd(self, other)
else:
return super(Stationary, self).add(other, tensor)
def prod(self, other, tensor=False):
if not tensor:
return StatProd(self, other)
else:
return super(Stationary, self).prod(other, tensor)
class StatAdd(Stationary):
    """
    Addition of two Stationary kernels on the same space is still stationary.

    If you need to add two (stationary) kernels on separate spaces, use the
    generic add class.
    """
    def __init__(self, k1, k2):
        assert isinstance(k1, Stationary)
        assert isinstance(k2, Stationary)
        self.k1, self.k2 = k1, k2
        # NOTE(review): Stationary.__init__ is never called here, so this
        # kernel has no variance/lengthscale of its own and relies entirely
        # on k1 and k2 — confirm this is intended.
        self.add_parameters(k1, k2)

    def K_of_r(self, r):
        # Fixed: the sub-kernels must be evaluated as functions of r via
        # K_of_r; K(r) would treat r as raw inputs X (wrong signature and
        # wrong semantics).
        return self.k1.K_of_r(r) + self.k2.K_of_r(r)

    def dK_dr(self, r):
        # Fixed: self.k1.dK_dr was referenced without being called, so the
        # return value was a bound method plus an array.
        return self.k1.dK_dr(r) + self.k2.dK_dr(r)
class StatProd(Stationary):
    """
    Product of two Stationary kernels on the same space is still stationary.

    If you need to multiply two (stationary) kernels on separate spaces, use
    the generic Prod class.
    """
    def __init__(self, k1, k2):
        assert isinstance(k1, Stationary)
        assert isinstance(k2, Stationary)
        self.k1, self.k2 = k1, k2
        # NOTE(review): Stationary.__init__ is never called here, so this
        # kernel has no variance/lengthscale of its own and relies entirely
        # on k1 and k2 — confirm this is intended.
        self.add_parameters(k1, k2)

    def K_of_r(self, r):
        # Fixed: evaluate the sub-kernels as functions of r via K_of_r
        # (dK_dr below already does so); K(r) would misinterpret r as raw
        # inputs X.
        return self.k1.K_of_r(r) * self.k2.K_of_r(r)

    def dK_dr(self, r):
        # Product rule in r.
        return self.k1.dK_dr(r) * self.k2.K_of_r(r) + self.k2.dK_dr(r) * self.k1.K_of_r(r)
class Exponential(Stationary): class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name) super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None): def K(self, X, X2=None):
dist = self._scaled_dist(X, X2)
return self.variance * np.exp(-0.5 * dist) return self.variance * np.exp(-0.5 * dist)
def dK_dr(self, X, X2): def dK_dr(self, r):
return -0.5*self.K(X, X2) return -0.5*self.K_of_r(r)
class Matern32(Stationary): class Matern32(Stationary):
""" """
@ -109,13 +165,11 @@ class Matern32(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'):
super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name) super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None): def K_of_r(self, r):
dist = self._scaled_dist(X, X2) return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
def dK_dr(self, X, X2): def dK_dr(self,r):
dist = self._scaled_dist(X, X2) return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)
return -3.*self.variance*dist*np.exp(-np.sqrt(3.)*dist)
def Gram_matrix(self, F, F1, F2, lower, upper): def Gram_matrix(self, F, F1, F2, lower, upper):
""" """
@ -153,12 +207,10 @@ class Matern52(Stationary):
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
""" """
def K(self, X, X2=None): def K_of_r(self, r):
r = self._scaled_dist(X, X2)
return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r) return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
def dK_dr(self, X, X2): def dK_dr(self, r):
r = self._scaled_dist(X, X2)
return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r) return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
def Gram_matrix(self,F,F1,F2,F3,lower,upper): def Gram_matrix(self,F,F1,F2,F3,lower,upper):
@ -197,24 +249,20 @@ class ExpQuad(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name) super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None): def K_of_r(self, r):
r = self._scaled_dist(X, X2)
return self.variance * np.exp(-0.5 * r**2) return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, X, X2): def dK_dr(self, r):
dist = self._scaled_dist(X, X2) return -r*self.K_of_r(r)
return -dist*self.K(X, X2)
class Cosine(Stationary): class Cosine(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Cosine'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Cosine'):
super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, name) super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K(self, X, X2=None): def K_of_r(self, r):
r = self._scaled_dist(X, X2)
return self.variance * np.cos(r) return self.variance * np.cos(r)
def dK_dr(self, X, X2): def dK_dr(self, r):
r = self._scaled_dist(X, X2)
return -self.variance * np.sin(r) return -self.variance * np.sin(r)
@ -234,12 +282,10 @@ class RatQuad(Stationary):
self.power = Param('power', power, Logexp()) self.power = Param('power', power, Logexp())
self.add_parameters(self.power) self.add_parameters(self.power)
def K(self, X, X2=None): def K_of_r(self, r):
r = self._scaled_dist(X, X2)
return self.variance*(1. + r**2/2.)**(-self.power) return self.variance*(1. + r**2/2.)**(-self.power)
def dK_dr(self, X, X2): def dK_dr(self, r):
r = self._scaled_dist(X, X2)
return -self.variance*self.power*r*(1. + r**2/2)**(-self.power - 1.) return -self.variance*self.power*r*(1. + r**2/2)**(-self.power - 1.)
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):