mirror of https://github.com/SheffieldML/GPy.git
synced 2026-05-09 03:52:39 +02:00

adding and multiplying in stationary is now stationary

This commit is contained in:
parent efd262965e
commit 1766db89fe

6 changed files with 154 additions and 91 deletions
@@ -1,11 +1,12 @@
from _src.rbf import RBF
from _src.kern import Kern
from _src.rbf import RBF
from _src.linear import Linear
from _src.static import Bias, White
from _src.brownian import Brownian
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs
#import coregionalize
#import eq_ode1
#import finite_dimensional
@@ -13,7 +14,6 @@ from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
#import gibbs
#import hetero
#import hierarchical
#import independent_outputs
#import ODE_1
#import periodic_exponential
#import periodic_Matern32
@@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kernpart import Kernpart
from kern import Kern
import numpy as np

def index_to_slices(index):
@@ -31,67 +31,89 @@ def index_to_slices(index):
    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
    return ret
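    # A behaviour sketch (my reading of the code above, not a line from the diff):
    # for an index vector such as np.array([0, 0, 1, 1, 1, 0]), index_to_slices
    # returns one list of slices per output value, covering each contiguous run:
    #   [[slice(0, 2), slice(5, 6)], [slice(2, 5)]]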

class IndependentOutputs(Kernpart):
class IndependentOutputs(Kern):
    """
    A kernel part which can represent several independent functions.
    A kernel which can represent several independent functions.
    This kernel 'switches off' parts of the matrix where the output indexes are different.

    The index of the functions is given by the last column in the input X;
    the rest of the columns of X are passed to the kernel for computation (in blocks).
    the rest of the columns of X are passed to the underlying kernel for computation (in blocks).

    """
    def __init__(self,k):
        self.input_dim = k.input_dim + 1
        self.num_params = k.num_params
        self.name = 'iops('+ k.name + ')'
        self.k = k
    def __init__(self, kern, name='independ'):
        super(IndependentOutputs, self).__init__(kern.input_dim+1, name)
        self.kern = kern
        self.add_parameters(self.kern)

    def _get_params(self):
        return self.k._get_params()

    def _set_params(self,x):
        self.k._set_params(x)
        self.params = x

    def _get_param_names(self):
        return self.k._get_param_names()

    def K(self,X,X2,target):
        #Sort out the slices from the input data
    def K(self, X, X2=None):
        X, slices = X[:,:-1], index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
            target = np.zeros((X.shape[0], X.shape[0]))
            [[np.copyto(target[s,s], self.kern.K(X[s], None)) for s in slices_i] for slices_i in slices]
        else:
            X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
            target = np.zeros((X.shape[0], X2.shape[0]))
            [[[np.copyto(target[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
        return target

            [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]

    def Kdiag(self,X,target):
    def Kdiag(self,X):
        X, slices = X[:,:-1], index_to_slices(X[:,-1])
        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
        target = np.zeros(X.shape[0])
        [[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices]
        return target

    def update_gradients_full(self,dL_dK,X,X2=None):
        target = np.zeros(self.kern.size)
        def collate_grads(dL, X, X2):
            self.kern.update_gradients_full(dL,X,X2)
            self.kern._collect_gradient(target)

    def _param_grad_helper(self,dL_dK,X,X2,target):
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
            [[collate_grads(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices]
        else:
            X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
            [[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
            [[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]

        self.kern._set_gradient(target)

    def gradients_X(self,dL_dK,X,X2,target):
    def gradients_X(self,dL_dK, X, X2=None):
        target = np.zeros_like(X)
        X, slices = X[:,:-1],index_to_slices(X[:,-1])
        if X2 is None:
            X2,slices2 = X,slices
            [[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s],X[s],None)) for s in slices_i] for slices_i in slices]
        else:
            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
            [[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
            [[[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
        return target

    def dKdiag_dX(self,dL_dKdiag,X,target):
    def gradients_X_diag(self, dL_dKdiag, X):
        X, slices = X[:,:-1], index_to_slices(X[:,-1])
        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
        target = np.zeros(X.shape)
        [[np.copyto(target[s,:-1], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
        return target

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
    def update_gradients_diag(self,dL_dKdiag,X):
        target = np.zeros(self.kern.size)
        def collate_grads(dL, X):
            self.kern.update_gradients_diag(dL,X)
            self.kern._collect_gradient(target)
        X,slices = X[:,:-1],index_to_slices(X[:,-1])
        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]
        [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices]
        self.kern._set_gradient(target)

def Hierarchical(kern_f, kern_g, name='hierarchy'):
    """
    A kernel which can represent a simple hierarchical model.

    See Hensman et al 2013, "Hierarchical Bayesian modelling of gene expression time
    series across irregularly sampled replicates and clusters"
    http://www.biomedcentral.com/1471-2105/14/252

    The index of the functions is given by the last column in the input X;
    the rest of the columns of X are passed to the underlying kernel for computation (in blocks).

    """
    assert kern_f.input_dim == kern_g.input_dim
    return kern_f + IndependentOutputs(kern_g)
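A minimal usage sketch of the new IndependentOutputs API (not from the diff; it assumes RBF is importable as in the __init__ hunk above, and the random inputs are purely illustrative):

import numpy as np
# two outputs, each with its own inputs; the last column of X is the output index
X = np.vstack([np.hstack([np.random.rand(10, 1), np.zeros((10, 1))]),
               np.hstack([np.random.rand(5, 1), np.ones((5, 1))])])
k = IndependentOutputs(RBF(1))
K = k.K(X)
# the result is block-diagonal: covariances between different outputs are zero
assert np.allclose(K[:10, 10:], 0.)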

@@ -101,7 +101,7 @@ class Kern(Parameterized):
        """ Here we overload the '*' operator. See self.prod for more information"""
        return self.prod(other)

    def __pow__(self, other, tensor=False):
    def __pow__(self, other):
        """
        Shortcut for tensor `prod`.
        """

@@ -8,6 +8,16 @@ from ...core.parameterization.transformations import Logexp
import numpy as np

class Static(Kern):
    def __init__(self, input_dim, variance, name):
        super(Static, self).__init__(input_dim, name)
        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)

    def Kdiag(self, X):
        ret = np.empty((X.shape[0],), dtype=np.float64)
        ret[:] = self.variance
        return ret

    def gradients_X(self, dL_dK, X, X2=None):
        return np.zeros(X.shape)

@@ -34,9 +44,6 @@ class Static(Kern):
class White(Static):
    def __init__(self, input_dim, variance=1., name='white'):
        super(White, self).__init__(input_dim, name)
        self.input_dim = input_dim
        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)

    def K(self, X, X2=None):
        if X2 is None:

@@ -44,11 +51,6 @@ class White(Static):
        else:
            return np.zeros((X.shape[0], X2.shape[0]))

    def Kdiag(self, X):
        ret = np.ones(X.shape[0])
        ret[:] = self.variance
        return ret

    def psi2(self, Z, mu, S, target):
        return np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)

@@ -63,10 +65,8 @@ class White(Static):


class Bias(Static):
    def __init__(self, input_dim, variance=1., name=None):
    def __init__(self, input_dim, variance=1., name='bias'):
        super(Bias, self).__init__(input_dim, name)
        self.variance = Param("variance", variance, Logexp())
        self.add_parameter(self.variance)

    def K(self, X, X2=None):
        shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])

@@ -74,11 +74,6 @@ class Bias(Static):
        ret[:] = self.variance
        return ret

    def Kdiag(self, X):
        ret = np.empty((X.shape[0],), dtype=np.float64)
        ret[:] = self.variance
        return ret

    def update_gradients_full(self, dL_dK, X, X2=None):
        self.variance.gradient = dL_dK.sum()
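A small sketch of the Static subclasses above (not from the diff; it assumes White.K fills the variance down the diagonal when X2 is None, which the truncated hunk suggests but does not show):

import numpy as np
X = np.random.rand(4, 1)
w = White(1, variance=0.1)
assert np.allclose(w.K(X), 0.1 * np.eye(4))   # noise appears only on the diagonal
b = Bias(1, variance=2.0)
assert np.allclose(b.K(X), 2.0)               # a constant covariance everywhere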

@@ -32,6 +32,13 @@ class Stationary(Kern):
        assert self.variance.size==1
        self.add_parameters(self.variance, self.lengthscale)

    def K_of_r(self, r):
        raise NotImplementedError("implement the covariance function as a fn of r to use this class")

    def K(self, X, X2=None):
        r = self._scaled_dist(X, X2)
        return self.K_of_r(r)
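        # subclasses now only supply K_of_r (and dK_dr); this base-class K maps
        # X, X2 to scaled distances r -- see e.g. ExpQuad further down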

    def _dist(self, X, X2):
        if X2 is None:
            X2 = X

@@ -50,11 +57,11 @@ class Stationary(Kern):
            self.lengthscale.gradient = 0.

    def update_gradients_full(self, dL_dK, X, X2=None):
        K = self.K(X, X2)
        self.variance.gradient = np.sum(K * dL_dK)/self.variance
        r = self._scaled_dist(X, X2)
        K = self.K_of_r(r)

        rinv = self._inv_dist(X, X2)
        dL_dr = self.dK_dr(X, X2) * dL_dK
        dL_dr = self.dK_dr(r) * dL_dK
        x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3

        if self.ARD:

@@ -62,6 +69,8 @@ class Stationary(Kern):
        else:
            self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum()

        self.variance.gradient = np.sum(K * dL_dK)/self.variance
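        # K is linear in the variance, so dK/dvariance = K/variance; summing
        # against dL_dK and dividing by the variance gives the gradient above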

    def _inv_dist(self, X, X2=None):
        dist = self._scaled_dist(X, X2)
        if X2 is None:

@@ -72,7 +81,8 @@ class Stationary(Kern):
        return 1./np.where(dist != 0., dist, np.inf)

    def gradients_X(self, dL_dK, X, X2=None):
        dL_dr = self.dK_dr(X, X2) * dL_dK
        r = self._scaled_dist(X, X2)
        dL_dr = self.dK_dr(r) * dL_dK
        invdist = self._inv_dist(X, X2)
        ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2
        if X2 is None:

@@ -82,19 +92,65 @@ class Stationary(Kern):
    def gradients_X_diag(self, dL_dKdiag, X):
        return np.zeros(X.shape)

    def add(self, other, tensor=False):
        if not tensor:
            return StatAdd(self, other)
        else:
            return super(Stationary, self).add(other, tensor)

    def prod(self, other, tensor=False):
        if not tensor:
            return StatProd(self, other)
        else:
            return super(Stationary, self).prod(other, tensor)


class StatAdd(Stationary):
    """
    Addition of two Stationary kernels on the same space is still stationary.

    If you need to add two (stationary) kernels on separate spaces, use the generic Add class.
    """
    def __init__(self, k1, k2):
        assert isinstance(k1, Stationary)
        assert isinstance(k2, Stationary)
        self.k1, self.k2 = k1, k2
        self.add_parameters(k1, k2)

    def K_of_r(self, r):
        return self.k1.K_of_r(r) + self.k2.K_of_r(r)

    def dK_dr(self, r):
        return self.k1.dK_dr(r) + self.k2.dK_dr(r)

class StatProd(Stationary):
    """
    Product of two Stationary kernels on the same space is still stationary.

    If you need to multiply two (stationary) kernels on separate spaces, use the generic Prod class.
    """
    def __init__(self, k1, k2):
        assert isinstance(k1, Stationary)
        assert isinstance(k2, Stationary)
        self.k1, self.k2 = k1, k2
        self.add_parameters(k1, k2)

    def K_of_r(self, r):
        return self.k1.K_of_r(r) * self.k2.K_of_r(r)

    def dK_dr(self, r):
        return self.k1.dK_dr(r) * self.k2.K_of_r(r) + self.k2.dK_dr(r) * self.k1.K_of_r(r)
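    # a numerical spot-check of the product rule above (a sketch, not a diff
    # line, assuming the kernel classes defined later in this file):
    #   k = ExpQuad(1) * Matern32(1)        # dispatches to StatProd via prod()
    #   r = np.linspace(0.1, 2., 5); eps = 1e-6
    #   assert np.allclose(k.dK_dr(r),
    #                      (k.K_of_r(r + eps) - k.K_of_r(r - eps)) / (2 * eps))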

class Exponential(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'):
        super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name)

    def K(self, X, X2=None):
        dist = self._scaled_dist(X, X2)
        return self.variance * np.exp(-0.5 * dist)

    def dK_dr(self, X, X2):
        return -0.5*self.K(X, X2)
    def dK_dr(self, r):
        return -0.5*self.K_of_r(r)

class Matern32(Stationary):
    """

@@ -109,13 +165,11 @@ class Matern32(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'):
        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name)

    def K(self, X, X2=None):
        dist = self._scaled_dist(X, X2)
        return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
    def K_of_r(self, r):
        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)

    def dK_dr(self, X, X2):
        dist = self._scaled_dist(X, X2)
        return -3.*self.variance*dist*np.exp(-np.sqrt(3.)*dist)
    def dK_dr(self,r):
        return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)
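        # derivation check (comment, not a diff line):
        # d/dr[v (1 + sqrt(3) r) e^{-sqrt(3) r}]
        #   = v e^{-sqrt(3) r} (sqrt(3) - sqrt(3)(1 + sqrt(3) r)) = -3 v r e^{-sqrt(3) r}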

    def Gram_matrix(self, F, F1, F2, lower, upper):
        """

@@ -153,12 +207,10 @@ class Matern52(Stationary):
    k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac{5}{3} r^2) \exp(-\sqrt{5} r) \qquad \\text{where } r = \sqrt{\sum_{i=1}^{\\text{input\_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2}}
    """

    def K(self, X, X2=None):
        r = self._scaled_dist(X, X2)
    def K_of_r(self, r):
        return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)

    def dK_dr(self, X, X2):
        r = self._scaled_dist(X, X2)
    def dK_dr(self, r):
        return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
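        # derivation check (comment, not a diff line): differentiating the
        # polynomial and the exponential gives
        # v e^{-sqrt(5) r} [sqrt(5) + 10/3 r - sqrt(5)(1 + sqrt(5) r + 5/3 r^2)]
        #   = v e^{-sqrt(5) r} [10/3 r - 5 r - 5 sqrt(5)/3 r^2]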

    def Gram_matrix(self,F,F1,F2,F3,lower,upper):

@@ -197,24 +249,20 @@ class ExpQuad(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)

    def K(self, X, X2=None):
        r = self._scaled_dist(X, X2)
    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)

    def dK_dr(self, X, X2):
        dist = self._scaled_dist(X, X2)
        return -dist*self.K(X, X2)
    def dK_dr(self, r):
        return -r*self.K_of_r(r)

class Cosine(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Cosine'):
        super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, name)

    def K(self, X, X2=None):
        r = self._scaled_dist(X, X2)
    def K_of_r(self, r):
        return self.variance * np.cos(r)

    def dK_dr(self, X, X2):
        r = self._scaled_dist(X, X2)
    def dK_dr(self, r):
        return -self.variance * np.sin(r)


@@ -234,12 +282,10 @@ class RatQuad(Stationary):
        self.power = Param('power', power, Logexp())
        self.add_parameters(self.power)

    def K(self, X, X2=None):
        r = self._scaled_dist(X, X2)
    def K_of_r(self, r):
        return self.variance*(1. + r**2/2.)**(-self.power)

    def dK_dr(self, X, X2):
        r = self._scaled_dist(X, X2)
    def dK_dr(self, r):
        return -self.variance*self.power*r*(1. + r**2/2)**(-self.power - 1.)
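        # chain rule on v (1 + r^2/2)^(-p) (comment, not a diff line): the inner
        # derivative r pulls down a factor -p (1 + r^2/2)^(-p-1), matching the
        # line above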

    def update_gradients_full(self, dL_dK, X, X2=None):