GPy (mirror of https://github.com/SheffieldML/GPy.git)

Commit ba74e29aee: merge with upstream
115 changed files with 1178 additions and 531 deletions

@@ -10,7 +10,7 @@ from .src.add import Add
from .src.prod import Prod
from .src.rbf import RBF
from .src.linear import Linear, LinearFull
-from .src.static import Bias, White, Fixed
+from .src.static import Bias, White, Fixed, WhiteHeteroscedastic
from .src.brownian import Brownian
from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from .src.mlp import MLP

@@ -28,4 +28,4 @@ from .src.trunclinear import TruncLinear,TruncLinear_inf
from .src.splitKern import SplitKern,DEtime
from .src.splitKern import DEtime as DiffGenomeKern
from .src.spline import Spline
-from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
+from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

@@ -162,4 +162,4 @@ class ODE_t(Kern):

        self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK)

-        self.ubias.gradient = np.sum(dkdubias * dL_dK)
+        self.ubias.gradient = np.sum(dkdubias * dL_dK)

@@ -1 +1 @@
-from . import psi_comp
+from . import psi_comp

@@ -19,8 +19,8 @@ class Add(CombinationKernel):
            if isinstance(kern, Add):
                del subkerns[i]
                for part in kern.parts[::-1]:
-                    kern.unlink_parameter(part)
-                    subkerns.insert(i, part)
+                    #kern.unlink_parameter(part)
+                    subkerns.insert(i, part.copy())
        super(Add, self).__init__(subkerns, name)
        self._exact_psicomp = self._check_exact_psicomp()

@@ -37,7 +37,7 @@ class Add(CombinationKernel):
        else:
            return False

-    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def K(self, X, X2=None, which_parts=None):
        """
        Add all kernels together.

@@ -51,7 +51,7 @@ class Add(CombinationKernel):
            which_parts = [which_parts]
        return reduce(np.add, (p.K(X, X2) for p in which_parts))

-    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def Kdiag(self, X, which_parts=None):
        if which_parts is None:
            which_parts = self.parts

@@ -98,17 +98,17 @@ class Add(CombinationKernel):
        [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
        return target

-    @Cache_this(limit=1, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def psi0(self, Z, variational_posterior):
        if not self._exact_psicomp: return Kern.psi0(self,Z,variational_posterior)
        return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))

-    @Cache_this(limit=1, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def psi1(self, Z, variational_posterior):
        if not self._exact_psicomp: return Kern.psi1(self,Z,variational_posterior)
        return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))

-    @Cache_this(limit=1, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def psi2(self, Z, variational_posterior):
        if not self._exact_psicomp: return Kern.psi2(self,Z,variational_posterior)
        psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))

@@ -144,7 +144,7 @@ class Add(CombinationKernel):
            raise NotImplementedError("psi2 cannot be computed for this kernel")
        return psi2

-    @Cache_this(limit=1, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def psi2n(self, Z, variational_posterior):
        if not self._exact_psicomp: return Kern.psi2n(self, Z, variational_posterior)
        psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))

@@ -241,16 +241,20 @@ class Add(CombinationKernel):
        [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
        return target_grads

-    def add(self, other):
-        if isinstance(other, Add):
-            other_params = other.parameters[:]
-            for p in other_params:
-                other.unlink_parameter(p)
-            self.link_parameters(*other_params)
-        else:
-            self.link_parameter(other)
-        self.input_dim, self._all_dims_active = self.get_input_dim_active_dims(self.parts)
-        return self
+    #def add(self, other):
+    # parts = self.parts
+    # if 0:#isinstance(other, Add):
+    # #other_params = other.parameters[:]
+    # for p in other.parts[:]:
+    # other.unlink_parameter(p)
+    # parts.extend(other.parts)
+    # #self.link_parameters(*other_params)
+    #
+    # else:
+    # #self.link_parameter(other)
+    # parts.append(other)
+    # #self.input_dim, self._all_dims_active = self.get_input_dim_active_dims(parts)
+    # return Add([p for p in parts], self.name)

    def input_sensitivity(self, summarize=True):
        if summarize:

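Note: the first hunk above changes how nested Add kernels are flattened. When a sum is built from an existing Add, that Add's parts are now copied (part.copy()) instead of being unlinked from their original owner, so the original kernel keeps its parameters. A minimal sketch of what this implies for user code, assuming the public GPy.kern API (the example is illustrative and not part of the diff):

    import GPy

    # k1 is itself an Add kernel
    k1 = GPy.kern.RBF(1) + GPy.kern.White(1)

    # Building a new sum from k1 flattens it; with the change above the
    # parts of k1 are copied into k2 rather than unlinked from k1.
    k2 = k1 + GPy.kern.Bias(1)

    print(len(k1.parts))  # k1 should still own its two parts
    print(len(k2.parts))  # k2 holds copies of them plus the Bias part
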
@@ -64,7 +64,7 @@ class EQ_ODE2(Kern):
        self.W = Param('W', W)
        self.link_parameters(self.lengthscale, self.C, self.B, self.W)

-    @Cache_this(limit=2)
+    @Cache_this(limit=3)
    def K(self, X, X2=None):
        #This way is not working, indexes are lost after using k._slice_X
        #index = np.asarray(X, dtype=np.int)

@@ -48,11 +48,12 @@ class Kern(Parameterized):

        if active_dims is None:
            active_dims = np.arange(input_dim)

-        self.active_dims = active_dims
-        self._all_dims_active = np.atleast_1d(active_dims).astype(int)
-
-        assert self._all_dims_active.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, _all_dims_active={}".format(self.input_dim, self._all_dims_active.size, self._all_dims_active)
+        self.active_dims = np.asarray(active_dims, np.int_)
+
+        self._all_dims_active = np.atleast_1d(self.active_dims).astype(int)
+
+        assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}".format(self.input_dim, self._all_dims_active.size)

        self._sliced_X = 0
        self.useGPU = self._support_GPU and useGPU

@@ -68,9 +69,12 @@ class Kern(Parameterized):
    def _effective_input_dim(self):
        return np.size(self._all_dims_active)

-    @Cache_this(limit=20)
+    @Cache_this(limit=3)
    def _slice_X(self, X):
-        return X[:, self._all_dims_active]
+        try:
+            return X[:, self._all_dims_active].astype('float')
+        except:
+            return X[:, self._all_dims_active]

    def K(self, X, X2):
        """

@@ -319,10 +323,20 @@ class CombinationKernel(Kern):
        :param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
        """
        assert all([isinstance(k, Kern) for k in kernels])
-        extra_dims = np.array(extra_dims, dtype=int)
-        input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
+        extra_dims = np.asarray(extra_dims, dtype=int)
+
+        active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
+
+        input_dim = active_dims.size
+        if extra_dims is not None:
+            input_dim += extra_dims.size

        # initialize the kernel with the full input_dim
        super(CombinationKernel, self).__init__(input_dim, active_dims, name)
+
+        effective_input_dim = reduce(max, (k._all_dims_active.max() for k in kernels)) + 1
+        self._all_dims_active = np.array(np.concatenate((np.arange(effective_input_dim), extra_dims if extra_dims is not None else [])), dtype=int)

        self.extra_dims = extra_dims
        self.link_parameters(*kernels)

@@ -330,16 +344,8 @@ class CombinationKernel(Kern):
    def parts(self):
        return self.parameters

-    def get_input_dim_active_dims(self, kernels, extra_dims = None):
-        self.active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
-        #_all_dims_active = np.array(np.concatenate((_all_dims_active, extra_dims if extra_dims is not None else [])), dtype=int)
-        input_dim = reduce(max, (k._all_dims_active.max() for k in kernels)) + 1
-
-        if extra_dims is not None:
-            input_dim += extra_dims.size
-
-        _all_dims_active = np.arange(input_dim)
-        return input_dim, _all_dims_active
+    def _set_all_dims_ative(self):
+        self._all_dims_active = np.atleast_1d(self.active_dims).astype(int)

    def input_sensitivity(self, summarize=True):
        """

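Note: the CombinationKernel hunks above replace get_input_dim_active_dims with an inline computation. The combined kernel's active_dims is now the union of the parts' active dimensions, input_dim is the size of that union (plus any extra_dims), and _all_dims_active spans every dimension any part touches. A short sketch of how this plays out for kernels acting on different input columns, assuming the public GPy.kern API:

    import numpy as np
    import GPy

    # RBF on columns 0 and 2, Linear on column 1
    k = GPy.kern.RBF(2, active_dims=[0, 2]) + GPy.kern.Linear(1, active_dims=[1])

    print(k.active_dims)   # expected: the union [0 1 2]
    print(k.input_dim)     # expected: 3, the size of that union

    X = np.random.randn(10, 3)
    print(k.K(X).shape)    # (10, 10); each part slices out its own columns
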
@@ -51,7 +51,7 @@ class Linear(Kern):
        self.link_parameter(self.variances)
        self.psicomp = PSICOMP_Linear()

-    @Cache_this(limit=2)
+    @Cache_this(limit=3)
    def K(self, X, X2=None):
        if self.ARD:
            if X2 is None:

@@ -62,7 +62,7 @@ class Linear(Kern):
        else:
            return self._dot_product(X, X2) * self.variances

-    @Cache_this(limit=1, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=(0,))
    def _dot_product(self, X, X2=None):
        if X2 is None:
            return tdot(X)

@@ -45,7 +45,7 @@ class MLP(Kern):
        self.link_parameters(self.variance, self.weight_variance, self.bias_variance)


-    @Cache_this(limit=20, ignore_args=())
+    @Cache_this(limit=3, ignore_args=())
    def K(self, X, X2=None):
        if X2 is None:
            X_denom = np.sqrt(self._comp_prod(X)+1.)

@@ -57,7 +57,7 @@ class MLP(Kern):
            XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:]
        return self.variance*four_over_tau*np.arcsin(XTX)

-    @Cache_this(limit=20, ignore_args=())
+    @Cache_this(limit=3, ignore_args=())
    def Kdiag(self, X):
        """Compute the diagonal of the covariance matrix for X."""
        X_prod = self._comp_prod(X)

@@ -88,14 +88,14 @@ class MLP(Kern):
        """Gradient of diagonal of covariance with respect to X"""
        return self._comp_grads_diag(dL_dKdiag, X)[3]

-    @Cache_this(limit=50, ignore_args=())
+    @Cache_this(limit=3, ignore_args=())
    def _comp_prod(self, X, X2=None):
        if X2 is None:
            return (np.square(X)*self.weight_variance).sum(axis=1)+self.bias_variance
        else:
            return (X*self.weight_variance).dot(X2.T)+self.bias_variance

-    @Cache_this(limit=20, ignore_args=(1,))
+    @Cache_this(limit=3, ignore_args=(1,))
    def _comp_grads(self, dL_dK, X, X2=None):
        var,w,b = self.variance, self.weight_variance, self.bias_variance
        K = self.K(X, X2)

@@ -130,7 +130,7 @@ class MLP(Kern):
        dX2 = common.T.dot(X)*w-((common*XTX).sum(axis=0)/(X2_prod+1.))[:,None]*X2*w
        return dvar, dw, db, dX, dX2

-    @Cache_this(limit=20, ignore_args=(1,))
+    @Cache_this(limit=3, ignore_args=(1,))
    def _comp_grads_diag(self, dL_dKdiag, X):
        var,w,b = self.variance, self.weight_variance, self.bias_variance
        K = self.Kdiag(X)

@@ -5,32 +5,49 @@ import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
+from paramz.caching import Cache_this

class Poly(Kern):
    """
    Polynomial kernel
    """

-    def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
+    def __init__(self, input_dim, variance=1., scale=1., bias=1., order=3., active_dims=None, name='poly'):
        super(Poly, self).__init__(input_dim, active_dims, name)
        self.variance = Param('variance', variance, Logexp())
-        self.link_parameter(self.variance)
+        self.scale = Param('scale', scale, Logexp())
+        self.bias = Param('bias', bias, Logexp())
+
+        self.link_parameters(self.variance, self.scale, self.bias)
        assert order >= 1, 'The order of the polynomial has to be at least 1.'
        self.order=order

-    def K(self, X, X2=None):
-        return (self._dot_product(X, X2) + 1.)**self.order * self.variance
-
-    def _dot_product(self, X, X2=None):
+    def K(self, X, X2=None):
+        _, _, B = self._AB(X, X2)
+        return B * self.variance
+
+    @Cache_this(limit=3)
+    def _AB(self, X, X2=None):
        if X2 is None:
-            return np.dot(X, X.T)
+            dot_prod = np.dot(X, X.T)
        else:
-            return np.dot(X, X2.T)
+            dot_prod = np.dot(X, X2.T)
+        A = (self.scale * dot_prod) + self.bias
+        B = A ** self.order
+        return dot_prod, A, B

    def Kdiag(self, X):
-        return self.variance*(np.square(X).sum(1) + 1.)**self.order
+        return self.K(X).diagonal()#self.variance*(np.square(X).sum(1) + 1.)**self.order

    def update_gradients_full(self, dL_dK, X, X2=None):
-        self.variance.gradient = np.sum(dL_dK * (self._dot_product(X, X2) + 1.)**self.order)
+        dot_prod, A, B = self._AB(X, X2)
+        dK_dA = self.variance * self.order * A ** (self.order-1.)
+        dL_dA = dL_dK * (dK_dA)
+        self.scale.gradient = (dL_dA * dot_prod).sum()
+        self.bias.gradient = dL_dA.sum()
+        self.variance.gradient = np.sum(dL_dK * B)
+        #import ipdb;ipdb.set_trace()

    def update_gradients_diag(self, dL_dKdiag, X):
        raise NotImplementedError

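Note: with the new scale and bias parameters the Poly kernel computes K(X, X2) = variance * (scale * X X2^T + bias)^order, and its gradients follow from dK/dA = variance * order * A^(order-1) with A = scale * X X2^T + bias. A small sketch checking the covariance against a direct NumPy computation, assuming Poly is exported as GPy.kern.Poly as in current releases:

    import numpy as np
    import GPy

    X = np.random.randn(6, 2)
    k = GPy.kern.Poly(2, variance=0.7, scale=2.0, bias=0.5, order=3.0)

    # direct evaluation of variance * (scale * X X^T + bias) ** order
    K_manual = 0.7 * (2.0 * X.dot(X.T) + 0.5) ** 3.0

    print(np.allclose(k.K(X), K_manual))               # expected True
    print(np.allclose(k.Kdiag(X), np.diag(K_manual)))  # Kdiag is now K(X).diagonal()
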
@@ -39,7 +39,7 @@ class Prod(CombinationKernel):
                kernels.insert(i, part)
        super(Prod, self).__init__(kernels, name)

-    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def K(self, X, X2=None, which_parts=None):
        if which_parts is None:
            which_parts = self.parts

@@ -48,7 +48,7 @@ class Prod(CombinationKernel):
            which_parts = [which_parts]
        return reduce(np.multiply, (p.K(X, X2) for p in which_parts))

-    @Cache_this(limit=2, force_kwargs=['which_parts'])
+    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def Kdiag(self, X, which_parts=None):
        if which_parts is None:
            which_parts = self.parts

@@ -21,7 +21,7 @@ from .gaussherm import PSICOMP_GH
from . import rbf_psi_comp, linear_psi_comp, ssrbf_psi_comp, sslinear_psi_comp

class PSICOMP_RBF(PSICOMP):
-    @Cache_this(limit=10, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=(0,))
    def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
        variance, lengthscale = kern.variance, kern.lengthscale
        if isinstance(variational_posterior, variational.NormalPosterior):

@@ -31,7 +31,7 @@ class PSICOMP_RBF(PSICOMP):
        else:
            raise ValueError("unknown distriubtion received for psi-statistics")

-    @Cache_this(limit=10, ignore_args=(0,2,3,4))
+    @Cache_this(limit=3, ignore_args=(0,2,3,4))
    def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        variance, lengthscale = kern.variance, kern.lengthscale
        if isinstance(variational_posterior, variational.NormalPosterior):

@@ -43,7 +43,7 @@ class PSICOMP_RBF(PSICOMP):

class PSICOMP_Linear(PSICOMP):

-    @Cache_this(limit=10, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=(0,))
    def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
        variances = kern.variances
        if isinstance(variational_posterior, variational.NormalPosterior):

@@ -53,7 +53,7 @@ class PSICOMP_Linear(PSICOMP):
        else:
            raise ValueError("unknown distriubtion received for psi-statistics")

-    @Cache_this(limit=10, ignore_args=(0,2,3,4))
+    @Cache_this(limit=3, ignore_args=(0,2,3,4))
    def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        variances = kern.variances
        if isinstance(variational_posterior, variational.NormalPosterior):

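Note: across these files the commit standardises the @Cache_this limits (previously 1, 2, 5, 10, 20 or 50) to 3, so each decorated method keeps results for its three most recent distinct inputs. A minimal sketch of the decorator on an ordinary method, assuming paramz caching behaves as it does inside GPy (the cache keys off observable array arguments such as ObsAr):

    import numpy as np
    from paramz import ObsAr
    from paramz.caching import Cache_this

    class Demo(object):
        # keep up to 3 cached results; ignore_args=(0,) leaves `self` out of the cache key
        @Cache_this(limit=3, ignore_args=(0,))
        def gram(self, X):
            return X.dot(X.T)

    d = Demo()
    X = ObsAr(np.random.randn(100, 4))
    A = d.gram(X)   # computed
    B = d.gram(X)   # same observable input: intended to be served from the cache
    print(np.allclose(A, B))
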
@@ -27,7 +27,7 @@ class PSICOMP_GH(PSICOMP):
    def _setup_observers(self):
        pass

-    @Cache_this(limit=10, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=(0,))
    def comp_K(self, Z, qX):
        if self.Xs is None or self.Xs.shape != qX.mean.shape:
            from paramz import ObsAr

@@ -38,7 +38,7 @@ class PSICOMP_GH(PSICOMP):
            self.Xs[i] = self.locs[i]*S_sq+mu
        return self.Xs

-    @Cache_this(limit=10, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=(0,))
    def psicomputations(self, kern, Z, qX, return_psi2_n=False):
        mu, S = qX.mean.values, qX.variance.values
        N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]

@@ -62,7 +62,7 @@ class PSICOMP_GH(PSICOMP):
            psi2 += self.weights[i]* tdot(Kfu.T)
        return psi0, psi1, psi2

-    @Cache_this(limit=10, ignore_args=(0, 2,3,4))
+    @Cache_this(limit=3, ignore_args=(0, 2,3,4))
    def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
        mu, S = qX.mean.values, qX.variance.values
        if self.cache_K: Xs = self.comp_K(Z, qX)

@@ -132,5 +132,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):

    return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS

-_psi1computations = Cacher(__psi1computations, limit=5)
-_psi2computations = Cacher(__psi2computations, limit=5)
+_psi1computations = Cacher(__psi1computations, limit=3)
+_psi2computations = Cacher(__psi2computations, limit=3)

@@ -324,7 +324,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
        except:
            return self.fall_back.psicomputations(kern, Z, variational_posterior, return_psi2_n)

-    @Cache_this(limit=10, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=(0,))
    def _psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
        """
        Z - MxQ

@@ -369,7 +369,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
        except:
            return self.fall_back.psiDerivativecomputations(kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)

-    @Cache_this(limit=10, ignore_args=(0,2,3,4))
+    @Cache_this(limit=3, ignore_args=(0,2,3,4))
    def _psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        # resolve the requirement of dL_dpsi2 to be symmetric
        if len(dL_dpsi2.shape)==2: dL_dpsi2 = (dL_dpsi2+dL_dpsi2.T)/2

@@ -88,7 +88,7 @@ try:
            return psi0,psi1,psi2,psi2n

        from GPy.util.caching import Cacher
-        psicomputations = Cacher(_psicomputations, limit=1)
+        psicomputations = Cacher(_psicomputations, limit=3)

        def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
            ARD = (len(lengthscale)!=1)

@@ -373,7 +373,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
    def get_dimensions(self, Z, variational_posterior):
        return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]

-    @Cache_this(limit=1, ignore_args=(0,))
+    @Cache_this(limit=3, ignore_args=(0,))
    def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
        """
        Z - MxQ

@@ -407,7 +407,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
        else:
            return psi0, psi1_gpu.get(), psi2_gpu.get()

-    @Cache_this(limit=1, ignore_args=(0,2,3,4))
+    @Cache_this(limit=3, ignore_args=(0,2,3,4))
    def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        variance, lengthscale = kern.variance, kern.lengthscale
        from ....util.linalg_gpu import sum_axis

@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
-
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, Alex Grigorevskiy
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The standard periodic kernel which mentioned in:

@@ -9,7 +8,7 @@ The standard periodic kernel which mentioned in:
    The MIT Press, 2005.


-[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
+[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
    Neural Networks and Machine Learning, pages 133-165. Springer, 1998.
"""

@@ -25,56 +24,56 @@ class StdPeriodic(Kern):

    .. math::

-       k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
-       \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
+       k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
+       \left( \frac{\sin(\frac{\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] }

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\theta_1` in the formula above
    :type variance: float
-    :param wavelength: the vector of wavelengths :math:`\lambda_i`. If None then 1.0 is assumed.
-    :type wavelength: array or list of the appropriate size (or float if there is only one wavelength parameter)
+    :param period: the vector of periods :math:`\T_i`. If None then 1.0 is assumed.
+    :type period: array or list of the appropriate size (or float if there is only one period parameter)
    :param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
-    :param ARD1: Auto Relevance Determination with respect to wavelength.
-        If equal to "False" one single wavelength parameter :math:`\lambda_i` for
-        each dimension is assumed, otherwise there is one lengthscale
+    :param ARD1: Auto Relevance Determination with respect to period.
+        If equal to "False" one single period parameter :math:`\T_i` for
+        each dimension is assumed, otherwise there is one lengthscale
        parameter per dimension.
    :type ARD1: Boolean
-    :param ARD2: Auto Relevance Determination with respect to lengthscale.
-        If equal to "False" one single wavelength parameter :math:`l_i` for
-        each dimension is assumed, otherwise there is one lengthscale
+    :param ARD2: Auto Relevance Determination with respect to lengthscale.
+        If equal to "False" one single lengthscale parameter :math:`l_i` for
+        each dimension is assumed, otherwise there is one lengthscale
        parameter per dimension.
    :type ARD2: Boolean
    :param active_dims: indices of dimensions which are used in the computation of the kernel
-    :type wavelength: array or list of the appropriate size
+    :type active_dims: array or list of the appropriate size
    :param name: Name of the kernel for output
    :type String
    :param useGPU: whether of not use GPU
    :type Boolean
    """

-    def __init__(self, input_dim, variance=1., wavelength=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
+    def __init__(self, input_dim, variance=1., period=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
        super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
        self.input_dim = input_dim
-        self.ARD1 = ARD1 # correspond to wavelengths
+        self.ARD1 = ARD1 # correspond to periods
        self.ARD2 = ARD2 # correspond to lengthscales

        self.name = name

        if self.ARD1 == False:
-            if wavelength is not None:
-                wavelength = np.asarray(wavelength)
-                assert wavelength.size == 1, "Only one wavelength needed for non-ARD kernel"
+            if period is not None:
+                period = np.asarray(period)
+                assert period.size == 1, "Only one period needed for non-ARD kernel"
            else:
-                wavelength = np.ones(1)
+                period = np.ones(1.0)
        else:
-            if wavelength is not None:
-                wavelength = np.asarray(wavelength)
-                assert wavelength.size == input_dim, "bad number of wavelengths"
+            if period is not None:
+                period = np.asarray(period)
+                assert period.size == input_dim, "bad number of periods"
            else:
-                wavelength = np.ones(input_dim)
+                period = np.ones(input_dim)

        if self.ARD2 == False:
            if lengthscale is not None:
                lengthscale = np.asarray(lengthscale)

@@ -87,33 +86,33 @@ class StdPeriodic(Kern):
                assert lengthscale.size == input_dim, "bad number of lengthscales"
            else:
                lengthscale = np.ones(input_dim)

        self.variance = Param('variance', variance, Logexp())
        assert self.variance.size==1, "Variance size must be one"
-        self.wavelengths = Param('wavelengths', wavelength, Logexp())
-        self.lengthscales = Param('lengthscales', lengthscale, Logexp())
-
-        self.link_parameters(self.variance, self.wavelengths, self.lengthscales)
+        self.period = Param('period', period, Logexp())
+        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
+
+        self.link_parameters(self.variance, self.period, self.lengthscale)

    def parameters_changed(self):
        """
-        This functions deals as a callback for each optimization iteration.
+        This functions deals as a callback for each optimization iteration.
        If one optimization step was successfull and the parameters
-        this callback function will be called to be able to update any
+        this callback function will be called to be able to update any
        precomputations for the kernel.
        """

        pass


    def K(self, X, X2=None):
        """Compute the covariance matrix between X and X2."""
-        if X2 is None:
+        if X2 is None:
            X2 = X

-        base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
-        exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscales ), axis = -1 ) )
+        base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.period
+        exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscale ), axis = -1 ) )

        return self.variance * exp_dist

@@ -125,42 +124,42 @@ class StdPeriodic(Kern):

    def update_gradients_full(self, dL_dK, X, X2=None):
        """derivative of the covariance matrix with respect to the parameters."""
-        if X2 is None:
+        if X2 is None:
            X2 = X

-        base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
-
-        sin_base = np.sin( base )
-        exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscales ), axis = -1 ) )
-
-        dwl = self.variance * (1.0/np.square(self.lengthscales)) * sin_base*np.cos(base) * (base / self.wavelengths)
-
-        dl = self.variance * np.square( sin_base) / np.power( self.lengthscales, 3)
-
-        self.variance.gradient = np.sum(exp_dist * dL_dK)
-        #target[0] += np.sum( exp_dist * dL_dK)
-
-        if self.ARD1: # different wavelengths
-            self.wavelengths.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
-        else: # same wavelengths
-            self.wavelengths.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
-
+        base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.period
+
+        sin_base = np.sin( base )
+        exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscale ), axis = -1 ) )
+
+        dwl = self.variance * (1.0/np.square(self.lengthscale)) * sin_base*np.cos(base) * (base / self.period)
+
+        dl = self.variance * np.square( sin_base) / np.power( self.lengthscale, 3)
+
+        self.variance.gradient = np.sum(exp_dist * dL_dK)
+        #target[0] += np.sum( exp_dist * dL_dK)
+
+        if self.ARD1: # different periods
+            self.period.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
+        else: # same period
+            self.period.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
+
        if self.ARD2: # different lengthscales
-            self.lengthscales.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
+            self.lengthscale.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
        else: # same lengthscales
-            self.lengthscales.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
-
+            self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        """derivative of the diagonal of the covariance matrix with respect to the parameters."""
        self.variance.gradient = np.sum(dL_dKdiag)
-        self.wavelengths.gradient = 0
-        self.lengthscales.gradient = 0
+        self.period.gradient = 0
+        self.lengthscale.gradient = 0

    # def gradients_X(self, dL_dK, X, X2=None):
    # """derivative of the covariance matrix with respect to X."""
    #
    #
    # raise NotImplemented("Periodic kernel: dK_dX not implemented")
    #
    # def gradients_X_diag(self, dL_dKdiag, X):
    #
    #
    # raise NotImplemented("Periodic kernel: dKdiag_dX not implemented")

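Note: the StdPeriodic hunks above rename the wavelength/wavelengths parameter to period (and lengthscales to lengthscale), matching the formula k(x, y) = theta_1 * exp(-0.5 * sum_i (sin(pi * (x_i - y_i) / T_i) / l_i)^2). A small sketch evaluating the renamed kernel against that formula, assuming the public GPy.kern.StdPeriodic constructor:

    import numpy as np
    import GPy

    period, lengthscale, variance = 2.0, 0.5, 1.3
    k = GPy.kern.StdPeriodic(1, variance=variance, period=period, lengthscale=lengthscale)

    X = np.linspace(0.0, 4.0, 7)[:, None]
    base = np.pi * (X[:, None, :] - X[None, :, :]) / period
    K_manual = variance * np.exp(-0.5 * np.sum(np.square(np.sin(base) / lengthscale), axis=-1))

    print(np.allclose(k.K(X), K_manual))  # expected True
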
@@ -81,6 +81,52 @@ class White(Static):
    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        self.variance.gradient = dL_dpsi0.sum()

+class WhiteHeteroscedastic(Static):
+    def __init__(self, input_dim, num_data, variance=1., active_dims=None, name='white_hetero'):
+        """
+        A heteroscedastic White kernel (nugget/noise).
+        It defines one variance (nugget) per input sample.
+
+        Prediction excludes any noise learnt by this Kernel, so be careful using this kernel.
+
+        You can plot the errors learnt by this kernel by something similar as:
+        plt.errorbar(m.X, m.Y, yerr=2*np.sqrt(m.kern.white.variance))
+        """
+        super(Static, self).__init__(input_dim, active_dims, name)
+        self.variance = Param('variance', np.ones(num_data) * variance, Logexp())
+        self.link_parameters(self.variance)
+
+    def Kdiag(self, X):
+        if X.shape[0] == self.variance.shape[0]:
+            # If the input has the same number of samples as
+            # the number of variances, we return the variances
+            return self.variance
+        return 0.
+
+    def K(self, X, X2=None):
+        if X2 is None and X.shape[0] == self.variance.shape[0]:
+            return np.eye(X.shape[0]) * self.variance
+        else:
+            return 0.
+
+    def psi2(self, Z, variational_posterior):
+        return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def psi2n(self, Z, variational_posterior):
+        return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        if X2 is None:
+            self.variance.gradient = np.diagonal(dL_dK)
+        else:
+            self.variance.gradient = 0.
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = dL_dKdiag
+
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0
+
class Bias(Static):
    def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
        super(Bias, self).__init__(input_dim, variance, active_dims, name)

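Note: the new WhiteHeteroscedastic kernel learns one noise variance per training point, and its docstring suggests visualising them as error bars. A hedged usage sketch in a GP regression; the attribute name white_hetero follows the kernel's default name and is an assumption of this example, not something stated in the diff:

    import numpy as np
    import GPy

    np.random.seed(0)
    X = np.linspace(0, 10, 40)[:, None]
    Y = np.sin(X) + np.random.randn(40, 1) * 0.2

    kern = GPy.kern.RBF(1) + GPy.kern.WhiteHeteroscedastic(1, num_data=X.shape[0])
    m = GPy.models.GPRegression(X, Y, kern)
    m.optimize()

    # one learnt nugget per data point; prediction excludes this noise (see the docstring)
    per_point_err = 2 * np.sqrt(kern.white_hetero.variance.values)
    # e.g. plt.errorbar(X[:, 0], Y[:, 0], yerr=per_point_err)
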
@@ -81,11 +81,11 @@ class Stationary(Kern):
    def dK_dr(self, r):
        raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")

-    @Cache_this(limit=20, ignore_args=())
+    @Cache_this(limit=3, ignore_args=())
    def dK2_drdr(self, r):
        raise NotImplementedError("implement second derivative of covariance wrt r to use this method")

-    @Cache_this(limit=5, ignore_args=())
+    @Cache_this(limit=3, ignore_args=())
    def K(self, X, X2=None):
        """
        Kernel function applied on inputs X and X2.

@@ -99,6 +99,9 @@ class Stationary(Kern):

    @Cache_this(limit=3, ignore_args=())
    def dK_dr_via_X(self, X, X2):
        """
        compute the derivative of K wrt X going through X
        """
        #a convenience function, so we can cache dK_dr
        return self.dK_dr(self._scaled_dist(X, X2))

@@ -54,12 +54,12 @@ class TruncLinear(Kern):
        self.add_parameter(self.variances)
        self.add_parameter(self.delta)

-    @Cache_this(limit=2)
+    @Cache_this(limit=3)
    def K(self, X, X2=None):
        XX = self.variances*self._product(X, X2)
        return XX.sum(axis=-1)

-    @Cache_this(limit=2)
+    @Cache_this(limit=3)
    def _product(self, X, X2=None):
        if X2 is None:
            X2 = X

@@ -149,12 +149,12 @@ class TruncLinear_inf(Kern):
        self.add_parameter(self.variances)


-    # @Cache_this(limit=2)
+    # @Cache_this(limit=3)
    def K(self, X, X2=None):
        tmp = self._product(X, X2)
        return (self.variances*tmp).sum(axis=-1)

-    # @Cache_this(limit=2)
+    # @Cache_this(limit=3)
    def _product(self, X, X2=None):
        if X2 is None:
            X2 = X
