merge with upstream

Zhenwen Dai 2016-03-10 18:17:35 +00:00
commit ba74e29aee
115 changed files with 1178 additions and 531 deletions

View file

@@ -10,7 +10,7 @@ from .src.add import Add
from .src.prod import Prod
from .src.rbf import RBF
from .src.linear import Linear, LinearFull
-from .src.static import Bias, White, Fixed
+from .src.static import Bias, White, Fixed, WhiteHeteroscedastic
from .src.brownian import Brownian
from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from .src.mlp import MLP
@@ -28,4 +28,4 @@ from .src.trunclinear import TruncLinear,TruncLinear_inf
from .src.splitKern import SplitKern,DEtime
from .src.splitKern import DEtime as DiffGenomeKern
from .src.spline import Spline
-from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
+from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

View file

@@ -162,4 +162,4 @@ class ODE_t(Kern):
self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK)
-self.ubias.gradient = np.sum(dkdubias * dL_dK)
+self.ubias.gradient = np.sum(dkdubias * dL_dK)

View file

@@ -1 +1 @@
-from . import psi_comp
+from . import psi_comp

View file

@@ -19,8 +19,8 @@ class Add(CombinationKernel):
if isinstance(kern, Add):
del subkerns[i]
for part in kern.parts[::-1]:
-kern.unlink_parameter(part)
-subkerns.insert(i, part)
+#kern.unlink_parameter(part)
+subkerns.insert(i, part.copy())
super(Add, self).__init__(subkerns, name)
self._exact_psicomp = self._check_exact_psicomp()
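Add flattens nested sums at construction time: each part of an incoming Add is copied into the new kernel instead of being unlinked from its parent, so the original sum stays usable. A minimal sketch of the resulting behaviour (standard GPy.kern API; printed names are illustrative):

    import GPy
    k1 = GPy.kern.RBF(1) + GPy.kern.Linear(1)   # an Add with two parts
    k2 = k1 + GPy.kern.White(1)                 # flattened into three parts, no nested Add
    print([p.name for p in k2.parts])           # ['rbf', 'linear', 'white']
    print(len(k1.parts))                        # still 2; k2 holds copies of k1's parts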
@@ -37,7 +37,7 @@ class Add(CombinationKernel):
else:
return False
-@Cache_this(limit=2, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Add all kernels together.
@@ -51,7 +51,7 @@ class Add(CombinationKernel):
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
-@Cache_this(limit=2, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
if which_parts is None:
which_parts = self.parts
@@ -98,17 +98,17 @@ class Add(CombinationKernel):
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
return target
-@Cache_this(limit=1, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def psi0(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi0(self,Z,variational_posterior)
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
-@Cache_this(limit=1, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def psi1(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi1(self,Z,variational_posterior)
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
-@Cache_this(limit=1, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def psi2(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi2(self,Z,variational_posterior)
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
@@ -144,7 +144,7 @@ class Add(CombinationKernel):
raise NotImplementedError("psi2 cannot be computed for this kernel")
return psi2
-@Cache_this(limit=1, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def psi2n(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi2n(self, Z, variational_posterior)
psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))
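An aside on the psi statistics being cached here: for an additive kernel k = sum_i k_i, linearity of expectation gives psi0 = sum_i E_q[k_i(x, x)] and psi1 = sum_i E_q[k_i(x, Z)], but psi2 = E_q[k(x, Z)^T k(x, Z)] additionally contains cross terms E_q[k_i(x, Z)^T k_j(x, Z)] between distinct parts. That is why psi2 starts from the sum of the parts' psi2 yet can still raise "psi2 cannot be computed for this kernel": only part combinations whose cross terms have closed forms are handled exactly, and everything else either falls back to Kern.psi2 (when _exact_psicomp is False) or raises.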
@@ -241,16 +241,20 @@ class Add(CombinationKernel):
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
return target_grads
-def add(self, other):
-if isinstance(other, Add):
-other_params = other.parameters[:]
-for p in other_params:
-other.unlink_parameter(p)
-self.link_parameters(*other_params)
-else:
-self.link_parameter(other)
-self.input_dim, self._all_dims_active = self.get_input_dim_active_dims(self.parts)
-return self
+#def add(self, other):
+# parts = self.parts
+# if 0:#isinstance(other, Add):
+# #other_params = other.parameters[:]
+# for p in other.parts[:]:
+# other.unlink_parameter(p)
+# parts.extend(other.parts)
+# #self.link_parameters(*other_params)
+#
+# else:
+# #self.link_parameter(other)
+# parts.append(other)
+# #self.input_dim, self._all_dims_active = self.get_input_dim_active_dims(parts)
+# return Add([p for p in parts], self.name)
def input_sensitivity(self, summarize=True):
if summarize:
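The recurring limit changes above all touch paramz's Cache_this decorator, which memoizes a method's results keyed on its arguments and keeps the limit most recent entries. A rough sketch of how it behaves; this assumes paramz's caching semantics, in particular that both the instance and the array argument are observable (Parameterized / ObsAr) so results can be reused:

    import numpy as np
    from paramz import Parameterized, ObsAr
    from paramz.caching import Cache_this

    class Demo(Parameterized):          # Parameterized instances are observable
        @Cache_this(limit=3)            # keep the 3 most recently used results
        def K(self, X):
            print('cache miss')         # expected only when X is new or was evicted
            return X.dot(X.T)

    d = Demo(name='demo')
    X = ObsAr(np.random.rand(5, 2))     # ObsAr lets the cache watch X for changes
    d.K(X)                              # computes, prints 'cache miss'
    d.K(X)                              # unchanged input: served from the cache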

View file

@@ -64,7 +64,7 @@ class EQ_ODE2(Kern):
self.W = Param('W', W)
self.link_parameters(self.lengthscale, self.C, self.B, self.W)
-@Cache_this(limit=2)
+@Cache_this(limit=3)
def K(self, X, X2=None):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
View file

@@ -48,11 +48,12 @@ class Kern(Parameterized):
if active_dims is None:
active_dims = np.arange(input_dim)
-self.active_dims = active_dims
-self._all_dims_active = np.atleast_1d(active_dims).astype(int)
-assert self._all_dims_active.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, _all_dims_active={}".format(self.input_dim, self._all_dims_active.size, self._all_dims_active)
+self.active_dims = np.asarray(active_dims, np.int_)
+self._all_dims_active = np.atleast_1d(self.active_dims).astype(int)
+assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}".format(self.input_dim, self._all_dims_active.size)
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
@@ -68,9 +69,12 @@ class Kern(Parameterized):
def _effective_input_dim(self):
return np.size(self._all_dims_active)
-@Cache_this(limit=20)
+@Cache_this(limit=3)
def _slice_X(self, X):
-return X[:, self._all_dims_active]
+try:
+return X[:, self._all_dims_active].astype('float')
+except:
+return X[:, self._all_dims_active]
def K(self, X, X2):
"""
@@ -319,10 +323,20 @@ class CombinationKernel(Kern):
:param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
"""
assert all([isinstance(k, Kern) for k in kernels])
-extra_dims = np.array(extra_dims, dtype=int)
-input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
+extra_dims = np.asarray(extra_dims, dtype=int)
+active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
+input_dim = active_dims.size
+if extra_dims is not None:
+input_dim += extra_dims.size
+# initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
+effective_input_dim = reduce(max, (k._all_dims_active.max() for k in kernels)) + 1
+self._all_dims_active = np.array(np.concatenate((np.arange(effective_input_dim), extra_dims if extra_dims is not None else [])), dtype=int)
self.extra_dims = extra_dims
self.link_parameters(*kernels)
@@ -330,16 +344,8 @@ class CombinationKernel(Kern):
def parts(self):
return self.parameters
-def get_input_dim_active_dims(self, kernels, extra_dims = None):
-self.active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
-#_all_dims_active = np.array(np.concatenate((_all_dims_active, extra_dims if extra_dims is not None else [])), dtype=int)
-input_dim = reduce(max, (k._all_dims_active.max() for k in kernels)) + 1
-if extra_dims is not None:
-input_dim += extra_dims.size
-_all_dims_active = np.arange(input_dim)
-return input_dim, _all_dims_active
def _set_all_dims_ative(self):
self._all_dims_active = np.atleast_1d(self.active_dims).astype(int)
def input_sensitivity(self, summarize=True):
"""

View file

@@ -51,7 +51,7 @@ class Linear(Kern):
self.link_parameter(self.variances)
self.psicomp = PSICOMP_Linear()
-@Cache_this(limit=2)
+@Cache_this(limit=3)
def K(self, X, X2=None):
if self.ARD:
if X2 is None:
@@ -62,7 +62,7 @@ class Linear(Kern):
else:
return self._dot_product(X, X2) * self.variances
-@Cache_this(limit=1, ignore_args=(0,))
+@Cache_this(limit=3, ignore_args=(0,))
def _dot_product(self, X, X2=None):
if X2 is None:
return tdot(X)

View file

@@ -45,7 +45,7 @@ class MLP(Kern):
self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
-@Cache_this(limit=20, ignore_args=())
+@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None):
if X2 is None:
X_denom = np.sqrt(self._comp_prod(X)+1.)
@@ -57,7 +57,7 @@ class MLP(Kern):
XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:]
return self.variance*four_over_tau*np.arcsin(XTX)
-@Cache_this(limit=20, ignore_args=())
+@Cache_this(limit=3, ignore_args=())
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix for X."""
X_prod = self._comp_prod(X)
@@ -88,14 +88,14 @@ class MLP(Kern):
"""Gradient of diagonal of covariance with respect to X"""
return self._comp_grads_diag(dL_dKdiag, X)[3]
-@Cache_this(limit=50, ignore_args=())
+@Cache_this(limit=3, ignore_args=())
def _comp_prod(self, X, X2=None):
if X2 is None:
return (np.square(X)*self.weight_variance).sum(axis=1)+self.bias_variance
else:
return (X*self.weight_variance).dot(X2.T)+self.bias_variance
-@Cache_this(limit=20, ignore_args=(1,))
+@Cache_this(limit=3, ignore_args=(1,))
def _comp_grads(self, dL_dK, X, X2=None):
var,w,b = self.variance, self.weight_variance, self.bias_variance
K = self.K(X, X2)
@@ -130,7 +130,7 @@ class MLP(Kern):
dX2 = common.T.dot(X)*w-((common*XTX).sum(axis=0)/(X2_prod+1.))[:,None]*X2*w
return dvar, dw, db, dX, dX2
-@Cache_this(limit=20, ignore_args=(1,))
+@Cache_this(limit=3, ignore_args=(1,))
def _comp_grads_diag(self, dL_dKdiag, X):
var,w,b = self.variance, self.weight_variance, self.bias_variance
K = self.Kdiag(X)
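For reference, the quantity these decorators cache is the MLP (arcsine) covariance that K above assembles from _comp_prod:

    k(x, x') = variance * (2/pi) * arcsin( (w x.x' + b) / sqrt((w x.x + b + 1) * (w x'.x' + b + 1)) )

where w is weight_variance and b is bias_variance; four_over_tau is 4/(2*pi) = 2/pi, and _comp_prod supplies the w x.x' + b terms.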

View file

@@ -5,32 +5,49 @@ import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
+from paramz.caching import Cache_this
class Poly(Kern):
"""
Polynomial kernel
"""
-def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
+def __init__(self, input_dim, variance=1., scale=1., bias=1., order=3., active_dims=None, name='poly'):
super(Poly, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
-self.link_parameter(self.variance)
+self.scale = Param('scale', scale, Logexp())
+self.bias = Param('bias', bias, Logexp())
+self.link_parameters(self.variance, self.scale, self.bias)
assert order >= 1, 'The order of the polynomial has to be at least 1.'
self.order=order
-def K(self, X, X2=None):
-return (self._dot_product(X, X2) + 1.)**self.order * self.variance
-def _dot_product(self, X, X2=None):
+def K(self, X, X2=None):
+_, _, B = self._AB(X, X2)
+return B * self.variance
+@Cache_this(limit=3)
+def _AB(self, X, X2=None):
if X2 is None:
-return np.dot(X, X.T)
+dot_prod = np.dot(X, X.T)
else:
-return np.dot(X, X2.T)
+dot_prod = np.dot(X, X2.T)
+A = (self.scale * dot_prod) + self.bias
+B = A ** self.order
+return dot_prod, A, B
def Kdiag(self, X):
-return self.variance*(np.square(X).sum(1) + 1.)**self.order
+return self.K(X).diagonal()#self.variance*(np.square(X).sum(1) + 1.)**self.order
def update_gradients_full(self, dL_dK, X, X2=None):
-self.variance.gradient = np.sum(dL_dK * (self._dot_product(X, X2) + 1.)**self.order)
+dot_prod, A, B = self._AB(X, X2)
+dK_dA = self.variance * self.order * A ** (self.order-1.)
+dL_dA = dL_dK * (dK_dA)
+self.scale.gradient = (dL_dA * dot_prod).sum()
+self.bias.gradient = dL_dA.sum()
+self.variance.gradient = np.sum(dL_dK * B)
+#import ipdb;ipdb.set_trace()
def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError
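The kernel implemented by _AB and K is k(x, x') = variance * (scale * x.x' + bias)**order, and update_gradients_full applies the chain rule through A = scale * x.x' + bias using dK/dA = variance * order * A**(order - 1). A NumPy-only restatement for checking the algebra (hypothetical helper, not GPy API):

    import numpy as np

    def poly_K(X, X2, variance=1., scale=1., bias=1., order=3.):
        A = scale * X.dot(X2.T) + bias    # A = scale * <x, x'> + bias
        return variance * A ** order      # K = variance * A**order

    X = np.random.rand(4, 2)
    K = poly_K(X, X)
    assert np.allclose(K, K.T)            # a kernel matrix must be symmetric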

View file

@@ -39,7 +39,7 @@ class Prod(CombinationKernel):
kernels.insert(i, part)
super(Prod, self).__init__(kernels, name)
-@Cache_this(limit=2, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
if which_parts is None:
which_parts = self.parts
@@ -48,7 +48,7 @@ class Prod(CombinationKernel):
which_parts = [which_parts]
return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
-@Cache_this(limit=2, force_kwargs=['which_parts'])
+@Cache_this(limit=3, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
if which_parts is None:
which_parts = self.parts

View file

@@ -21,7 +21,7 @@ from .gaussherm import PSICOMP_GH
from . import rbf_psi_comp, linear_psi_comp, ssrbf_psi_comp, sslinear_psi_comp
class PSICOMP_RBF(PSICOMP):
-@Cache_this(limit=10, ignore_args=(0,))
+@Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior):
@@ -31,7 +31,7 @@ class PSICOMP_RBF(PSICOMP):
else:
raise ValueError("unknown distriubtion received for psi-statistics")
-@Cache_this(limit=10, ignore_args=(0,2,3,4))
+@Cache_this(limit=3, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior):
@@ -43,7 +43,7 @@ class PSICOMP_RBF(PSICOMP):
class PSICOMP_Linear(PSICOMP):
-@Cache_this(limit=10, ignore_args=(0,))
+@Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
variances = kern.variances
if isinstance(variational_posterior, variational.NormalPosterior):
@@ -53,7 +53,7 @@ class PSICOMP_Linear(PSICOMP):
else:
raise ValueError("unknown distriubtion received for psi-statistics")
-@Cache_this(limit=10, ignore_args=(0,2,3,4))
+@Cache_this(limit=3, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variances = kern.variances
if isinstance(variational_posterior, variational.NormalPosterior):

View file

@@ -27,7 +27,7 @@ class PSICOMP_GH(PSICOMP):
def _setup_observers(self):
pass
-@Cache_this(limit=10, ignore_args=(0,))
+@Cache_this(limit=3, ignore_args=(0,))
def comp_K(self, Z, qX):
if self.Xs is None or self.Xs.shape != qX.mean.shape:
from paramz import ObsAr
@@ -38,7 +38,7 @@ class PSICOMP_GH(PSICOMP):
self.Xs[i] = self.locs[i]*S_sq+mu
return self.Xs
-@Cache_this(limit=10, ignore_args=(0,))
+@Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, qX, return_psi2_n=False):
mu, S = qX.mean.values, qX.variance.values
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
@@ -62,7 +62,7 @@ class PSICOMP_GH(PSICOMP):
psi2 += self.weights[i]* tdot(Kfu.T)
return psi0, psi1, psi2
-@Cache_this(limit=10, ignore_args=(0, 2,3,4))
+@Cache_this(limit=3, ignore_args=(0, 2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
mu, S = qX.mean.values, qX.variance.values
if self.cache_K: Xs = self.comp_K(Z, qX)
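PSICOMP_GH evaluates psi statistics with Gauss-Hermite quadrature: each expectation under q(x) = N(mu, S) becomes a weighted sum of kernel values at shifted-and-scaled quadrature nodes, which is what comp_K precomputes as Xs. The underlying rule, as a standalone NumPy sketch (values illustrative, not the GPy implementation):

    import numpy as np

    # E[f(x)] for x ~ N(mu, S): substitute x = mu + sqrt(2*S)*t, giving
    # (1/sqrt(pi)) * sum_i w_i * f(mu + sqrt(2*S)*t_i)
    locs, weights = np.polynomial.hermite.hermgauss(20)
    mu, S = 0.3, 0.5
    f = np.tanh                            # stand-in integrand; GPy uses kernel values here
    approx = (weights / np.sqrt(np.pi)).dot(f(np.sqrt(2 * S) * locs + mu))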

View file

@@ -132,5 +132,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
-_psi1computations = Cacher(__psi1computations, limit=5)
-_psi2computations = Cacher(__psi2computations, limit=5)
+_psi1computations = Cacher(__psi1computations, limit=3)
+_psi2computations = Cacher(__psi2computations, limit=3)
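Cacher is the function-level counterpart of the Cache_this decorator used elsewhere in this commit: it wraps a plain function and keeps the limit most recent results. The pattern, sketched with a hypothetical stand-in function (caching again presumes observable inputs such as ObsAr):

    import numpy as np
    from paramz import ObsAr
    from paramz.caching import Cacher

    def _expensive(Z):                        # stand-in for __psi1computations above
        return np.exp(-0.5 * np.square(Z))

    expensive = Cacher(_expensive, limit=3)   # module-level cached alias, same pattern as above
    Z = ObsAr(np.random.rand(10, 2))
    A = expensive(Z)                          # computed
    B = expensive(Z)                          # reused while Z is unchanged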

View file

@@ -324,7 +324,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
except:
return self.fall_back.psicomputations(kern, Z, variational_posterior, return_psi2_n)
-@Cache_this(limit=10, ignore_args=(0,))
+@Cache_this(limit=3, ignore_args=(0,))
def _psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
"""
Z - MxQ
@@ -369,7 +369,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
except:
return self.fall_back.psiDerivativecomputations(kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
-@Cache_this(limit=10, ignore_args=(0,2,3,4))
+@Cache_this(limit=3, ignore_args=(0,2,3,4))
def _psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# resolve the requirement of dL_dpsi2 to be symmetric
if len(dL_dpsi2.shape)==2: dL_dpsi2 = (dL_dpsi2+dL_dpsi2.T)/2

View file

@@ -88,7 +88,7 @@ try:
return psi0,psi1,psi2,psi2n
from GPy.util.caching import Cacher
-psicomputations = Cacher(_psicomputations, limit=1)
+psicomputations = Cacher(_psicomputations, limit=3)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)

View file

@@ -373,7 +373,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
-@Cache_this(limit=1, ignore_args=(0,))
+@Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
"""
Z - MxQ
@@ -407,7 +407,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
-@Cache_this(limit=1, ignore_args=(0,2,3,4))
+@Cache_this(limit=3, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
from ....util.linalg_gpu import sum_axis

View file

@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, Alex Grigorevskiy
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The standard periodic kernel which mentioned in:
@@ -9,7 +8,7 @@ The standard periodic kernel which mentioned in:
The MIT Press, 2005.
-[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
+[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
Neural Networks and Machine Learning, pages 133-165. Springer, 1998.
"""
@@ -25,56 +24,56 @@ class StdPeriodic(Kern):
.. math::
-k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
-\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
+k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
+\left( \frac{\sin(\frac{\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\theta_1` in the formula above
:type variance: float
-:param wavelength: the vector of wavelengths :math:`\lambda_i`. If None then 1.0 is assumed.
-:type wavelength: array or list of the appropriate size (or float if there is only one wavelength parameter)
+:param period: the vector of periods :math:`\T_i`. If None then 1.0 is assumed.
+:type period: array or list of the appropriate size (or float if there is only one period parameter)
:param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
-:param ARD1: Auto Relevance Determination with respect to wavelength.
-If equal to "False" one single wavelength parameter :math:`\lambda_i` for
-each dimension is assumed, otherwise there is one lengthscale
+:param ARD1: Auto Relevance Determination with respect to period.
+If equal to "False" one single period parameter :math:`\T_i` for
+each dimension is assumed, otherwise there is one lengthscale
parameter per dimension.
:type ARD1: Boolean
-:param ARD2: Auto Relevance Determination with respect to lengthscale.
-If equal to "False" one single wavelength parameter :math:`l_i` for
-each dimension is assumed, otherwise there is one lengthscale
+:param ARD2: Auto Relevance Determination with respect to lengthscale.
+If equal to "False" one single lengthscale parameter :math:`l_i` for
+each dimension is assumed, otherwise there is one lengthscale
parameter per dimension.
:type ARD2: Boolean
:param active_dims: indices of dimensions which are used in the computation of the kernel
-:type wavelength: array or list of the appropriate size
+:type active_dims: array or list of the appropriate size
:param name: Name of the kernel for output
:type String
:param useGPU: whether of not use GPU
:type Boolean
"""
-def __init__(self, input_dim, variance=1., wavelength=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
+def __init__(self, input_dim, variance=1., period=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
self.input_dim = input_dim
-self.ARD1 = ARD1 # correspond to wavelengths
+self.ARD1 = ARD1 # correspond to periods
self.ARD2 = ARD2 # correspond to lengthscales
self.name = name
if self.ARD1 == False:
-if wavelength is not None:
-wavelength = np.asarray(wavelength)
-assert wavelength.size == 1, "Only one wavelength needed for non-ARD kernel"
+if period is not None:
+period = np.asarray(period)
+assert period.size == 1, "Only one period needed for non-ARD kernel"
else:
-wavelength = np.ones(1)
+period = np.ones(1.0)
else:
-if wavelength is not None:
-wavelength = np.asarray(wavelength)
-assert wavelength.size == input_dim, "bad number of wavelengths"
+if period is not None:
+period = np.asarray(period)
+assert period.size == input_dim, "bad number of periods"
else:
-wavelength = np.ones(input_dim)
+period = np.ones(input_dim)
if self.ARD2 == False:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
@@ -87,33 +86,33 @@ class StdPeriodic(Kern):
assert lengthscale.size == input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(input_dim)
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1, "Variance size must be one"
-self.wavelengths = Param('wavelengths', wavelength, Logexp())
-self.lengthscales = Param('lengthscales', lengthscale, Logexp())
-self.link_parameters(self.variance, self.wavelengths, self.lengthscales)
+self.period = Param('period', period, Logexp())
+self.lengthscale = Param('lengthscale', lengthscale, Logexp())
+self.link_parameters(self.variance, self.period, self.lengthscale)
def parameters_changed(self):
"""
-This functions deals as a callback for each optimization iteration.
+This functions deals as a callback for each optimization iteration.
If one optimization step was successfull and the parameters
-this callback function will be called to be able to update any
+this callback function will be called to be able to update any
precomputations for the kernel.
"""
pass
def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2."""
-if X2 is None:
+if X2 is None:
X2 = X
-base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
-exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscales ), axis = -1 ) )
+base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.period
+exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscale ), axis = -1 ) )
return self.variance * exp_dist
@@ -125,42 +124,42 @@ class StdPeriodic(Kern):
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
-if X2 is None:
+if X2 is None:
X2 = X
-base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
-sin_base = np.sin( base )
-exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscales ), axis = -1 ) )
-dwl = self.variance * (1.0/np.square(self.lengthscales)) * sin_base*np.cos(base) * (base / self.wavelengths)
-dl = self.variance * np.square( sin_base) / np.power( self.lengthscales, 3)
-self.variance.gradient = np.sum(exp_dist * dL_dK)
-#target[0] += np.sum( exp_dist * dL_dK)
-if self.ARD1: # different wavelengths
-self.wavelengths.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
-else: # same wavelengths
-self.wavelengths.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
+base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.period
+sin_base = np.sin( base )
+exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscale ), axis = -1 ) )
+dwl = self.variance * (1.0/np.square(self.lengthscale)) * sin_base*np.cos(base) * (base / self.period)
+dl = self.variance * np.square( sin_base) / np.power( self.lengthscale, 3)
+self.variance.gradient = np.sum(exp_dist * dL_dK)
+#target[0] += np.sum( exp_dist * dL_dK)
+if self.ARD1: # different periods
+self.period.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
+else: # same period
+self.period.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
if self.ARD2: # different lengthscales
-self.lengthscales.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
+self.lengthscale.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
else: # same lengthscales
-self.lengthscales.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
+self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
self.variance.gradient = np.sum(dL_dKdiag)
-self.wavelengths.gradient = 0
-self.lengthscales.gradient = 0
+self.period.gradient = 0
+self.lengthscale.gradient = 0
# def gradients_X(self, dL_dK, X, X2=None):
# """derivative of the covariance matrix with respect to X."""
-#
+#
# raise NotImplemented("Periodic kernel: dK_dX not implemented")
#
# def gradients_X_diag(self, dL_dKdiag, X):
-#
+#
# raise NotImplemented("Periodic kernel: dKdiag_dX not implemented")
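A short usage sketch of the renamed interface (standard GPy API after this change; values illustrative):

    import numpy as np
    import GPy

    k = GPy.kern.StdPeriodic(1, variance=1., period=2., lengthscale=0.5)
    X = np.linspace(0, 4, 20)[:, None]
    K = k.K(X)    # K[i, j] peaks wherever X[i] - X[j] is a multiple of the period 2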

View file

@@ -81,6 +81,52 @@ class White(Static):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()
+class WhiteHeteroscedastic(Static):
+def __init__(self, input_dim, num_data, variance=1., active_dims=None, name='white_hetero'):
+"""
+A heteroscedastic White kernel (nugget/noise).
+It defines one variance (nugget) per input sample.
+Prediction excludes any noise learnt by this Kernel, so be careful using this kernel.
+You can plot the errors learnt by this kernel by something similar as:
+plt.errorbar(m.X, m.Y, yerr=2*np.sqrt(m.kern.white.variance))
+"""
+super(Static, self).__init__(input_dim, active_dims, name)
+self.variance = Param('variance', np.ones(num_data) * variance, Logexp())
+self.link_parameters(self.variance)
+def Kdiag(self, X):
+if X.shape[0] == self.variance.shape[0]:
+# If the input has the same number of samples as
+# the number of variances, we return the variances
+return self.variance
+return 0.
+def K(self, X, X2=None):
+if X2 is None and X.shape[0] == self.variance.shape[0]:
+return np.eye(X.shape[0]) * self.variance
+else:
+return 0.
+def psi2(self, Z, variational_posterior):
+return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
+def psi2n(self, Z, variational_posterior):
+return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
+def update_gradients_full(self, dL_dK, X, X2=None):
+if X2 is None:
+self.variance.gradient = np.diagonal(dL_dK)
+else:
+self.variance.gradient = 0.
+def update_gradients_diag(self, dL_dKdiag, X):
+self.variance.gradient = dL_dKdiag
+def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+self.variance.gradient = dL_dpsi0
class Bias(Static):
def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
super(Bias, self).__init__(input_dim, variance, active_dims, name)
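A usage sketch for the new kernel; this assumes GPy's usual model API and name-based parameter access (the part name white_hetero is the default set above):

    import numpy as np
    import GPy

    X = np.linspace(0, 10, 50)[:, None]
    Y = np.sin(X) + np.random.randn(50, 1) * 0.1
    k = GPy.kern.RBF(1) + GPy.kern.WhiteHeteroscedastic(1, num_data=X.shape[0])
    m = GPy.models.GPRegression(X, Y, k)
    m.optimize()
    per_point_sd = np.sqrt(m.kern.white_hetero.variance)  # one learnt noise level per sample

As the docstring warns, prediction excludes the noise learnt by this kernel, so these variances describe the training inputs only.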

View file

@@ -81,11 +81,11 @@ class Stationary(Kern):
def dK_dr(self, r):
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
-@Cache_this(limit=20, ignore_args=())
+@Cache_this(limit=3, ignore_args=())
def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
-@Cache_this(limit=5, ignore_args=())
+@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None):
"""
Kernel function applied on inputs X and X2.
@@ -99,6 +99,9 @@ class Stationary(Kern):
@Cache_this(limit=3, ignore_args=())
def dK_dr_via_X(self, X, X2):
+"""
+compute the derivative of K wrt X going through X
+"""
#a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2))

View file

@@ -54,12 +54,12 @@ class TruncLinear(Kern):
self.add_parameter(self.variances)
self.add_parameter(self.delta)
-@Cache_this(limit=2)
+@Cache_this(limit=3)
def K(self, X, X2=None):
XX = self.variances*self._product(X, X2)
return XX.sum(axis=-1)
-@Cache_this(limit=2)
+@Cache_this(limit=3)
def _product(self, X, X2=None):
if X2 is None:
X2 = X
@@ -149,12 +149,12 @@ class TruncLinear_inf(Kern):
self.add_parameter(self.variances)
-# @Cache_this(limit=2)
+# @Cache_this(limit=3)
def K(self, X, X2=None):
tmp = self._product(X, X2)
return (self.variances*tmp).sum(axis=-1)
-# @Cache_this(limit=2)
+# @Cache_this(limit=3)
def _product(self, X, X2=None):
if X2 is None:
X2 = X