mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-07-02 16:01:03 +02:00
slicing support for kernel input dimension
This commit is contained in:
parent
5f3524e7da
commit
db5fd17609
10 changed files with 178 additions and 65 deletions
|
|
@ -48,7 +48,7 @@ class GP(Model):
|
||||||
self.Y_metadata = None
|
self.Y_metadata = None
|
||||||
|
|
||||||
assert isinstance(kernel, kern.Kern)
|
assert isinstance(kernel, kern.Kern)
|
||||||
assert self.input_dim == kernel.input_dim
|
#assert self.input_dim == kernel.input_dim
|
||||||
self.kern = kernel
|
self.kern = kernel
|
||||||
|
|
||||||
assert isinstance(likelihood, likelihoods.Likelihood)
|
assert isinstance(likelihood, likelihoods.Likelihood)
|
||||||
|
|
@ -68,8 +68,9 @@ class GP(Model):
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
|
self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
|
||||||
|
self.likelihood.update_gradients(np.diag(grad_dict['dL_dK']))
|
||||||
self.kern.update_gradients_full(grad_dict['dL_dK'], self.X)
|
self.kern.update_gradients_full(grad_dict['dL_dK'], self.X)
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return self._log_marginal_likelihood
|
return self._log_marginal_likelihood
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ class ObservableArray(np.ndarray, Observable):
|
||||||
__array_priority__ = -1 # Never give back ObservableArray
|
__array_priority__ = -1 # Never give back ObservableArray
|
||||||
def __new__(cls, input_array):
|
def __new__(cls, input_array):
|
||||||
if not isinstance(input_array, ObservableArray):
|
if not isinstance(input_array, ObservableArray):
|
||||||
obj = np.atleast_1d(input_array).view(cls)
|
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
|
||||||
else: obj = input_array
|
else: obj = input_array
|
||||||
cls.__name__ = "ObservableArray\n "
|
cls.__name__ = "ObservableArray\n "
|
||||||
return obj
|
return obj
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,6 @@ Observable Pattern for patameterization
|
||||||
|
|
||||||
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import itertools
|
|
||||||
|
|
||||||
__updated__ = '2013-12-16'
|
__updated__ = '2013-12-16'
|
||||||
|
|
||||||
|
|
@ -43,6 +42,7 @@ class Observable(object):
|
||||||
_updated = True
|
_updated = True
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self._observer_callables_ = []
|
self._observer_callables_ = []
|
||||||
|
|
||||||
def __del__(self, *args, **kwargs):
|
def __del__(self, *args, **kwargs):
|
||||||
del self._observer_callables_
|
del self._observer_callables_
|
||||||
|
|
||||||
|
|
@ -551,8 +551,8 @@ class OptimizationHandlable(Constrainable, Observable):
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def _set_params_transformed(self, p):
|
def _set_params_transformed(self, p):
|
||||||
if p is self._param_array_:
|
#if p is self._param_array_:
|
||||||
p = p.copy()
|
p = p.copy()
|
||||||
if self._has_fixes(): self._param_array_[self._fixes_] = p
|
if self._has_fixes(): self._param_array_[self._fixes_] = p
|
||||||
else: self._param_array_[:] = p
|
else: self._param_array_[:] = p
|
||||||
self.untransform()
|
self.untransform()
|
||||||
|
|
|
||||||
|
|
@ -66,10 +66,10 @@ class VariationalPosterior(Parameterized):
|
||||||
def __init__(self, means=None, variances=None, name=None, **kw):
|
def __init__(self, means=None, variances=None, name=None, **kw):
|
||||||
super(VariationalPosterior, self).__init__(name=name, **kw)
|
super(VariationalPosterior, self).__init__(name=name, **kw)
|
||||||
self.mean = Param("mean", means)
|
self.mean = Param("mean", means)
|
||||||
self.ndim = self.mean.ndim
|
|
||||||
self.shape = self.mean.shape
|
|
||||||
self.variance = Param("variance", variances, Logexp())
|
self.variance = Param("variance", variances, Logexp())
|
||||||
self.add_parameters(self.mean, self.variance)
|
self.add_parameters(self.mean, self.variance)
|
||||||
|
self.ndim = self.mean.ndim
|
||||||
|
self.shape = self.mean.shape
|
||||||
self.num_data, self.input_dim = self.mean.shape
|
self.num_data, self.input_dim = self.mean.shape
|
||||||
if self.has_uncertain_inputs():
|
if self.has_uncertain_inputs():
|
||||||
assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion"
|
assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion"
|
||||||
|
|
@ -77,6 +77,18 @@ class VariationalPosterior(Parameterized):
|
||||||
def has_uncertain_inputs(self):
|
def has_uncertain_inputs(self):
|
||||||
return not self.variance is None
|
return not self.variance is None
|
||||||
|
|
||||||
|
def __getitem__(self, s):
|
||||||
|
import copy
|
||||||
|
n = self.__new__(self.__class__)
|
||||||
|
dc = copy.copy(self.__dict__)
|
||||||
|
dc['mean'] = dc['mean'][s]
|
||||||
|
dc['variance'] = dc['variance'][s]
|
||||||
|
dc['shape'] = dc['mean'].shape
|
||||||
|
dc['ndim'] = dc['ndim']
|
||||||
|
dc['num_data'], dc['input_dim'] = self.mean.shape[0], self.mean.shape[1] if dc['ndim'] > 1 else 1
|
||||||
|
n.__dict__ = dc
|
||||||
|
return n
|
||||||
|
|
||||||
|
|
||||||
class NormalPosterior(VariationalPosterior):
|
class NormalPosterior(VariationalPosterior):
|
||||||
'''
|
'''
|
||||||
|
|
|
||||||
|
|
@ -64,8 +64,8 @@ class SparseGP(GP):
|
||||||
self.kern.gradient += target
|
self.kern.gradient += target
|
||||||
|
|
||||||
#gradients wrt Z
|
#gradients wrt Z
|
||||||
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
|
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z)
|
||||||
self.Z.gradient += self.kern.gradients_Z_expectations(
|
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations(
|
||||||
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
|
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
|
||||||
else:
|
else:
|
||||||
#gradients wrt kernel
|
#gradients wrt kernel
|
||||||
|
|
@ -77,8 +77,8 @@ class SparseGP(GP):
|
||||||
self.kern.gradient += target
|
self.kern.gradient += target
|
||||||
|
|
||||||
#gradients wrt Z
|
#gradients wrt Z
|
||||||
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
|
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
|
||||||
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
|
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
|
||||||
|
|
||||||
def _raw_predict(self, Xnew, full_cov=False):
|
def _raw_predict(self, Xnew, full_cov=False):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -49,9 +49,6 @@ class ExactGaussianInference(object):
|
||||||
|
|
||||||
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
|
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
|
||||||
|
|
||||||
#TODO: does this really live here?
|
|
||||||
likelihood.update_gradients(np.diag(dL_dK))
|
|
||||||
|
|
||||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
|
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,10 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import sys
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import itertools
|
import itertools
|
||||||
from linear import Linear
|
|
||||||
from ...core.parameterization import Parameterized
|
from ...core.parameterization import Parameterized
|
||||||
from ...core.parameterization.param import Param
|
from ...util.caching import Cache_this
|
||||||
from kern import Kern
|
from kern import Kern
|
||||||
|
|
||||||
class Add(Kern):
|
class Add(Kern):
|
||||||
|
|
@ -14,19 +12,24 @@ class Add(Kern):
|
||||||
assert all([isinstance(k, Kern) for k in subkerns])
|
assert all([isinstance(k, Kern) for k in subkerns])
|
||||||
if tensor:
|
if tensor:
|
||||||
input_dim = sum([k.input_dim for k in subkerns])
|
input_dim = sum([k.input_dim for k in subkerns])
|
||||||
self.input_slices = []
|
self.self.active_dims = []
|
||||||
n = 0
|
n = 0
|
||||||
for k in subkerns:
|
for k in subkerns:
|
||||||
self.input_slices.append(slice(n, n+k.input_dim))
|
self.self.active_dims.append(slice(n, n+k.input_dim))
|
||||||
n += k.input_dim
|
n += k.input_dim
|
||||||
else:
|
else:
|
||||||
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
|
#assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
|
||||||
input_dim = subkerns[0].input_dim
|
#input_dim = subkerns[0].input_dim
|
||||||
self.input_slices = [slice(None) for k in subkerns]
|
#self.input_slices = [slice(None) for k in subkerns]
|
||||||
|
input_dim = reduce(np.union1d, map(lambda x: np.r_[x.active_dims], subkerns))
|
||||||
super(Add, self).__init__(input_dim, 'add')
|
super(Add, self).__init__(input_dim, 'add')
|
||||||
self.add_parameters(*subkerns)
|
self.add_parameters(*subkerns)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def parts(self):
|
||||||
|
return self._parameters_
|
||||||
|
|
||||||
|
@Cache_this(limit=1, force_kwargs=('which_parts',))
|
||||||
def K(self, X, X2=None):
|
def K(self, X, X2=None):
|
||||||
"""
|
"""
|
||||||
Compute the kernel function.
|
Compute the kernel function.
|
||||||
|
|
@ -37,13 +40,19 @@ class Add(Kern):
|
||||||
handLes this as X2 == X.
|
handLes this as X2 == X.
|
||||||
"""
|
"""
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
if X2 is None:
|
which_parts=None
|
||||||
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
|
if which_parts is None:
|
||||||
else:
|
which_parts = self.parts
|
||||||
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
# if only one part is given
|
||||||
|
which_parts = [which_parts]
|
||||||
|
return sum([p.K(X, X2) for p in which_parts])
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dK, X):
|
||||||
|
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2=None):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
"""Compute the gradient of the objective function with respect to X.
|
"""Compute the gradient of the objective function with respect to X.
|
||||||
|
|
@ -55,16 +64,17 @@ class Add(Kern):
|
||||||
:param X2: Observed data inputs (optional, defaults to X)
|
:param X2: Observed data inputs (optional, defaults to X)
|
||||||
:type X2: np.ndarray (num_inducing x input_dim)"""
|
:type X2: np.ndarray (num_inducing x input_dim)"""
|
||||||
|
|
||||||
target = np.zeros_like(X)
|
target = np.zeros(X.shape)
|
||||||
if X2 is None:
|
for p in self.parts:
|
||||||
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
target[:, p.active_dims] += p.gradients_X(dL_dK, X, X2)
|
||||||
else:
|
|
||||||
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
|
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def Kdiag(self, X):
|
def Kdiag(self, X):
|
||||||
|
which_parts=None
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
|
if which_parts is None:
|
||||||
|
which_parts = self.parts
|
||||||
|
return sum([p.Kdiag(X) for p in which_parts])
|
||||||
|
|
||||||
|
|
||||||
def psi0(self, Z, variational_posterior):
|
def psi0(self, Z, variational_posterior):
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,22 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import numpy as np
|
from ...core.parameterization.parameterized import ParametersChangedMeta, Parameterized
|
||||||
import itertools
|
from ...util.caching import Cache_this
|
||||||
from ...core.parameterization import Parameterized
|
|
||||||
from ...core.parameterization.param import Param
|
|
||||||
|
|
||||||
|
|
||||||
|
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||||
|
def __call__(self, *args, **kw):
|
||||||
|
instance = super(KernCallsViaSlicerMeta, self).__call__(*args, **kw)
|
||||||
|
instance.K = instance._slice_wrapper(instance.K)
|
||||||
|
instance.Kdiag = instance._slice_wrapper(instance.Kdiag, True)
|
||||||
|
instance.update_gradients_full = instance._slice_wrapper(instance.update_gradients_full, False, True)
|
||||||
|
instance.update_gradients_diag = instance._slice_wrapper(instance.update_gradients_diag, True, True)
|
||||||
|
instance.gradients_X = instance._slice_wrapper(instance.gradients_X, False, True)
|
||||||
|
instance.gradients_X_diag = instance._slice_wrapper(instance.gradients_X_diag, True, True)
|
||||||
|
return instance
|
||||||
|
|
||||||
class Kern(Parameterized):
|
class Kern(Parameterized):
|
||||||
|
__metaclass__ = KernCallsViaSlicerMeta
|
||||||
def __init__(self, input_dim, name, *a, **kw):
|
def __init__(self, input_dim, name, *a, **kw):
|
||||||
"""
|
"""
|
||||||
The base class for a kernel: a positive definite function
|
The base class for a kernel: a positive definite function
|
||||||
|
|
@ -20,11 +29,83 @@ class Kern(Parameterized):
|
||||||
Do not instantiate.
|
Do not instantiate.
|
||||||
"""
|
"""
|
||||||
super(Kern, self).__init__(name=name, *a, **kw)
|
super(Kern, self).__init__(name=name, *a, **kw)
|
||||||
self.input_dim = input_dim
|
if isinstance(input_dim, int):
|
||||||
|
self.active_dims = slice(0, input_dim)
|
||||||
|
self.input_dim = input_dim
|
||||||
|
else:
|
||||||
|
self.active_dims = input_dim
|
||||||
|
self.input_dim = len(self.active_dims)
|
||||||
|
self._sliced_X = False
|
||||||
|
self._sliced_X2 = False
|
||||||
|
|
||||||
|
@Cache_this(limit=10, ignore_args = (0,))
|
||||||
|
def _slice_X(self, X):
|
||||||
|
return X[:, self.active_dims]
|
||||||
|
|
||||||
|
def _slice_wrapper(self, operation, diag=False, derivative=False):
|
||||||
|
"""
|
||||||
|
This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension.
|
||||||
|
The different switches are:
|
||||||
|
diag: if X2 exists
|
||||||
|
derivative: if firest arg is dL_dK
|
||||||
|
"""
|
||||||
|
if derivative:
|
||||||
|
if diag:
|
||||||
|
def x_slice_wrapper(dL_dK, X, *args, **kw):
|
||||||
|
X = self._slice_X(X) if not self._sliced_X else X
|
||||||
|
self._sliced_X = True
|
||||||
|
try:
|
||||||
|
ret = operation(dL_dK, X, *args, **kw)
|
||||||
|
except: raise
|
||||||
|
finally:
|
||||||
|
self._sliced_X = False
|
||||||
|
return ret
|
||||||
|
else:
|
||||||
|
def x_slice_wrapper(dL_dK, X, X2=None, *args, **kw):
|
||||||
|
X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2
|
||||||
|
self._sliced_X = True
|
||||||
|
self._sliced_X2 = True
|
||||||
|
try:
|
||||||
|
ret = operation(dL_dK, X, X2, *args, **kw)
|
||||||
|
except: raise
|
||||||
|
finally:
|
||||||
|
self._sliced_X = False
|
||||||
|
self._sliced_X2 = False
|
||||||
|
return ret
|
||||||
|
else:
|
||||||
|
if diag:
|
||||||
|
def x_slice_wrapper(X, *args, **kw):
|
||||||
|
X = self._slice_X(X) if not self._sliced_X else X
|
||||||
|
self._sliced_X = True
|
||||||
|
try:
|
||||||
|
ret = operation(X, *args, **kw)
|
||||||
|
except: raise
|
||||||
|
finally:
|
||||||
|
self._sliced_X = False
|
||||||
|
return ret
|
||||||
|
else:
|
||||||
|
def x_slice_wrapper(X, X2=None, *args, **kw):
|
||||||
|
X, X2 = self._slice_X(X) if not self._sliced_X else X, self._slice_X(X2) if X2 is not None and not self._sliced_X2 else X2
|
||||||
|
self._sliced_X = True
|
||||||
|
self._sliced_X2 = True
|
||||||
|
try:
|
||||||
|
ret = operation(X, X2, *args, **kw)
|
||||||
|
except: raise
|
||||||
|
finally:
|
||||||
|
self._sliced_X = False
|
||||||
|
self._sliced_X2 = False
|
||||||
|
return ret
|
||||||
|
x_slice_wrapper._operation = operation
|
||||||
|
x_slice_wrapper.__name__ = ("slicer("+operation.__name__
|
||||||
|
+(","+str(bool(diag)) if diag else'')
|
||||||
|
+(','+str(bool(derivative)) if derivative else '')
|
||||||
|
+')')
|
||||||
|
x_slice_wrapper.__doc__ = "**sliced**\n\n" + (operation.__doc__ or "")
|
||||||
|
return x_slice_wrapper
|
||||||
|
|
||||||
def K(self, X, X2):
|
def K(self, X, X2):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def Kdiag(self, Xa):
|
def Kdiag(self, X):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def psi0(self, Z, variational_posterior):
|
def psi0(self, Z, variational_posterior):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
@ -34,13 +115,16 @@ class Kern(Parameterized):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def gradients_X(self, dL_dK, X, X2):
|
def gradients_X(self, dL_dK, X, X2):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def gradients_X_diag(self, dL_dK, X):
|
def gradients_X_diag(self, dL_dKdiag, X):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2):
|
def update_gradients_full(self, dL_dK, X, X2):
|
||||||
"""Set the gradients of all parameters when doing full (N) inference."""
|
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
|
"""Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
"""
|
"""
|
||||||
Set the gradients of all parameters when doing inference with
|
Set the gradients of all parameters when doing inference with
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ class Stationary(Kern):
|
||||||
if lengthscale.size != input_dim:
|
if lengthscale.size != input_dim:
|
||||||
lengthscale = np.ones(input_dim)*lengthscale
|
lengthscale = np.ones(input_dim)*lengthscale
|
||||||
else:
|
else:
|
||||||
lengthscale = np.ones(self.input_dim)
|
lengthscale = np.ones(self.input_dim)
|
||||||
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
|
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
|
||||||
self.variance = Param('variance', variance, Logexp())
|
self.variance = Param('variance', variance, Logexp())
|
||||||
assert self.variance.size==1
|
assert self.variance.size==1
|
||||||
|
|
@ -85,12 +85,14 @@ class Stationary(Kern):
|
||||||
Compute the Euclidean distance between each row of X and X2, or between
|
Compute the Euclidean distance between each row of X and X2, or between
|
||||||
each pair of rows of X if X2 is None.
|
each pair of rows of X if X2 is None.
|
||||||
"""
|
"""
|
||||||
|
#X, = self._slice_X(X)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
Xsq = np.sum(np.square(X),1)
|
Xsq = np.sum(np.square(X),1)
|
||||||
r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
|
r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
|
||||||
util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
|
util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
|
||||||
return np.sqrt(r2)
|
return np.sqrt(r2)
|
||||||
else:
|
else:
|
||||||
|
#X2, = self._slice_X(X2)
|
||||||
X1sq = np.sum(np.square(X),1)
|
X1sq = np.sum(np.square(X),1)
|
||||||
X2sq = np.sum(np.square(X2),1)
|
X2sq = np.sum(np.square(X2),1)
|
||||||
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
|
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
|
||||||
|
|
@ -124,7 +126,6 @@ class Stationary(Kern):
|
||||||
self.lengthscale.gradient = 0.
|
self.lengthscale.gradient = 0.
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
|
||||||
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
|
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
|
||||||
|
|
||||||
#now the lengthscale gradient(s)
|
#now the lengthscale gradient(s)
|
||||||
|
|
@ -136,7 +137,7 @@ class Stationary(Kern):
|
||||||
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
|
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
|
||||||
tmp = dL_dr*self._inv_dist(X, X2)
|
tmp = dL_dr*self._inv_dist(X, X2)
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
|
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(self._slice_X(X)[:,q:q+1] - self._slice_X(X2)[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
|
||||||
else:
|
else:
|
||||||
r = self._scaled_dist(X, X2)
|
r = self._scaled_dist(X, X2)
|
||||||
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
|
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
|
||||||
|
|
@ -176,7 +177,6 @@ class Stationary(Kern):
|
||||||
ret = np.empty(X.shape, dtype=np.float64)
|
ret = np.empty(X.shape, dtype=np.float64)
|
||||||
[np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
|
[np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
|
||||||
ret /= self.lengthscale**2
|
ret /= self.lengthscale**2
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def gradients_X_diag(self, dL_dKdiag, X):
|
def gradients_X_diag(self, dL_dKdiag, X):
|
||||||
|
|
|
||||||
|
|
@ -9,24 +9,27 @@ class Cacher(object):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, operation, limit=5, ignore_args=()):
|
def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
|
||||||
self.limit = int(limit)
|
self.limit = int(limit)
|
||||||
self.ignore_args = ignore_args
|
self.ignore_args = ignore_args
|
||||||
|
self.force_kwargs = force_kwargs
|
||||||
self.operation=operation
|
self.operation=operation
|
||||||
self.cached_inputs = []
|
self.cached_inputs = []
|
||||||
self.cached_outputs = []
|
self.cached_outputs = []
|
||||||
self.inputs_changed = []
|
self.inputs_changed = []
|
||||||
|
|
||||||
def __call__(self, *args):
|
def __call__(self, *args, **kw):
|
||||||
"""
|
"""
|
||||||
A wrapper function for self.operation,
|
A wrapper function for self.operation,
|
||||||
"""
|
"""
|
||||||
|
|
||||||
#ensure that specified arguments are ignored
|
#ensure that specified arguments are ignored
|
||||||
|
items = sorted(kw.items(), key=lambda x: x[0])
|
||||||
|
oa_all = args + tuple(a for _,a in items)
|
||||||
if len(self.ignore_args) != 0:
|
if len(self.ignore_args) != 0:
|
||||||
oa = [a for i,a in enumerate(args) if i not in self.ignore_args]
|
oa = [a for i,a in itertools.chain(enumerate(args), items) if i not in self.ignore_args and i not in self.force_kwargs]
|
||||||
else:
|
else:
|
||||||
oa = args
|
oa = oa_all
|
||||||
|
|
||||||
# this makes sure we only add an observer once, and that None can be in args
|
# this makes sure we only add an observer once, and that None can be in args
|
||||||
observable_args = []
|
observable_args = []
|
||||||
|
|
@ -37,8 +40,13 @@ class Cacher(object):
|
||||||
#make sure that all the found argument really are observable:
|
#make sure that all the found argument really are observable:
|
||||||
#otherswise don't cache anything, pass args straight though
|
#otherswise don't cache anything, pass args straight though
|
||||||
if not all([isinstance(arg, Observable) for arg in observable_args]):
|
if not all([isinstance(arg, Observable) for arg in observable_args]):
|
||||||
return self.operation(*args)
|
return self.operation(*args, **kw)
|
||||||
|
|
||||||
|
if len(self.force_kwargs) != 0:
|
||||||
|
# check if there are force args, which force reloading
|
||||||
|
for k in self.force_kwargs:
|
||||||
|
if k in kw and kw[k] is not None:
|
||||||
|
return self.operation(*args, **kw)
|
||||||
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
||||||
# return self.operation(*args)
|
# return self.operation(*args)
|
||||||
|
|
||||||
|
|
@ -48,7 +56,7 @@ class Cacher(object):
|
||||||
i = state.index(True)
|
i = state.index(True)
|
||||||
if self.inputs_changed[i]:
|
if self.inputs_changed[i]:
|
||||||
#(elements of) the args have changed since we last computed: update
|
#(elements of) the args have changed since we last computed: update
|
||||||
self.cached_outputs[i] = self.operation(*args)
|
self.cached_outputs[i] = self.operation(*args, **kw)
|
||||||
self.inputs_changed[i] = False
|
self.inputs_changed[i] = False
|
||||||
return self.cached_outputs[i]
|
return self.cached_outputs[i]
|
||||||
else:
|
else:
|
||||||
|
|
@ -62,11 +70,11 @@ class Cacher(object):
|
||||||
self.cached_outputs.pop(0)
|
self.cached_outputs.pop(0)
|
||||||
|
|
||||||
#compute
|
#compute
|
||||||
self.cached_inputs.append(args)
|
self.cached_inputs.append(oa_all)
|
||||||
self.cached_outputs.append(self.operation(*args))
|
self.cached_outputs.append(self.operation(*args, **kw))
|
||||||
self.inputs_changed.append(False)
|
self.inputs_changed.append(False)
|
||||||
[a.add_observer(self, self.on_cache_changed) for a in observable_args]
|
[a.add_observer(self, self.on_cache_changed) for a in observable_args]
|
||||||
return self.cached_outputs[-1]#Max says return.
|
return self.cached_outputs[-1]#return
|
||||||
|
|
||||||
def on_cache_changed(self, arg):
|
def on_cache_changed(self, arg):
|
||||||
"""
|
"""
|
||||||
|
|
@ -90,15 +98,16 @@ class Cache_this(object):
|
||||||
"""
|
"""
|
||||||
A decorator which can be applied to bound methods in order to cache them
|
A decorator which can be applied to bound methods in order to cache them
|
||||||
"""
|
"""
|
||||||
def __init__(self, limit=5, ignore_args=()):
|
def __init__(self, limit=5, ignore_args=(), force_kwargs=()):
|
||||||
self.limit = limit
|
self.limit = limit
|
||||||
self.ignore_args = ignore_args
|
self.ignore_args = ignore_args
|
||||||
|
self.force_args = force_kwargs
|
||||||
self.c = None
|
self.c = None
|
||||||
def __call__(self, f):
|
def __call__(self, f):
|
||||||
def f_wrap(*args):
|
def f_wrap(*args, **kw):
|
||||||
if self.c is None:
|
if self.c is None:
|
||||||
self.c = Cacher(f, self.limit, ignore_args=self.ignore_args)
|
self.c = Cacher(f, self.limit, ignore_args=self.ignore_args, force_kwargs=self.force_args)
|
||||||
return self.c(*args)
|
return self.c(*args, **kw)
|
||||||
f_wrap._cacher = self
|
f_wrap._cacher = self
|
||||||
f_wrap.__doc__ = "**cached**\n\n" + (f.__doc__ or "")
|
f_wrap.__doc__ = "**cached**\n\n" + (f.__doc__ or "")
|
||||||
return f_wrap
|
return f_wrap
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue