mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
[GPU] GPU version of varDTC is ready
This commit is contained in:
commit
bbcba2553c
59 changed files with 2012 additions and 1186 deletions
|
|
@ -1,9 +1,9 @@
|
|||
from _src.kern import Kern
|
||||
from _src.rbf import RBF
|
||||
from _src.linear import Linear
|
||||
from _src.linear import Linear, LinearFull
|
||||
from _src.static import Bias, White
|
||||
from _src.brownian import Brownian
|
||||
from _src.sympykern import Sympykern
|
||||
from _src.symbolic import Symbolic
|
||||
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||
from _src.mlp import MLP
|
||||
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ class Add(CombinationKernel):
|
|||
If a list of parts (of this kernel!) `which_parts` is given, only
|
||||
the parts of the list are taken to compute the covariance.
|
||||
"""
|
||||
assert X.shape[1] > max(np.r_[self.active_dims])
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
|
|
@ -33,7 +32,6 @@ class Add(CombinationKernel):
|
|||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def Kdiag(self, X, which_parts=None):
|
||||
assert X.shape[1] > max(np.r_[self.active_dims])
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
|
|
@ -160,21 +158,13 @@ class Add(CombinationKernel):
|
|||
target_S += b
|
||||
return target_mu, target_S
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices, rest can get recomputed
|
||||
"""
|
||||
return super(Add, self)._getstate()
|
||||
|
||||
def _setstate(self, state):
|
||||
super(Add, self)._setstate(state)
|
||||
|
||||
def add(self, other, name='sum'):
|
||||
if isinstance(other, Add):
|
||||
other_params = other._parameters_[:]
|
||||
for p in other_params:
|
||||
other.remove_parameter(p)
|
||||
self.add_parameters(*other_params)
|
||||
else: self.add_parameter(other)
|
||||
else:
|
||||
self.add_parameter(other)
|
||||
self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
|
||||
return self
|
||||
|
|
@ -54,85 +54,93 @@ class IndependentOutputs(CombinationKernel):
|
|||
self.kern = kernels
|
||||
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
|
||||
self.index_dim = index_dim
|
||||
self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
|
||||
|
||||
def K(self,X ,X2=None):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if X2 is None:
|
||||
target = np.zeros((X.shape[0], X.shape[0]))
|
||||
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
|
||||
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
|
||||
else:
|
||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
|
||||
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
|
||||
return target
|
||||
|
||||
def Kdiag(self,X):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
target = np.zeros(X.shape[0])
|
||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||
return target
|
||||
|
||||
def update_gradients_full(self,dL_dK,X,X2=None):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
if self.single_kern: target = np.zeros(self.kern.size)
|
||||
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
|
||||
if self.single_kern:
|
||||
target = np.zeros(self.kern.size)
|
||||
kerns = itertools.repeat(self.kern)
|
||||
else:
|
||||
kerns = self.kern
|
||||
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
|
||||
def collate_grads(kern, i, dL, X, X2):
|
||||
kern.update_gradients_full(dL,X,X2)
|
||||
if self.single_kern: target[:] += kern.gradient
|
||||
else: target[i][:] += kern.gradient
|
||||
if X2 is None:
|
||||
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
|
||||
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))]
|
||||
else:
|
||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
|
||||
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
|
||||
if self.single_kern: kern.gradient = target
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
|
||||
def gradients_X(self,dL_dK, X, X2=None):
|
||||
target = np.zeros(X.shape)
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if X2 is None:
|
||||
# TODO: make use of index_to_slices
|
||||
values = np.unique(X[:,self.index_dim])
|
||||
slices = [X[:,self.index_dim]==i for i in values]
|
||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
||||
for kern, s in zip(self.kerns, slices)]
|
||||
for kern, s in zip(kerns, slices)]
|
||||
#slices = index_to_slices(X[:,self.index_dim])
|
||||
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
|
||||
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
||||
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||
#import ipdb;ipdb.set_trace()
|
||||
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
|
||||
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
|
||||
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)]
|
||||
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)]
|
||||
else:
|
||||
values = np.unique(X[:,self.index_dim])
|
||||
slices = [X[:,self.index_dim]==i for i in values]
|
||||
slices2 = [X2[:,self.index_dim]==i for i in values]
|
||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
|
||||
for kern, s, s2 in zip(self.kerns, slices, slices2)]
|
||||
for kern, s, s2 in zip(kerns, slices, slices2)]
|
||||
# TODO: make work with index_to_slices
|
||||
#slices = index_to_slices(X[:,self.index_dim])
|
||||
#slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
|
||||
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
|
||||
return target
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
target = np.zeros(X.shape)
|
||||
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
||||
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||
return target
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if self.single_kern: target = np.zeros(self.kern.size)
|
||||
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
|
||||
else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
|
||||
def collate_grads(kern, i, dL, X):
|
||||
kern.update_gradients_diag(dL,X)
|
||||
if self.single_kern: target[:] += kern.gradient
|
||||
else: target[i][:] += kern.gradient
|
||||
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
|
||||
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
|
||||
if self.single_kern: kern.gradient = target
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
|
||||
class Hierarchical(CombinationKernel):
|
||||
"""
|
||||
|
|
@ -148,30 +156,30 @@ class Hierarchical(CombinationKernel):
|
|||
def __init__(self, kern, name='hierarchy'):
|
||||
assert all([k.input_dim==kerns[0].input_dim for k in kerns])
|
||||
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
|
||||
self.kerns = kerns
|
||||
self.add_parameters(self.kerns)
|
||||
kerns = kerns
|
||||
self.add_parameters(kerns)
|
||||
|
||||
def K(self,X ,X2=None):
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
|
||||
K = self.kerns[0].K(X, X2)
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
|
||||
K = kerns[0].K(X, X2)
|
||||
if X2 is None:
|
||||
[[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
|
||||
[[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
|
||||
else:
|
||||
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
[[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)]
|
||||
[[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(kerns[1:], slices, slices2)]
|
||||
return target
|
||||
|
||||
def Kdiag(self,X):
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
|
||||
K = self.kerns[0].K(X, X2)
|
||||
[[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
|
||||
K = kerns[0].K(X, X2)
|
||||
[[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
|
||||
return target
|
||||
|
||||
def update_gradients_full(self,dL_dK,X,X2=None):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
self.kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(self.kerns[1:], slices):
|
||||
kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(kerns[1:], slices):
|
||||
target = np.zeros(k.size)
|
||||
def collate_grads(dL, X, X2):
|
||||
k.update_gradients_full(dL,X,X2)
|
||||
|
|
@ -180,8 +188,8 @@ class Hierarchical(CombinationKernel):
|
|||
k._set_gradient(target)
|
||||
else:
|
||||
X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
|
||||
self.kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(self.kerns[1:], slices):
|
||||
kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(kerns[1:], slices):
|
||||
target = np.zeros(k.size)
|
||||
def collate_grads(dL, X, X2):
|
||||
k.update_gradients_full(dL,X,X2)
|
||||
|
|
|
|||
|
|
@ -11,13 +11,12 @@ from ...util.caching import Cache_this
|
|||
|
||||
class Kern(Parameterized):
|
||||
#===========================================================================
|
||||
# This adds input slice support. The rather ugly code for slicing can be
|
||||
# This adds input slice support. The rather ugly code for slicing can be
|
||||
# found in kernel_slice_operations
|
||||
__metaclass__ = KernCallsViaSlicerMeta
|
||||
#===========================================================================
|
||||
_debug=False
|
||||
_support_GPU=False
|
||||
def __init__(self, input_dim, active_dims, name, useGPU=False,*a, **kw):
|
||||
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
|
||||
"""
|
||||
The base class for a kernel: a positive definite function
|
||||
which forms of a covariance function (kernel).
|
||||
|
|
@ -178,22 +177,6 @@ class Kern(Parameterized):
|
|||
#else: kernels.append(other)
|
||||
return Prod([self, other], name)
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices, rest can get recomputed
|
||||
"""
|
||||
return super(Kern, self)._getstate() + [
|
||||
self.active_dims,
|
||||
self.input_dim,
|
||||
self._sliced_X]
|
||||
|
||||
def _setstate(self, state):
|
||||
self._sliced_X = state.pop()
|
||||
self.input_dim = state.pop()
|
||||
self.active_dims = state.pop()
|
||||
super(Kern, self)._setstate(state)
|
||||
|
||||
class CombinationKernel(Kern):
|
||||
"""
|
||||
Abstract super class for combination kernels.
|
||||
|
|
@ -211,9 +194,7 @@ class CombinationKernel(Kern):
|
|||
:param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
|
||||
"""
|
||||
assert all([isinstance(k, Kern) for k in kernels])
|
||||
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
|
||||
input_dim = active_dims.max()+1 + len(extra_dims)
|
||||
active_dims = slice(active_dims.max()+1+len(extra_dims))
|
||||
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
|
||||
# initialize the kernel with the full input_dim
|
||||
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
|
||||
self.extra_dims = extra_dims
|
||||
|
|
@ -223,6 +204,12 @@ class CombinationKernel(Kern):
|
|||
def parts(self):
|
||||
return self._parameters_
|
||||
|
||||
def get_input_dim_active_dims(self, kernels, extra_dims = None):
|
||||
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
|
||||
input_dim = active_dims.max()+1 + (len(np.r_[extra_dims]) if extra_dims is not None else 0)
|
||||
active_dims = slice(0, input_dim, 1)
|
||||
return input_dim, active_dims
|
||||
|
||||
def input_sensitivity(self):
|
||||
in_sen = np.zeros((self.num_params, self.input_dim))
|
||||
for i, p in enumerate(self.parts):
|
||||
|
|
|
|||
|
|
@ -5,130 +5,135 @@ Created on 11 Mar 2014
|
|||
'''
|
||||
from ...core.parameterization.parameterized import ParametersChangedMeta
|
||||
import numpy as np
|
||||
from functools import wraps
|
||||
|
||||
def put_clean(dct, name, func):
|
||||
if name in dct:
|
||||
dct['_clean_{}'.format(name)] = dct[name]
|
||||
dct[name] = func(dct[name])
|
||||
|
||||
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||
def __call__(self, *args, **kw):
|
||||
instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
|
||||
instance.K = _slice_wrapper(instance, instance.K)
|
||||
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
|
||||
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
|
||||
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
|
||||
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
|
||||
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
|
||||
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
|
||||
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
|
||||
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
|
||||
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
|
||||
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
|
||||
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
|
||||
instance.parameters_changed()
|
||||
return instance
|
||||
def __new__(cls, name, bases, dct):
|
||||
put_clean(dct, 'K', _slice_K)
|
||||
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
||||
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
||||
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
||||
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||
|
||||
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
|
||||
"""
|
||||
This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension.
|
||||
The different switches are:
|
||||
diag: if X2 exists
|
||||
derivative: if first arg is dL_dK
|
||||
psi_stat: if first 3 args are dL_dpsi0..2
|
||||
psi_stat_Z: if first 2 args are dL_dpsi1..2
|
||||
"""
|
||||
if derivative:
|
||||
if diag:
|
||||
def x_slice_wrapper(dL_dKdiag, X):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced:
|
||||
ret = np.zeros(X.shape)
|
||||
X = kern._slice_X(X) if not kern._sliced_X else X
|
||||
# if the return value is of shape X.shape, we need to make sure to return the right shape
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
|
||||
else: ret = operation(dL_dKdiag, X)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
elif psi_stat:
|
||||
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced:
|
||||
ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
|
||||
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
|
||||
kern._sliced_X += 1
|
||||
# if the return value is of shape X.shape, we need to make sure to return the right shape
|
||||
try:
|
||||
if ret_X_not_sliced:
|
||||
ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
|
||||
r2 = ret[:2]
|
||||
ret[0] = ret1
|
||||
ret[1] = ret2
|
||||
ret[0][:, kern.active_dims] = r2[0]
|
||||
ret[1][:, kern.active_dims] = r2[1]
|
||||
del r2
|
||||
else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
elif psi_stat_Z:
|
||||
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced: ret = np.zeros(Z.shape)
|
||||
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
if ret_X_not_sliced:
|
||||
ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
put_clean(dct, 'psi0', _slice_psi)
|
||||
put_clean(dct, 'psi1', _slice_psi)
|
||||
put_clean(dct, 'psi2', _slice_psi)
|
||||
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
|
||||
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
|
||||
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
|
||||
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
|
||||
|
||||
class _Slice_wrap(object):
|
||||
def __init__(self, k, X, X2=None):
|
||||
self.k = k
|
||||
self.shape = X.shape
|
||||
if self.k._sliced_X == 0:
|
||||
assert X.shape[1] > max(np.r_[self.k.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.k.active_dims]))
|
||||
self.X = self.k._slice_X(X)
|
||||
self.X2 = self.k._slice_X(X2) if X2 is not None else X2
|
||||
self.ret = True
|
||||
else:
|
||||
def x_slice_wrapper(dL_dK, X, X2=None):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced:
|
||||
ret = np.zeros(X.shape)
|
||||
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
|
||||
else: ret = operation(dL_dK, X, X2)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
else:
|
||||
if diag:
|
||||
def x_slice_wrapper(X, *args, **kw):
|
||||
X = kern._slice_X(X) if not kern._sliced_X else X
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
ret = operation(X, *args, **kw)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
else:
|
||||
def x_slice_wrapper(X, X2=None, *args, **kw):
|
||||
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
ret = operation(X, X2, *args, **kw)
|
||||
except: raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
x_slice_wrapper._operation = operation
|
||||
x_slice_wrapper.__name__ = ("slicer("+str(operation)
|
||||
+(","+str(bool(diag)) if diag else'')
|
||||
+(','+str(bool(derivative)) if derivative else '')
|
||||
+')')
|
||||
x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
|
||||
return x_slice_wrapper
|
||||
self.X = X
|
||||
self.X2 = X2
|
||||
self.ret = False
|
||||
def __enter__(self):
|
||||
self.k._sliced_X += 1
|
||||
return self
|
||||
def __exit__(self, *a):
|
||||
self.k._sliced_X -= 1
|
||||
def handle_return_array(self, return_val):
|
||||
if self.ret:
|
||||
ret = np.zeros(self.shape)
|
||||
ret[:, self.k.active_dims] = return_val
|
||||
return ret
|
||||
return return_val
|
||||
|
||||
def _slice_K(f):
|
||||
@wraps(f)
|
||||
def wrap(self, X, X2 = None, *a, **kw):
|
||||
with _Slice_wrap(self, X, X2) as s:
|
||||
ret = f(self, s.X, s.X2, *a, **kw)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_Kdiag(f):
|
||||
@wraps(f)
|
||||
def wrap(self, X, *a, **kw):
|
||||
with _Slice_wrap(self, X, None) as s:
|
||||
ret = f(self, s.X, *a, **kw)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_update_gradients_full(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dK, X, X2=None):
|
||||
with _Slice_wrap(self, X, X2) as s:
|
||||
ret = f(self, dL_dK, s.X, s.X2)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_update_gradients_diag(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dKdiag, X):
|
||||
with _Slice_wrap(self, X, None) as s:
|
||||
ret = f(self, dL_dKdiag, s.X)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_X(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dK, X, X2=None):
|
||||
with _Slice_wrap(self, X, X2) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_X_diag(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dKdiag, X):
|
||||
with _Slice_wrap(self, X, None) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_psi(f):
|
||||
@wraps(f)
|
||||
def wrap(self, Z, variational_posterior):
|
||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||
ret = f(self, s.X, s.X2)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_update_gradients_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_Z_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2))
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_qX_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
with _Slice_wrap(self, variational_posterior, Z) as s:
|
||||
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
|
||||
r2 = ret[:2]
|
||||
ret[0] = s.handle_return_array(r2[0])
|
||||
ret[1] = s.handle_return_array(r2[1])
|
||||
del r2
|
||||
return ret
|
||||
return wrap
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ class Linear(Kern):
|
|||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
|
||||
# return (self.variances*gamma*mu).sum(axis=1)
|
||||
# return (self.variances*gamma*mu).sum(axis=1)
|
||||
else:
|
||||
return self.K(variational_posterior.mean, Z) #the variance, it does nothing
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ class Linear(Kern):
|
|||
|
||||
grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
|
||||
np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
|
||||
|
||||
|
||||
return grad
|
||||
else:
|
||||
#psi1
|
||||
|
|
@ -191,15 +191,15 @@ class Linear(Kern):
|
|||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
mu2S = np.square(mu)+S
|
||||
mu2S = np.square(mu)+S
|
||||
_, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
|
||||
|
||||
|
||||
grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\
|
||||
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma)
|
||||
grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\
|
||||
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu)
|
||||
grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS)
|
||||
|
||||
|
||||
return grad_mu, grad_S, grad_gamma
|
||||
else:
|
||||
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
|
||||
|
|
@ -210,7 +210,7 @@ class Linear(Kern):
|
|||
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
|
||||
# psi2
|
||||
self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)
|
||||
|
||||
|
||||
return grad_mu, grad_S
|
||||
|
||||
#--------------------------------------------------#
|
||||
|
|
@ -313,3 +313,47 @@ class Linear(Kern):
|
|||
|
||||
def input_sensitivity(self):
|
||||
return np.ones(self.input_dim) * self.variances
|
||||
|
||||
class LinearFull(Kern):
|
||||
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
|
||||
super(LinearFull, self).__init__(input_dim, active_dims, name)
|
||||
if W is None:
|
||||
W = np.ones((input_dim, rank))
|
||||
if kappa is None:
|
||||
kappa = np.ones(input_dim)
|
||||
assert W.shape == (input_dim, rank)
|
||||
assert kappa.shape == (input_dim,)
|
||||
|
||||
self.W = Param('W', W)
|
||||
self.kappa = Param('kappa', kappa, Logexp())
|
||||
self.add_parameters(self.W, self.kappa)
|
||||
|
||||
def K(self, X, X2=None):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2)
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None else X2)
|
||||
self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W)
|
||||
self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W)
|
||||
|
||||
def Kdiag(self, X):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
return np.einsum('ij,jk,ik->i', X, P, X)
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag)
|
||||
self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag)
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
if X2 is None:
|
||||
return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P)
|
||||
else:
|
||||
return np.einsum('ij,jk,kl->il', dL_dK, X2, P)
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ class Prod(CombinationKernel):
|
|||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def K(self, X, X2=None, which_parts=None):
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
|
|
@ -33,7 +32,6 @@ class Prod(CombinationKernel):
|
|||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def Kdiag(self, X, which_parts=None):
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
|
||||
|
|
@ -58,8 +56,6 @@ class Prod(CombinationKernel):
|
|||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape)
|
||||
for k1,k2 in itertools.combinations(self.parts, 2):
|
||||
target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
|
||||
target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
|
||||
target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X)
|
||||
target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X)
|
||||
return target
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -74,9 +74,6 @@ class RBF(Stationary):
|
|||
# Spike-and-Slab GPLVM
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
if self.useGPU:
|
||||
# dL_dpsi0_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi0))
|
||||
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
|
||||
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
|
||||
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
|
||||
|
|
@ -99,7 +96,7 @@ class RBF(Stationary):
|
|||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
|
||||
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
l2 = self.lengthscale**2
|
||||
if l2.size != self.input_dim:
|
||||
|
|
@ -107,8 +104,6 @@ class RBF(Stationary):
|
|||
|
||||
#contributions from psi0:
|
||||
self.variance.gradient = np.sum(dL_dpsi0)
|
||||
if self._debug:
|
||||
num_grad = self.lengthscale.gradient.copy()
|
||||
self.lengthscale.gradient = 0.
|
||||
|
||||
#from psi1
|
||||
|
|
@ -128,8 +123,6 @@ class RBF(Stationary):
|
|||
else:
|
||||
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
|
||||
|
||||
if self._debug:
|
||||
import ipdb;ipdb.set_trace()
|
||||
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
|
||||
|
||||
else:
|
||||
|
|
@ -139,8 +132,6 @@ class RBF(Stationary):
|
|||
# Spike-and-Slab GPLVM
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
if self.useGPU:
|
||||
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
|
||||
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
|
||||
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
|
@ -177,8 +168,6 @@ class RBF(Stationary):
|
|||
# Spike-and-Slab GPLVM
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
if self.useGPU:
|
||||
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
|
||||
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
|
||||
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
ndata = variational_posterior.mean.shape[0]
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@ from kern import Kern
|
|||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
||||
class Sympykern(Kern):
|
||||
class Symbolic(Kern):
|
||||
"""
|
||||
A kernel object, where all the hard work in done by sympy.
|
||||
A kernel object, where all the hard work is done by sympy.
|
||||
|
||||
:param k: the covariance function
|
||||
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
||||
|
|
@ -26,10 +26,8 @@ class Sympykern(Kern):
|
|||
- to handle multiple inputs, call them x_1, z_1, etc
|
||||
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||
"""
|
||||
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None):
|
||||
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):
|
||||
|
||||
if name is None:
|
||||
name='sympykern'
|
||||
if k is None:
|
||||
raise ValueError, "You must provide an argument for the covariance function."
|
||||
super(Sympykern, self).__init__(input_dim, active_dims, name)
|
||||
|
|
@ -60,7 +58,6 @@ class Sympykern(Kern):
|
|||
# extract parameter names from the covariance
|
||||
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||
|
||||
|
||||
# Look for parameters with index (subscripts), they are associated with different outputs.
|
||||
if self.output_dim>1:
|
||||
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||
|
|
@ -97,8 +94,8 @@ class Sympykern(Kern):
|
|||
val = 1.0
|
||||
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
|
||||
if param is not None:
|
||||
if param.has_key(theta):
|
||||
val = param[theta]
|
||||
if param.has_key(theta.name):
|
||||
val = param[theta.name]
|
||||
setattr(self, theta.name, Param(theta.name, val, None))
|
||||
self.add_parameters(getattr(self, theta.name))
|
||||
|
||||
|
|
@ -117,6 +114,12 @@ class Sympykern(Kern):
|
|||
self.arg_list += self._sp_theta_i + self._sp_theta_j
|
||||
self.diag_arg_list += self._sp_theta_i
|
||||
|
||||
# Check if there are additional linear operators on the covariance.
|
||||
self._sp_operators = operators
|
||||
# TODO: Deal with linear operators
|
||||
#if self._sp_operators:
|
||||
# for operator in self._sp_operators:
|
||||
|
||||
# psi_stats aren't yet implemented.
|
||||
if False:
|
||||
self.compute_psi_stats()
|
||||
|
|
@ -254,3 +257,176 @@ class Sympykern(Kern):
|
|||
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
|
||||
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
|
||||
|
||||
if False:
|
||||
class Symcombine(CombinationKernel):
|
||||
"""
|
||||
Combine list of given sympy covariances together with the provided operations.
|
||||
"""
|
||||
def __init__(self, subkerns, operations, name='sympy_combine'):
|
||||
super(Symcombine, self).__init__(subkerns, name)
|
||||
for subkern, operation in zip(subkerns, operations):
|
||||
self._sp_k += self._k_double_operate(subkern._sp_k, operation)
|
||||
|
||||
#def _double_operate(self, k, operation):
|
||||
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def K(self, X, X2=None, which_parts=None):
|
||||
"""
|
||||
Combine covariances with a linear operator.
|
||||
"""
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
# if only one part is given
|
||||
which_parts = [which_parts]
|
||||
return reduce(np.add, (p.K(X, X2) for p in which_parts))
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def Kdiag(self, X, which_parts=None):
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
# if only one part is given
|
||||
which_parts = [which_parts]
|
||||
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
||||
|
||||
def update_gradients_diag(self, dL_dK, X):
|
||||
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
"""Compute the gradient of the objective function with respect to X.
|
||||
|
||||
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
||||
:type dL_dK: np.ndarray (num_samples x num_inducing)
|
||||
:param X: Observed data inputs
|
||||
:type X: np.ndarray (num_samples x input_dim)
|
||||
:param X2: Observed data inputs (optional, defaults to X)
|
||||
:type X2: np.ndarray (num_inducing x input_dim)"""
|
||||
|
||||
target = np.zeros(X.shape)
|
||||
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape)
|
||||
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
|
||||
|
||||
def psi1(self, Z, variational_posterior):
|
||||
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
|
||||
|
||||
def psi2(self, Z, variational_posterior):
|
||||
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
|
||||
#return psi2
|
||||
# compute the "cross" terms
|
||||
from static import White, Bias
|
||||
from rbf import RBF
|
||||
#from rbf_inv import RBFInv
|
||||
from linear import Linear
|
||||
#ffrom fixed import Fixed
|
||||
|
||||
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||
# i1, i2 = p1.active_dims, p2.active_dims
|
||||
# white doesn;t combine with anything
|
||||
if isinstance(p1, White) or isinstance(p2, White):
|
||||
pass
|
||||
# rbf X bias
|
||||
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
|
||||
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
|
||||
tmp = p2.psi1(Z, variational_posterior)
|
||||
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
|
||||
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
|
||||
tmp = p1.psi1(Z, variational_posterior)
|
||||
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
|
||||
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
|
||||
tmp1 = p1.psi1(Z, variational_posterior)
|
||||
tmp2 = p2.psi1(Z, variational_posterior)
|
||||
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
|
||||
else:
|
||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
return psi2
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self.parts:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
target = np.zeros(Z.shape)
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self.parts:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
return target
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
target_mu = np.zeros(variational_posterior.shape)
|
||||
target_S = np.zeros(variational_posterior.shape)
|
||||
for p1 in self._parameters_:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self._parameters_:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
target_mu += a
|
||||
target_S += b
|
||||
return target_mu, target_S
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices, rest can get recomputed
|
||||
"""
|
||||
return super(Add, self)._getstate()
|
||||
|
||||
def _setstate(self, state):
|
||||
super(Add, self)._setstate(state)
|
||||
|
||||
def add(self, other, name='sum'):
|
||||
if isinstance(other, Add):
|
||||
other_params = other._parameters_.copy()
|
||||
for p in other_params:
|
||||
other.remove_parameter(p)
|
||||
self.add_parameters(*other_params)
|
||||
else: self.add_parameter(other)
|
||||
return self
|
||||
Loading…
Add table
Add a link
Reference in a new issue