mirror of https://github.com/SheffieldML/GPy.git
synced 2026-05-15 06:52:39 +02:00

Commit message: everything is broken

This commit is contained in:
parent de51ad638a
commit d636c8c30c

13 changed files with 325 additions and 323 deletions
@@ -44,6 +44,7 @@ class GP(Model):
         self.Y_metadata = None

         assert isinstance(kernel, kern.Kern)
+        assert self.input_dim == kernel.input_dim
         self.kern = kernel

         assert isinstance(likelihood, likelihoods.Likelihood)
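The new assertion makes the GP constructor contract explicit: the kernel must be a kern.Kern whose input_dim matches the model's inputs. A minimal sketch of what now fails fast (GPy.models.GPRegression is assumed to route through this constructor; the data here is made up):

    import numpy as np
    import GPy

    X = np.random.randn(10, 2)
    Y = np.random.randn(10, 1)
    k = GPy.kern.RBF(3)                  # wrong: X has 2 columns, kernel expects 3
    # GPy.models.GPRegression(X, Y, k)   # would now trip the input_dim assertion
    k = GPy.kern.RBF(2)                  # correct input_dim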
@@ -23,7 +23,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
     :param input_array: array which this parameter handles
     :type input_array: numpy.ndarray
     :param default_constraint: The default constraint for this parameter
-    :type default_constraint:
+    :type default_constraint:

     You can add/remove constraints by calling constrain on the parameter itself, e.g.:

@@ -59,7 +59,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
     def __init__(self, name, input_array, default_constraint=None):
         super(Param, self).__init__(name=name, default_constraint=default_constraint)

     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return

@@ -192,7 +192,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         return numpy.r_[a]
         return numpy.r_[:b]
         return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))

     #===========================================================================
     # Convenience
     #===========================================================================

@@ -260,7 +260,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri
         clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
         for i in range(self._realndim_-len(clean_curr_slice)):
             i += len(clean_curr_slice)
-            clean_curr_slice += range(self._realshape_[i])
+            clean_curr_slice += range(self._realshape_[i])
         if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
                 and len(set(map(len, clean_curr_slice))) <= 1):
             return numpy.fromiter(itertools.izip(*clean_curr_slice),

@@ -426,4 +426,4 @@ class ParamConcatenation(object):
             start = False
         return "\n".join(strings)
     def __repr__(self):
-        return "\n".join(map(repr,self.params))
+        return "\n".join(map(repr,self.params))
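The docstring above mentions adding and removing constraints on the parameter itself; a minimal sketch of that API (Param, constrain/unconstrain and Logexp are the names used elsewhere in this commit; the values are made up):

    import numpy as np
    from GPy.core.parameterization.param import Param
    from GPy.core.parameterization.transformations import Logexp

    p = Param('variance', np.ones(3))
    p.constrain(Logexp())     # keep the parameter positive
    p.unconstrain(Logexp())   # and remove that constraint again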
@@ -18,8 +18,8 @@ class Observable(object):
     def remove_observer(self, observer):
         del self._observers_[observer]
     def _notify_observers(self):
-        [callble(self) for callble in self._observers_.itervalues()]
+        [callble(self) for callble in self._observers_.values()]

 class Pickleable(object):
     def _getstate(self):
         """

@@ -51,7 +51,7 @@ class Parentable(object):
         super(Parentable,self).__init__()
         self._direct_parent_ = direct_parent
         self._parent_index_ = parent_index

     def has_parent(self):
         return self._direct_parent_ is not None

@@ -82,7 +82,7 @@ class Nameable(Parentable):
         from_name = self.name
         self._name = name
         if self.has_parent():
-            self._direct_parent_._name_changed(self, from_name)
+            self._direct_parent_._name_changed(self, from_name)

 class Parameterizable(Parentable):

@@ -90,7 +90,7 @@ class Parameterizable(Parentable):
         super(Parameterizable, self).__init__(*args, **kwargs)
         from GPy.core.parameterization.array_core import ParamList
         _parameters_ = ParamList()

     def parameter_names(self, add_name=False):
         if add_name:
             return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)]

@@ -142,21 +142,21 @@ class Gradcheckable(Parentable):
 class Indexable(object):
     def _raveled_index(self):
         raise NotImplementedError, "Need to be able to get the raveled Index"

     def _internal_offset(self):
         return 0

     def _offset_for(self, param):
         raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"

     def _raveled_index_for(self, param):
         """
         get the raveled index for a param
         that is an int array, containing the indexes for the flattened
         param inside this parameterized logic.
         """
-        raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
+        raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"

 class Constrainable(Nameable, Indexable, Parameterizable):
     def __init__(self, name, default_constraint=None):
         super(Constrainable,self).__init__(name)

@@ -166,7 +166,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         self.priors = ParameterIndexOperations()
         if self._default_constraint_ is not None:
             self.constrain(self._default_constraint_)

     #===========================================================================
     # Fixing Parameters:
     #===========================================================================

@@ -182,21 +182,21 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         rav_i = self._highest_parent_._raveled_index_for(self)
         self._highest_parent_._set_fixed(rav_i)
     fix = constrain_fixed

     def unconstrain_fixed(self):
         """
         This parameter will no longer be fixed.
         """
         unconstrained = self.unconstrain(__fixed__)
-        self._highest_parent_._set_unfixed(unconstrained)
+        self._highest_parent_._set_unfixed(unconstrained)
     unfix = unconstrain_fixed

     def _set_fixed(self, index):
         import numpy as np
         if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
         self._fixes_[index] = FIXED
         if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED

     def _set_unfixed(self, index):
         import numpy as np
         if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)

@@ -212,7 +212,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
             self._fixes_[fixed_indices] = FIXED
         else:
             self._fixes_ = None

     def _has_fixes(self):
         return hasattr(self, "_fixes_") and self._fixes_ is not None

@@ -222,17 +222,17 @@ class Constrainable(Nameable, Indexable, Parameterizable):
     def set_prior(self, prior, warning=True, update=True):
         repriorized = self.unset_priors()
         self._add_to_index_operations(self.priors, repriorized, prior, warning, update)

     def unset_priors(self, *priors):
         return self._remove_from_index_operations(self.priors, priors)

     def log_prior(self):
         """evaluate the prior"""
         if self.priors.size > 0:
             x = self._get_params()
             return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0)
         return 0.

     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
         import numpy as np

@@ -242,7 +242,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
             [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
             return ret
         return 0.

     #===========================================================================
     # Constrain operations -> done
     #===========================================================================

@@ -269,7 +269,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         transformats of this parameter object.
         """
         return self._remove_from_index_operations(self.constraints, transforms)

     def constrain_positive(self, warning=True, update=True):
         """
         :param warning: print a warning if re-constraining parameters.

@@ -314,7 +314,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         Remove (lower, upper) bounded constrain from this parameter/
         """
         self.unconstrain(Logistic(lower, upper))

     def _parent_changed(self, parent):
         from index_operations import ParameterIndexOperationsView
         self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size)

@@ -340,7 +340,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
             removed = np.union1d(removed, unconstrained)
             if t is __fixed__:
                 self._highest_parent_._set_unfixed(unconstrained)

         return removed
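The Observable change above (observers stored as a dict of observer -> callable, now iterated with .values() instead of .itervalues()) is what lets parameters push change notifications to the objects that own them; Linear.__init__ further down registers exactly such a callback. A toy sketch of the same wiring (the Watcher class is hypothetical):

    import numpy as np
    from GPy.core.parameterization.param import Param

    class Watcher(object):
        def _on_changed(self, which):
            print "changed:", which.name   # Python 2, like the rest of this codebase

    w = Watcher()
    p = Param('variance', np.ones(2))
    p.add_observer(w, w._on_changed)       # store the callback under observer w
    p._notify_observers()                  # every stored callable is called with p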
@@ -53,20 +53,19 @@ class SparseGP(GP):
         self.add_parameter(self.Z, index=0)
         self.parameters_changed()

-    def _update_gradients_Z(self, add=False):
-        #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
+    def _gradients_Z(self):
+        #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
         if not self.Z.is_fixed:
-            if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
             if self.X_variance is None:
-                self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
+                self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
             else:
-                self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-                self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
+                self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)
+        print self.Z.gradient
+        print id(self.Z)

     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._update_gradients_Z(add=False)
+        self.Z.gradient = self._gradients_Z()

     def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
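The refactor above moves SparseGP from accumulate-style updates (_update_gradients_Z(add=...)) to a compute-and-assign style: parameters_changed() runs inference once and writes the result straight onto Z.gradient. Schematically (method names from the diff, body abridged):

    def parameters_changed(self):
        # one inference pass produces the posterior, the bound and all dL_d* terms
        self.posterior, self._log_marginal_likelihood, self.grad_dict = \
            self.inference_method.inference(self.kern, self.X, self.X_variance,
                                            self.Z, self.likelihood, self.Y)
        # gradients are pure functions of grad_dict, assigned rather than accumulated
        self.Z.gradient = self._gradients_Z()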
@@ -22,18 +22,18 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
     # generate GPLVM-like data
     X = _np.random.rand(num_inputs, input_dim)
     lengthscales = _np.random.rand(input_dim)
-    k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
+    k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
         #+ GPy.kern.white(input_dim, 0.01)
         )
     K = k.K(X)
     Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T

-    # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-    k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
-    # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
-    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
+    # k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
+    #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
+    # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
+    # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)

     p = .3

@@ -73,7 +73,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
     data = GPy.util.datasets.oil_100()
     Y = data['X']
     # create simple GP model
-    kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
+    kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.bias(6)
     m = GPy.models.GPLVM(Y, 6, kernel=kernel)
     m.data_labels = data['Y'].argmax(axis=1)
     if optimize: m.optimize('scg', messages=verbose)

@@ -88,7 +88,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
     Y = Y - Y.mean(0)
     Y /= Y.std(0)
     # Create the model
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q)
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q)
     m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
     m.data_labels = data['Y'][:N].argmax(axis=1)

@@ -138,7 +138,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
         (1 - var))) + .001
     Z = _np.random.permutation(X)[:num_inducing]

-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))

     m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
     m.data_colors = c

@@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
     _np.random.seed(0)
     data = GPy.util.datasets.oil()

-    kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
     Y = data['X'][:N]
     Yn = Gaussian(Y, normalize=True)
     m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)

@@ -435,7 +435,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
     data = GPy.util.datasets.osu_run1()
     # optimize
-    back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
+    back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
     mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
     m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
     if optimize: m.optimize(messages=verbose, max_f_eval=10000)

@@ -470,7 +470,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
     data = GPy.util.datasets.osu_run1()
     Q = 6
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
     m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
     # optimize
     m.ensure_default_constraints()
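Every change in this file is the same rename: lower-case kernel constructors (GPy.kern.rbf, GPy.kern.rbf_inv) become CamelCase classes (GPy.kern.RBF, GPy.kern.RBF_inv), while bias, white and linear keep their old spellings here. A minimal sketch of the new spelling, matching the calls above:

    import numpy as _np
    import GPy

    Q = 4
    kernel = GPy.kern.RBF(Q, .5, _np.ones(Q) * 2., ARD=True) \
           + GPy.kern.bias(Q, _np.exp(-2)) \
           + GPy.kern.white(Q, _np.exp(-2))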
@@ -1,7 +1,7 @@
 from _src.rbf import RBF
 from _src.white import White
 from _src.kern import Kern
-Linear = 'foo'
+from _src.linear import Linear
 #import bias
 #import Brownian
 #import coregionalize
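With the placeholder string gone, Linear resolves to the real class from _src.linear, so ordinary construction works again (a sketch; the default of ones for the variances is taken from _src/linear.py below):

    import GPy
    k = GPy.kern.Linear(2)   # variances default to np.ones(input_dim)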
@@ -34,7 +34,7 @@ class Add(Kern):
         :param X: the first set of inputs to the kernel
         :param X2: (optional) the second set of arguments to the kernel. If X2
                    is None, this is passed throgh to the 'part' object, which
-                   handles this as X2 == X.
+                   handLes this as X2 == X.
         """
         assert X.shape[1] == self.input_dim
         if X2 is None:
@@ -48,9 +48,6 @@ class Add(Kern):
     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
         [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
-
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -69,123 +66,125 @@ class Add(Kern):
         return target

     def Kdiag(self, X):
         """Compute the diagonal of the covariance function for inputs X."""
         assert X.shape[1] == self.input_dim
         return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])

     def psi0(self, Z, mu, S):
-        target = np.zeros(mu.shape[0])
-        [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target
-
-    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
-        target = np.zeros(self.size)
-        [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
-        return self._transform_gradients(target)
-
-    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
-        target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
-        [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target_mu, target_S
+        return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

     def psi1(self, Z, mu, S):
-        target = np.zeros((mu.shape[0], Z.shape[0]))
-        [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target
-
-    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
-        target = np.zeros((self.size))
-        [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
-        return self._transform_gradients(target)
-
-    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
-        target = np.zeros_like(Z)
-        [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target
-
-    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
-        """return shapes are num_samples,num_inducing,input_dim"""
-        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
-        [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target_mu, target_S
+        return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

     def psi2(self, Z, mu, S):
         """
         Compute the psi2 statistics for the covariance function.

         :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
         :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
         :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
         """
-        target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
-        [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
+        psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

         # compute the "cross" terms
         # TODO: input_slices needed
-        crossterms = 0
-        for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
-            if i_s1 == i_s2:
-                # TODO psi1 this must be faster/better/precached/more nice
-                tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
-                p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
-                tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
-                p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
-                prod = np.multiply(tmp1, tmp2)
-                crossterms += prod[:, :, None] + prod[:, None, :]
-        target += crossterms
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #ffrom fixed import Fixed
+        for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
+            # white doesn't combine with anything
+            if isinstance(p1, White) or isinstance(p2, White):
+                pass
+            # rbf X bias
+            #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
+            elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
+                tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
+                psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
+            #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
+            elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
+                tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
+                psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            else:
+                raise NotImplementedError, "psi2 cannot be computed for this kernel"
+        return psi2

+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #ffrom fixed import Fixed
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+            #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+            p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])

-    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
-        """Gradient of the psi2 statistics with respect to the parameters."""
-        target = np.zeros(self.size)
-        [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
-        # compute the "cross" terms
-        # TODO: better looping, input_slices
-        for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
-            p1, p2 = self._parameters_[i1], self._parameters_[i2]
-            # ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
-            ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
-            tmp = np.zeros((mu.shape[0], Z.shape[0]))
-            p1.psi1(Z, mu, S, tmp)
-            p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
-        return self._transform_gradients(target)
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #ffrom fixed import Fixed
+        target = np.zeros(Z.shape)
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+            #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+            target += p1.gradients_Z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+        return target

-    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
-        target = np.zeros_like(Z)
-        [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        # target *= 2
-        # compute the "cross" terms
-        # TODO: we need input_slices here.
-        for p1, p2 in itertools.permutations(self._parameters_, 2):
-            # if p1.name == 'linear' and p2.name == 'linear':
-            #     raise NotImplementedError("We don't handle linear/linear cross-terms")
-            tmp = np.zeros((mu.shape[0], Z.shape[0]))
-            p1.psi1(Z, mu, S, tmp)
-            p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
-        return target * 2
-
-    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
-        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
-        [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        # compute the "cross" terms
-        # TODO: we need input_slices here.
-        for p1, p2 in itertools.permutations(self._parameters_, 2):
-            # if p1.name == 'linear' and p2.name == 'linear':
-            #     raise NotImplementedError("We don't handle linear/linear cross-terms")
-            tmp = np.zeros((mu.shape[0], Z.shape[0]))
-            p1.psi1(Z, mu, S, tmp)
-            p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
-        return target_mu, target_S
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #ffrom fixed import Fixed
+        target_mu = np.zeros(mu.shape)
+        target_S = np.zeros(S.shape)
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+            #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+            a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+            target_mu += a
+            target_S += b
+        return target_mu, target_S

     def plot(self, *args, **kwargs):
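The cross terms above come from expanding psi2 for a sum kernel: with k = k_a + k_b, the expectation E[k(x,z) k(x,z')] picks up mixed k_a * k_b products. When one part is a Bias with constant variance c, the mixed term reduces to c * (psi1_b[n,m] + psi1_b[n,m']), which is exactly the broadcast used in the code. A self-contained numpy sketch of that identity (psi1 assumed N x M):

    import numpy as np

    def bias_cross_term(c, psi1):
        # cross[n, m, mp] = c * (psi1[n, m] + psi1[n, mp])
        return c * (psi1[:, :, None] + psi1[:, None, :])

    psi1 = np.random.rand(5, 3)     # N=5 data points, M=3 inducing points
    cross = bias_cross_term(2.0, psi1)
    assert cross.shape == (5, 3, 3)
    assert np.allclose(cross[1, 0, 2], 2.0 * (psi1[1, 0] + psi1[1, 2]))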
@@ -9,7 +9,7 @@ from ...core.parameterization.param import Param

 class Kern(Parameterized):
-    def __init__(self,input_dim,name):
+    def __init__(self, input_dim, name):
         """
         The base class for a kernel: a positive definite function
         which forms of a covariance function (kernel).

@@ -22,21 +22,15 @@ class Kern(Parameterized):
         super(Kern, self).__init__(name)
         self.input_dim = input_dim

-    def K(self,X,X2,target):
+    def K(self, X, X2, target):
         raise NotImplementedError
-    def Kdiag(self,X,target):
+    def Kdiag(self, Xa ,target):
         raise NotImplementedError
-    def _param_grad_helper(self,dL_dK,X,X2,target):
+    def _param_grad_helper(self, dL_dK,X, X2, target):
         raise NotImplementedError
-    def dKdiag_dtheta(self,dL_dKdiag,X,target): # TODO: Max??
-        # In the base case compute this by calling _param_grad_helper. Need to
-        # override for stationary covariances (for example) to save
-        # time.
-        for i in range(X.shape[0]):
-            self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
     def psi0(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
+    def dpsi0_dtheta(self,dL_dpsi0, Z,mu,S,target):
         raise NotImplementedError
     def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
         raise NotImplementedError
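The removed dKdiag_dtheta fallback looped over every row of X and called _param_grad_helper once per point, which is O(N) kernel calls. For stationary kernels k(x, x) is a constant (the variance), so the diagonal gradient collapses to a single sum; a sketch of the cheap override the removed comment alluded to (hypothetical, a single variance parameter assumed):

    import numpy as np

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        # d Kdiag / d variance == 1 for every input, so the chain rule is just a sum
        target[0] += np.sum(dL_dKdiag)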
@@ -9,6 +9,7 @@ from ...util.linalg import tdot
 from ...util.misc import fast_array_equal, param_to_array
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
+from ...util.caching import Cacher, cache_this

 class Linear(Kern):
     """

@@ -45,22 +46,35 @@ class Linear(Kern):
             variances = np.ones(self.input_dim)

         self.variances = Param('variances', variances, Logexp())
         #TODO: remove?self.variances.gradient = np.zeros(self.variances.shape)
         self.add_parameter(self.variances)
-        self.variances.add_observer(self, self.update_variance)
+        self.variances.add_observer(self, self._on_changed)

-        # initialize cache
-        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
-        self._X, self._X2 = np.empty(shape=(2, 1))
+    def _on_changed(self, obj):
+        self._notify_observers()

-    def update_variance(self, v):
-        self.variances2 = np.square(self.variances)
+    @cache_this(limit=3, reset_on_self=True)
+    def K(self, X, X2=None):
+        if self.ARD:
+            if X2 is None:
+                return tdot(X*np.sqrt(self.variances))
+            else:
+                rv = np.sqrt(self.variances)
+                return np.dot(X*rv, (X2*rv).T)
+        else:
+            return self._dot_product(X, X2) * self.variances

-    def on_input_change(self, X):
-        self._K_computations(X, None)
+    @cache_this(limit=3, reset_on_self=False)
+    def _dot_product(self, X, X2=None):
+        if X2 is None:
+            return tdot(X)
+        else:
+            return np.dot(X, X2.T)

     def Kdiag(self, X):
         return np.sum(self.variances * np.square(X), -1)

     def update_gradients_full(self, dL_dK, X):
-        self.variances.gradient[:] = 0
+        self.variances.gradient = np.zeros(self.variances.size)
         self._param_grad_helper(dL_dK, X, None, self.variances.gradient)

     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):

@@ -68,7 +82,7 @@ class Linear(Kern):
         if self.ARD:
             self.variances.gradient = tmp.sum(0)
         else:
-            self.variances.gradient = tmp.sum()
+            self.variances.gradient = np.atleast_1d(tmp.sum())
         self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
         self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)

@@ -85,25 +99,8 @@ class Linear(Kern):
         if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
         else: self.variances.gradient += tmp.sum()
         #from Kmm
-        self._K_computations(Z, None)
         self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)

-    def K(self, X, X2, target):
-        if self.ARD:
-            XX = X * np.sqrt(self.variances)
-            if X2 is None:
-                target += tdot(XX)
-            else:
-                XX2 = X2 * np.sqrt(self.variances)
-                target += np.dot(XX, XX2.T)
-        else:
-            if X is not self._X or X2 is not None:
-                self._K_computations(X, X2)
-            target += self.variances * self._dot_product
-
-    def Kdiag(self, X, target):
-        np.add(target, np.sum(self.variances * np.square(X), -1), target)
-
     def _param_grad_helper(self, dL_dK, X, X2, target):
         if self.ARD:
             if X2 is None:

@@ -112,18 +109,16 @@ class Linear(Kern):
             product = X[:, None, :] * X2[None, :, :]
             target += (dL_dK[:, :, None] * product).sum(0).sum(0)
         else:
-            if X is not self._X or X2 is not None:
-                self._K_computations(X, X2)
-            target += np.sum(self._dot_product * dL_dK)
+            target += np.sum(self._dot_product(X, X2) * dL_dK)

-    def gradients_X(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2=None):
         if X2 is None:
-            target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
+            return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
         else:
-            target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
+            return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)

-    def dKdiag_dX(self,dL_dKdiag,X,target):
-        target += 2.*self.variances*dL_dKdiag[:,None]*X
+    def gradients_X_diag(self, dL_dKdiag, X):
+        return 2.*self.variances*dL_dKdiag[:,None]*X

     #---------------------------------------#
     #            PSI statistics             #

@@ -273,15 +268,15 @@ class Linear(Kern):
     #            Precomputations            #
     #---------------------------------------#

-    def _K_computations(self, X, X2):
-        if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
-            self._X = X.copy()
-            if X2 is None:
-                self._dot_product = tdot(param_to_array(X))
-                self._X2 = None
-            else:
-                self._X2 = X2.copy()
-                self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
+    #def _K_computations(self, X, X2):
+        #if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
+            #self._X = X.copy()
+            #if X2 is None:
+                ##self._dot_product = tdot(param_to_array(X))
+                #self._X2 = None
+            #else:
+                #self._X2 = X2.copy()
+                #self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))

     def _psi_computations(self, Z, mu, S):
         # here are the "statistics" for psi1 and psi2
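The rewritten Linear.K computes the ARD branch by scaling columns with sqrt(variances) before the dot product; algebraically that is X diag(v) X2^T. A quick numerical check of the identity used above:

    import numpy as np

    X, X2 = np.random.randn(5, 3), np.random.randn(4, 3)
    v = np.random.rand(3)
    rv = np.sqrt(v)
    K_scaled = np.dot(X * rv, (X2 * rv).T)          # what Linear.K computes
    K_direct = np.dot(np.dot(X, np.diag(v)), X2.T)  # X diag(v) X2^T
    assert np.allclose(K_scaled, K_direct)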
@@ -2,9 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 from kern import Kern
-from coregionalize import Coregionalize
 import numpy as np
-import hashlib

 class Prod(Kern):
     """

@@ -17,7 +15,7 @@ class Prod(Kern):
     :rtype: kernel object

     """
-    def __init__(self,k1,k2,tensor=False):
+    def __init__(self, k1, k2, tensor=False):
         if tensor:
             super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
             self.slice1 = slice(0,k1.input_dim)

@@ -25,64 +23,43 @@ class Prod(Kern):
         else:
             assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
             super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
-            self.slice1 = slice(0,self.input_dim)
-            self.slice2 = slice(0,self.input_dim)
+            self.slice1 = slice(0, self.input_dim)
+            self.slice2 = slice(0, self.input_dim)
         self.k1 = k1
         self.k2 = k2
         self.add_parameters(self.k1, self.k2)

-        #initialize cache
-        self._X, self._X2 = np.empty(shape=(2,1))
-        self._params = None
-
     def K(self, X, X2=None):
-        self._K_computations(X, X2)
-        return self._K1 * self._K2
+        if X2 is None:
+            return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
+        else:
+            return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])

     def Kdiag(self, X):
         return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])

     def update_gradients_full(self, dL_dK, X):
-        self._K_computations(X, None)
-        self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
-        self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
+        self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
+        self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])

+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
+        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )

     def gradients_X(self, dL_dK, X, X2=None):
         """derivative of the covariance matrix with respect to X."""
-        self._K_computations(X, X2)
+        target = np.zeros(X.shape)
         if X2 is None:
-            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None)
-            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None)
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None)
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None)
         else:
-            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1])
-            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
         return target

-    def dKdiag_dX(self, dL_dKdiag, X, target):
-        K1 = np.zeros(X.shape[0])
-        K2 = np.zeros(X.shape[0])
-        self.k1.Kdiag(X[:,self.slice1],K1)
-        self.k2.Kdiag(X[:,self.slice2],K2)
-        self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
-        self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
+    def gradients_X_diag(self, dL_dKdiag, X):
+        target = np.zeros(X.shape)
+        target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1])
+        target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2])
+        return target

-    def _K_computations(self, X, X2):
-        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
-            self._X = X.copy()
-            self._params == self._get_params().copy()
-            if X2 is None:
-                self._X2 = None
-                self._K1 = self.k1.K(X[:,self.slice1],None)
-                self._K2 = self.k2.K(X[:,self.slice2],None)
-            else:
-                self._X2 = X2.copy()
-                self._K1 = self.k1.K(X[:,self.slice1],X2[:,self.slice1])
-                self._K2 = self.k2.K(X[:,self.slice2],X2[:,self.slice2])
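Prod.update_gradients_full above is the elementwise product rule: with K = K1 * K2 (Hadamard product), the loss gradient seen by k1 is dL_dK * K2 and vice versa. A small numerical check of that rule:

    import numpy as np

    dL_dK = np.random.rand(3, 3)
    K1, K2 = np.random.rand(3, 3), np.random.rand(3, 3)
    L = lambda A: (dL_dK * A * K2).sum()   # loss as a function of K1's entries
    eps = 1e-6
    numeric = (L(K1 + eps) - L(K1)) / eps  # perturb every entry of K1 by eps
    analytic = (dL_dK * K2).sum()          # sum of the per-entry gradients
    assert abs(numeric - analytic) < 1e-4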
@@ -79,17 +79,18 @@ class RBF(Kern):
         ret[:] = self.variance
         return ret

+    #TODO: remove TARGET!
-    def psi0(self, Z, mu, S, target):
-        target += self.variance
+    def psi0(self, Z, mu, S):
+        ret = np.empty(mu.shape[0], dtype=np.float64)
+        ret[:] = self.variance
+        return ret

-    def psi1(self, Z, mu, S, target):
+    def psi1(self, Z, mu, S):
         self._psi_computations(Z, mu, S)
-        target += self._psi1
+        return self._psi1

-    def psi2(self, Z, mu, S, target):
+    def psi2(self, Z, mu, S):
         self._psi_computations(Z, mu, S)
-        target += self._psi2
+        return self._psi2

     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, None)

@@ -154,6 +155,37 @@ class RBF(Kern):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)

+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        self._psi_computations(Z, mu, S)
+
+        #psi1
+        denominator = (self.lengthscale2 * (self._psi1_denom))
+        dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
+        grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
+
+        #psi2
+        term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
+        term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
+        dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
+        grad += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
+
+        return grad
+
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        self._psi_computations(Z, mu, S)
+        #psi1
+        tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
+        grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
+        grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
+
+        tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
+        grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
+        grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
+
+        return grad_mu, grad_S
+
     def gradients_X(self, dL_dK, X, X2=None):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)

@@ -171,36 +203,7 @@ class RBF(Kern):
     #            PSI statistics             #
     #---------------------------------------#

-    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
-        pass
-
-    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        denominator = (self.lengthscale2 * (self._psi1_denom))
-        dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
-        target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
-
-    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
-        self._psi_computations(Z, mu, S)
-        tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
-        target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
-        target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
-
-    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
-        term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
-        dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
-        target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
-
-    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
-        """Think N,num_inducing,num_inducing,input_dim """
-        self._psi_computations(Z, mu, S)
-        tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
-        target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
-        target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
-
-    #---------------------------------------#
     #---------------------------------------#
     #            Precomputations            #
     #---------------------------------------#

@@ -362,6 +365,7 @@ class RBF(Kern):
         #include <omp.h>
         #include <math.h>
         """
+        mu = param_to_array(mu)
         weave.inline(code, support_code=support_code, libraries=['gomp'],
                      arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
                      type_converters=weave.converters.blitz, **self.weave_options)
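The new gradients_Z_variational and gradients_muS_variational methods consume the psi statistics under a fixed shape contract, spelled out in the comments above: psi0 is (N,), psi1 is (N, M) and psi2 is (N, M, M) for N data points and M inducing points, so summing dL_dpsi2 over one inducing axis yields psi1-shaped terms and summing dL_dpsi1 over the data axis yields per-inducing-point terms. A shape-only sketch:

    import numpy as np

    N, M = 7, 4
    dL_dpsi1 = np.random.rand(N, M)
    dL_dpsi2 = np.random.rand(N, M, M)
    assert dL_dpsi1.sum(0).shape == (M,)     # accumulate over data -> per inducing point
    assert dL_dpsi2.sum(1).shape == (N, M)   # collapse one inducing axis -> psi1-shaped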
@@ -57,26 +57,16 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.init = state.pop()
         SparseGP._setstate(self, state)

-    def dL_dmuS(self):
-        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
-        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
-        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
-        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
-
-        return dL_dmu, dL_dS
-
     def KL_divergence(self):
         var_mean = np.square(self.X).sum()
         var_S = np.sum(self.X_variance - np.log(self.X_variance))
         return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data

     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._update_gradients_Z(add=False)
+        super(BayesianGPLVM, self).parameters_changed()

         self._log_marginal_likelihood -= self.KL_divergence()
-        dL_dmu, dL_dS = self.dL_dmuS()
+        dL_dmu, dL_dS = self.kern.gradients_muS_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)

         # dL:
         self.q.mean.gradient = dL_dmu
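KL_divergence above is the closed form of KL( N(mu, S) || N(0, I) ) with diagonal S, summed over all N x Q latent coordinates: 0.5 * sum( mu^2 + S - log S - 1 ), merely regrouped into the code's 0.5*(var_mean + var_S) - 0.5*Q*N. A numerical check of the regrouping:

    import numpy as np

    mu, S = np.random.randn(5, 2), np.random.rand(5, 2)
    kl_code = 0.5 * (np.square(mu).sum() + np.sum(S - np.log(S))) - 0.5 * mu.size
    kl_form = 0.5 * np.sum(np.square(mu) + S - np.log(S) - 1.)
    assert np.allclose(kl_code, kl_form)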
@@ -1,46 +1,89 @@
-from ..core.parameterization.array_core import ObservableArray, ParamList
+from ..core.parameterization.parameter_core import Observable
+from ..core.parameterization.array_core import ParamList

 class Cacher(object):
-    def __init__(self, operation, limit=5):
+    def __init__(self, operation, limit=5, reset_on_first=False):
         self.limit = int(limit)
+        self._reset_on_first = reset_on_first
         self.operation=operation
-        self.cached_inputs = ParamList([])
+        self.cached_inputs = []
         self.cached_outputs = []
         self.inputs_changed = []

-    def __call__(self, X):
-        assert isinstance(X, ObservableArray)
-        if X in self.cached_inputs:
-            i = self.cached_inputs.index(X)
+    def __call__(self, *args):
+        if self._reset_on_first:
+            assert isinstance(args[0], Observable)
+            args[0].add_observer(args[0], self.reset)
+            cached_args = args
+        else:
+            cached_args = args[1:]
+
+        if not all([isinstance(arg, Observable) for arg in cached_args]):
+            return self.operation(*args)
+        if cached_args in self.cached_inputs:
+            i = self.cached_inputs.index(cached_args)
             if self.inputs_changed[i]:
-                self.cached_outputs[i] = self.operation(X)
+                self.cached_outputs[i] = self.operation(*args)
                 self.inputs_changed[i] = False
             return self.cached_outputs[i]
         else:
             if len(self.cached_inputs) == self.limit:
-                X_ = self.cached_inputs.pop(0)
-                X_.remove_observer(self)
+                args_ = self.cached_inputs.pop(0)
+                [a.remove_observer(self) for a in args_]
                 self.inputs_changed.pop(0)
                 self.cached_outputs.pop(0)

-            self.cached_inputs.append(X)
-            self.cached_outputs.append(self.operation(X))
+            self.cached_inputs.append(cached_args)
+            self.cached_outputs.append(self.operation(*args))
             self.inputs_changed.append(False)
-            X.add_observer(self, self.on_cache_changed)
+            [a.add_observer(self, self.on_cache_changed) for a in args]
             return self.cached_outputs[-1]

-    def on_cache_changed(self, X):
-        #print id(X)
-        Xbase = X
-        while Xbase is not None:
-            try:
-                i = self.cached_inputs.index(X)
-                break
-            except ValueError:
-                Xbase = X.base
-                continue
-        self.inputs_changed[i] = True
+    def on_cache_changed(self, arg):
+        self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]

+    def reset(self, obj):
+        [[a.remove_observer(self) for a in args] for args in self.cached_inputs]
+        self.cached_inputs = []
+        self.cached_outputs = []
+        self.inputs_changed = []

+def cache_this(limit=5, reset_on_self=False):
+    def limited_cache(f):
+        c = Cacher(f, limit, reset_on_first=reset_on_self)
+        def f_wrap(*args):
+            return c(*args)
+        f_wrap._cacher = c
+        return f_wrap
+    return limited_cache

+        #Xbase = X
+        #while Xbase is not None:
+            #try:
+                #i = self.cached_inputs.index(X)
+                #break
+            #except ValueError:
+                #Xbase = X.base
+                #continue
+        #self.inputs_changed[i] = True
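The cache_this decorator at the bottom is how this caching layer is consumed elsewhere in the commit (see @cache_this on Linear.K above): each Cacher memoizes one function, keyed on its Observable arguments, and invalidates an entry when any of those arguments notifies a change. A usage sketch (the Demo class is hypothetical; non-Observable arguments simply bypass the cache, per the all(...) check above):

    from GPy.util.caching import cache_this

    class Demo(object):
        @cache_this(limit=3, reset_on_self=False)
        def expensive(self, X):
            # only re-run when X (an Observable) reports a change
            return (X * X).sum()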