mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-09 12:02:38 +02:00
Merge branch 'params' of github.com:SheffieldML/GPy into params
This commit is contained in:
commit
835934ff51
39 changed files with 262 additions and 295 deletions
|
|
@ -260,17 +260,18 @@ class Model(Parameterized):
|
||||||
"""
|
"""
|
||||||
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
|
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
|
||||||
# param_names = self._get_param_names()
|
# param_names = self._get_param_names()
|
||||||
for s in positive_strings:
|
|
||||||
paramlist = self.grep_param_names(".*"+s)
|
# for s in positive_strings:
|
||||||
for param in paramlist:
|
# paramlist = self.grep_param_names(".*"+s)
|
||||||
for p in param.flattened_parameters:
|
# for param in paramlist:
|
||||||
rav_i = set(self._raveled_index_for(p))
|
# for p in param.flattened_parameters:
|
||||||
for constraint in self.constraints.iter_properties():
|
# rav_i = set(self._raveled_index_for(p))
|
||||||
rav_i -= set(self._constraint_indices(p, constraint))
|
# for constraint in self.constraints.iter_properties():
|
||||||
rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
|
# rav_i -= set(self._constraint_indices(p, constraint))
|
||||||
ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
|
# rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
|
||||||
if ind.size != 0:
|
# ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
|
||||||
p[np.unravel_index(ind, p.shape)].constrain_positive()
|
# if ind.size != 0:
|
||||||
|
# p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
|
||||||
# if paramlist:
|
# if paramlist:
|
||||||
# self.__getitem__(None, paramlist).constrain_positive(warning=warning)
|
# self.__getitem__(None, paramlist).constrain_positive(warning=warning)
|
||||||
# currently_constrained = self.all_constrained_indices()
|
# currently_constrained = self.all_constrained_indices()
|
||||||
|
|
@ -405,8 +406,8 @@ class Model(Parameterized):
|
||||||
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
|
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
|
||||||
|
|
||||||
# evaulate around the point x
|
# evaulate around the point x
|
||||||
f1, g1 = self.objective_and_gradients(x + dx)
|
f1 = self.objective_function(x + dx)
|
||||||
f2, g2 = self.objective_and_gradients(x - dx)
|
f2 = self.objective_function(x - dx)
|
||||||
gradient = self.objective_function_gradients(x)
|
gradient = self.objective_function_gradients(x)
|
||||||
|
|
||||||
numerical_gradient = (f1 - f2) / (2 * dx)
|
numerical_gradient = (f1 - f2) / (2 * dx)
|
||||||
|
|
@ -416,7 +417,7 @@ class Model(Parameterized):
|
||||||
else:
|
else:
|
||||||
# check the gradient of each parameter individually, and do some pretty printing
|
# check the gradient of each parameter individually, and do some pretty printing
|
||||||
try:
|
try:
|
||||||
names = self._get_param_names_transformed()
|
names = self._get_param_names()
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
names = ['Variable %i' % i for i in range(len(x))]
|
names = ['Variable %i' % i for i in range(len(x))]
|
||||||
# Prepare for pretty-printing
|
# Prepare for pretty-printing
|
||||||
|
|
@ -430,41 +431,45 @@ class Model(Parameterized):
|
||||||
header_string = map(lambda x: '|'.join(x), [header_string])
|
header_string = map(lambda x: '|'.join(x), [header_string])
|
||||||
separator = '-' * len(header_string[0])
|
separator = '-' * len(header_string[0])
|
||||||
print '\n'.join([header_string[0], separator])
|
print '\n'.join([header_string[0], separator])
|
||||||
|
|
||||||
if target_param is None:
|
if target_param is None:
|
||||||
param_list = range(len(x))
|
param_list = range(len(x))
|
||||||
else:
|
else:
|
||||||
param_list = self._raveled_index_for(target_param)
|
param_list = self._raveled_index_for(target_param)
|
||||||
if self._has_fixes():
|
if self._has_fixes():
|
||||||
param_list = param_list[self._fixes_]
|
param_list = np.intersect1d(param_list, np.r_[:self.size][self._fixes_], True)
|
||||||
if not np.any(param_list):
|
|
||||||
|
if param_list.size == 0:
|
||||||
print "No free parameters to check"
|
print "No free parameters to check"
|
||||||
return
|
return
|
||||||
|
|
||||||
gradient = self.objective_function_gradients(x)
|
gradient = self.objective_function_gradients(x)
|
||||||
np.where(gradient==0, 1e-312, gradient)
|
np.where(gradient==0, 1e-312, gradient)
|
||||||
|
ret = True
|
||||||
for i in param_list:
|
for i, ind in enumerate(param_list):
|
||||||
xx = x.copy()
|
xx = x.copy()
|
||||||
xx[i] += step
|
xx[ind] += step
|
||||||
f1, g1 = self.objective_and_gradients(xx)
|
f1 = self.objective_function(xx)
|
||||||
xx[i] -= 2.*step
|
xx[ind] -= 2.*step
|
||||||
f2, g2 = self.objective_and_gradients(xx)
|
f2 = self.objective_function(xx)
|
||||||
|
|
||||||
numerical_gradient = (f1 - f2) / (2 * step)
|
numerical_gradient = (f1 - f2) / (2 * step)
|
||||||
ratio = (f1 - f2) / (2 * step * gradient[i])
|
ratio = (f1 - f2) / (2 * step * gradient[ind])
|
||||||
difference = np.abs((f1 - f2) / 2 / step - gradient[i])
|
difference = np.abs((f1 - f2) / 2 / step - gradient[ind])
|
||||||
|
|
||||||
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
|
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
|
||||||
formatted_name = "\033[92m {0} \033[0m".format(names[i])
|
formatted_name = "\033[92m {0} \033[0m".format(names[ind])
|
||||||
|
ret &= True
|
||||||
else:
|
else:
|
||||||
formatted_name = "\033[91m {0} \033[0m".format(names[i])
|
formatted_name = "\033[91m {0} \033[0m".format(names[ind])
|
||||||
|
ret &= False
|
||||||
|
|
||||||
r = '%.6f' % float(ratio)
|
r = '%.6f' % float(ratio)
|
||||||
d = '%.6f' % float(difference)
|
d = '%.6f' % float(difference)
|
||||||
g = '%.6f' % gradient[i]
|
g = '%.6f' % gradient[ind]
|
||||||
ng = '%.6f' % float(numerical_gradient)
|
ng = '%.6f' % float(numerical_gradient)
|
||||||
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
|
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
|
||||||
print grad_string
|
print grad_string
|
||||||
|
return ret
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def input_sensitivity(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,24 @@ class ParamList(list):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
class C(np.ndarray):
|
||||||
|
__array_priority__ = 1.
|
||||||
|
def __new__(cls, array):
|
||||||
|
obj = array.view(cls)
|
||||||
|
return obj
|
||||||
|
#def __array_finalize__(self, obj):
|
||||||
|
# #print 'finalize'
|
||||||
|
# return obj
|
||||||
|
def __array_prepare__(self, out_arr, context):
|
||||||
|
#print 'prepare'
|
||||||
|
while type(out_arr) is C:
|
||||||
|
out_arr = out_arr.base
|
||||||
|
return out_arr
|
||||||
|
def __array_wrap__(self, out_arr, context):
|
||||||
|
#print 'wrap', type(self), type(out_arr), context
|
||||||
|
while type(out_arr) is C:
|
||||||
|
out_arr = out_arr.base
|
||||||
|
return out_arr
|
||||||
|
|
||||||
class ObservableArray(ListArray, Observable):
|
class ObservableArray(ListArray, Observable):
|
||||||
"""
|
"""
|
||||||
|
|
@ -35,7 +53,7 @@ class ObservableArray(ListArray, Observable):
|
||||||
will be called every time this array changes. The callable
|
will be called every time this array changes. The callable
|
||||||
takes exactly one argument, which is this array itself.
|
takes exactly one argument, which is this array itself.
|
||||||
"""
|
"""
|
||||||
__array_priority__ = 0 # Never give back Param
|
__array_priority__ = -1 # Never give back ObservableArray
|
||||||
def __new__(cls, input_array):
|
def __new__(cls, input_array):
|
||||||
cls.__name__ = "ObservableArray\n "
|
cls.__name__ = "ObservableArray\n "
|
||||||
obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls)
|
obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls)
|
||||||
|
|
|
||||||
|
|
@ -24,8 +24,11 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
|
||||||
"""
|
"""
|
||||||
Parameter object for GPy models.
|
Parameter object for GPy models.
|
||||||
|
|
||||||
:param name: name of the parameter to be printed
|
:param str name: name of the parameter to be printed
|
||||||
:param input_array: array which this parameter handles
|
:param input_array: array which this parameter handles
|
||||||
|
:type input_array: numpy.ndarray
|
||||||
|
:param default_constraint: The default constraint for this parameter
|
||||||
|
:type default_constraint:
|
||||||
|
|
||||||
You can add/remove constraints by calling constrain on the parameter itself, e.g:
|
You can add/remove constraints by calling constrain on the parameter itself, e.g:
|
||||||
|
|
||||||
|
|
@ -40,19 +43,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
|
||||||
|
|
||||||
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
|
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
|
||||||
|
|
||||||
This ndarray can be stored in lists and checked if it is in.
|
|
||||||
|
|
||||||
>>> import numpy as np
|
|
||||||
>>> x = np.random.normal(size=(10,3))
|
|
||||||
>>> x in [[1], x, [3]]
|
|
||||||
True
|
|
||||||
|
|
||||||
WARNING: This overrides the functionality of x==y!!!
|
|
||||||
Use numpy.equal(x,y) for element-wise equality testing.
|
|
||||||
"""
|
"""
|
||||||
__array_priority__ = 0 # Never give back Param
|
__array_priority__ = -1 # Never give back Param
|
||||||
_fixes_ = None
|
_fixes_ = None
|
||||||
def __new__(cls, name, input_array, *args, **kwargs):
|
def __new__(cls, name, input_array, default_constraint=None):
|
||||||
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
|
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
|
||||||
obj._current_slice_ = (slice(obj.shape[0]),)
|
obj._current_slice_ = (slice(obj.shape[0]),)
|
||||||
obj._realshape_ = obj.shape
|
obj._realshape_ = obj.shape
|
||||||
|
|
@ -66,8 +60,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
|
||||||
obj.gradient = None
|
obj.gradient = None
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def __init__(self, name, input_array):
|
def __init__(self, name, input_array, default_constraint=None):
|
||||||
super(Param, self).__init__(name=name)
|
super(Param, self).__init__(name=name, default_constraint=default_constraint)
|
||||||
|
|
||||||
def __array_finalize__(self, obj):
|
def __array_finalize__(self, obj):
|
||||||
# see InfoArray.__array_finalize__ for comments
|
# see InfoArray.__array_finalize__ for comments
|
||||||
|
|
@ -75,6 +69,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
|
||||||
super(Param, self).__array_finalize__(obj)
|
super(Param, self).__array_finalize__(obj)
|
||||||
self._direct_parent_ = getattr(obj, '_direct_parent_', None)
|
self._direct_parent_ = getattr(obj, '_direct_parent_', None)
|
||||||
self._parent_index_ = getattr(obj, '_parent_index_', None)
|
self._parent_index_ = getattr(obj, '_parent_index_', None)
|
||||||
|
self._default_constraint_ = getattr(obj, '_default_constraint_', None)
|
||||||
self._current_slice_ = getattr(obj, '_current_slice_', None)
|
self._current_slice_ = getattr(obj, '_current_slice_', None)
|
||||||
self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
|
self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
|
||||||
self._tied_to_ = getattr(obj, '_tied_to_', None)
|
self._tied_to_ = getattr(obj, '_tied_to_', None)
|
||||||
|
|
@ -97,6 +92,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
|
||||||
(self.name,
|
(self.name,
|
||||||
self._direct_parent_,
|
self._direct_parent_,
|
||||||
self._parent_index_,
|
self._parent_index_,
|
||||||
|
self._default_constraint_,
|
||||||
self._current_slice_,
|
self._current_slice_,
|
||||||
self._realshape_,
|
self._realshape_,
|
||||||
self._realsize_,
|
self._realsize_,
|
||||||
|
|
@ -117,6 +113,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
|
||||||
self._realsize_ = state.pop()
|
self._realsize_ = state.pop()
|
||||||
self._realshape_ = state.pop()
|
self._realshape_ = state.pop()
|
||||||
self._current_slice_ = state.pop()
|
self._current_slice_ = state.pop()
|
||||||
|
self._default_constraint_ = state.pop()
|
||||||
self._parent_index_ = state.pop()
|
self._parent_index_ = state.pop()
|
||||||
self._direct_parent_ = state.pop()
|
self._direct_parent_ = state.pop()
|
||||||
self.name = state.pop()
|
self.name = state.pop()
|
||||||
|
|
|
||||||
|
|
@ -91,8 +91,9 @@ class Gradcheckable(Parentable):
|
||||||
|
|
||||||
|
|
||||||
class Constrainable(Nameable):
|
class Constrainable(Nameable):
|
||||||
def __init__(self, name):
|
def __init__(self, name, default_constraint=None):
|
||||||
super(Constrainable,self).__init__(name)
|
super(Constrainable,self).__init__(name)
|
||||||
|
self._default_constraint_ = default_constraint
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Fixing Parameters:
|
# Fixing Parameters:
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
|
||||||
|
|
@ -171,6 +171,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
|
||||||
for t, ind in param._constraints_.iteritems():
|
for t, ind in param._constraints_.iteritems():
|
||||||
self.constraints.add(t, ind+self._offset_for(param))
|
self.constraints.add(t, ind+self._offset_for(param))
|
||||||
param._constraints_.clear()
|
param._constraints_.clear()
|
||||||
|
if param._default_constraint_ is not None:
|
||||||
|
self._add_constrain(param, param._default_constraint_, False)
|
||||||
if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED
|
if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED
|
||||||
self._fixes_= None
|
self._fixes_= None
|
||||||
|
|
||||||
|
|
@ -312,8 +314,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Optimization handles:
|
# Optimization handles:
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
def _get_param_names_transformed(self):
|
def _get_param_names(self):
|
||||||
n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()])
|
n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()])
|
||||||
|
return n
|
||||||
|
def _get_param_names_transformed(self):
|
||||||
|
n = self._get_param_names()
|
||||||
if self._has_fixes():
|
if self._has_fixes():
|
||||||
return n[self._fixes_]
|
return n[self._fixes_]
|
||||||
return n
|
return n
|
||||||
|
|
|
||||||
|
|
@ -154,13 +154,13 @@ class SVIGP(GP):
|
||||||
self.psi2 = None
|
self.psi2 = None
|
||||||
|
|
||||||
def dL_dtheta(self):
|
def dL_dtheta(self):
|
||||||
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
|
dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
|
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
|
||||||
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
|
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
|
||||||
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
|
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
|
||||||
else:
|
else:
|
||||||
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z)
|
dL_dtheta += self.kern._param_grad_helper(self.dL_dpsi1, self.X_batch, self.Z)
|
||||||
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
|
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
|
||||||
return dL_dtheta
|
return dL_dtheta
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,73 +1,54 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import numpy as np
|
class VarDTC(object):
|
||||||
from ...util.linalg import mdot, jitchol, chol_inv, tdot, dtrtrs
|
def __init__(self):
|
||||||
from ...core import SparseGP
|
self.const_jitter = 1e-6
|
||||||
|
|
||||||
class FITC(SparseGP):
|
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||||
"""
|
|
||||||
|
|
||||||
Sparse FITC approximation
|
num_inducing, _ = Z.shape
|
||||||
|
num_data, output_dim = Y.shape
|
||||||
|
|
||||||
:param X: inputs
|
#make sure we're not using variational uncertain inputs
|
||||||
:type X: np.ndarray (num_data x Q)
|
assert = X_variance is None, "variational inducing inputs only for use with varDTC inference"
|
||||||
:param likelihood: a likelihood instance, containing the observed data
|
#Note: we can;t do the variational thing after making the GITC conditional approximation because K~ appears in the log determinant.
|
||||||
:type likelihood: GPy.likelihood.(Gaussian | EP)
|
|
||||||
:param kernel: the kernel (covariance function). See link kernels
|
|
||||||
:type kernel: a GPy.kern.kern instance
|
|
||||||
:param Z: inducing inputs (optional, see note)
|
|
||||||
:type Z: np.ndarray (M x Q) | None
|
|
||||||
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
|
|
||||||
:type normalize_(X|Y): bool
|
|
||||||
|
|
||||||
"""
|
#see whether we've got a different noise variance for each datum
|
||||||
|
sigma2 = np.squeeze(likelihood.variance)
|
||||||
|
het_noise = False
|
||||||
|
if sigma2.size <1:
|
||||||
|
het_noise = True
|
||||||
|
|
||||||
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
|
|
||||||
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
|
|
||||||
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
|
|
||||||
|
|
||||||
def update_likelihood_approximation(self, **kwargs):
|
|
||||||
"""
|
|
||||||
Approximates a non-Gaussian likelihood using Expectation Propagation
|
|
||||||
|
|
||||||
For a Gaussian likelihood, no iteration is required:
|
|
||||||
this function does nothing
|
|
||||||
"""
|
|
||||||
self.likelihood.restart()
|
|
||||||
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
|
|
||||||
self._set_params(self._get_params())
|
|
||||||
|
|
||||||
def _compute_kernel_matrices(self):
|
|
||||||
# kernel computations, using BGPLVM notation
|
# kernel computations, using BGPLVM notation
|
||||||
self.Kmm = self.kern.K(self.Z)
|
Kmm = kern.K(Z)
|
||||||
self.psi0 = self.kern.Kdiag(self.X)
|
Kdiag = kern.Kdiag(X)
|
||||||
self.psi1 = self.kern.K(self.Z, self.X)
|
Kmn = kern.K(X, Z)
|
||||||
self.psi2 = None
|
|
||||||
|
|
||||||
def _computations(self):
|
|
||||||
#factor Kmm
|
#factor Kmm
|
||||||
self.Lm = jitchol(self.Kmm)
|
Lm = jitchol(Kmm)
|
||||||
self.Lmi,info = dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
|
V = dtrtrs(Lm, Kmn, lower=1)
|
||||||
Lmipsi1 = np.dot(self.Lmi,self.psi1)
|
|
||||||
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
|
|
||||||
self.Diag0 = self.psi0 - np.diag(self.Qnn)
|
|
||||||
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision
|
|
||||||
self.V_star = self.beta_star * self.likelihood.Y
|
|
||||||
|
|
||||||
# The rather complex computations of self.A
|
#compute effective noise
|
||||||
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
|
g_sn2 = sigma2 + Kdiag - np.sum(V*V, 0)
|
||||||
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
|
|
||||||
self.A = tdot(tmp)
|
|
||||||
|
|
||||||
# factor B
|
#compute and factor B
|
||||||
self.B = np.eye(self.num_inducing) + self.A
|
tmp = Kmn / np.sqrt(g_snd)
|
||||||
self.LB = jitchol(self.B)
|
tmp, _ = dtrtrs(Lm, tmp, lower=1)
|
||||||
self.LBi = chol_inv(self.LB)
|
A = tdot(tmp)
|
||||||
self.psi1V = np.dot(self.psi1, self.V_star)
|
B = np.eye(num_inducing) + A
|
||||||
|
Bi, Lb, LBi, log_det_B = pdinv(B)
|
||||||
|
|
||||||
Lmi_psi1V, info = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
|
#compute posterior parameters
|
||||||
self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
|
tmp, _ = dtrtrs()
|
||||||
|
woodbury_vec =
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
psi1V = np.dot(self.psi1, self.V_star)
|
||||||
|
Lmi_psi1V, _ = dtrtrs(Lm, self.psi1V, lower=1, trans=0)
|
||||||
|
LBi_Lmi_psi1V, _ = dtrtrs(LB, Lmi_psi1V, lower=1, trans=0)
|
||||||
|
|
||||||
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
|
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
|
||||||
b_psi1_Ki = self.beta_star * Kmmipsi1.T
|
b_psi1_Ki = self.beta_star * Kmmipsi1.T
|
||||||
|
|
@ -116,8 +97,8 @@ class FITC(SparseGP):
|
||||||
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
|
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
|
||||||
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
|
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
|
||||||
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
|
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
|
||||||
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
|
self._dpsi1_dtheta += self.kern._param_grad_helper(_dpsi1,self.X[i:i+1,:],self.Z)
|
||||||
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
|
self._dKmm_dtheta += self.kern._param_grad_helper(_dKmm,self.Z)
|
||||||
self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
|
self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
|
||||||
self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
|
self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
|
||||||
|
|
||||||
|
|
@ -163,8 +144,8 @@ class FITC(SparseGP):
|
||||||
|
|
||||||
def dL_dtheta(self):
|
def dL_dtheta(self):
|
||||||
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
|
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
|
||||||
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
|
dL_dtheta += self.kern._param_grad_helper(self._dL_dpsi1,self.X,self.Z)
|
||||||
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
|
dL_dtheta += self.kern._param_grad_helper(self._dL_dKmm,X=self.Z)
|
||||||
dL_dtheta += self._dKmm_dtheta
|
dL_dtheta += self._dKmm_dtheta
|
||||||
dL_dtheta += self._dpsi1_dtheta
|
dL_dtheta += self._dpsi1_dtheta
|
||||||
return dL_dtheta
|
return dL_dtheta
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from parts.prod import Prod as prod
|
||||||
from parts.linear import Linear
|
from parts.linear import Linear
|
||||||
from parts.kernpart import Kernpart
|
from parts.kernpart import Kernpart
|
||||||
from ..core.parameterization import Parameterized
|
from ..core.parameterization import Parameterized
|
||||||
|
from GPy.core.parameterization.param import Param
|
||||||
|
|
||||||
class kern(Parameterized):
|
class kern(Parameterized):
|
||||||
def __init__(self, input_dim, parts=[], input_slices=None):
|
def __init__(self, input_dim, parts=[], input_slices=None):
|
||||||
|
|
@ -84,7 +85,7 @@ class kern(Parameterized):
|
||||||
# represents the gradient in the transformed space (i.e. that given by
|
# represents the gradient in the transformed space (i.e. that given by
|
||||||
# get_params_transformed())
|
# get_params_transformed())
|
||||||
#
|
#
|
||||||
# :param g: the gradient vector for the current model, usually created by dK_dtheta
|
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
|
||||||
# """
|
# """
|
||||||
# x = self._get_params()
|
# x = self._get_params()
|
||||||
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
|
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
|
||||||
|
|
@ -294,7 +295,7 @@ class kern(Parameterized):
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
|
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2=None):
|
def _param_grad_helper(self, dL_dK, X, X2=None):
|
||||||
"""
|
"""
|
||||||
Compute the gradient of the covariance function with respect to the parameters.
|
Compute the gradient of the covariance function with respect to the parameters.
|
||||||
|
|
||||||
|
|
@ -310,9 +311,9 @@ class kern(Parameterized):
|
||||||
assert X.shape[1] == self.input_dim
|
assert X.shape[1] == self.input_dim
|
||||||
target = np.zeros(self.size)
|
target = np.zeros(self.size)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||||
else:
|
else:
|
||||||
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||||
|
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
|
|
@ -484,6 +485,7 @@ from GPy.core.model import Model
|
||||||
class Kern_check_model(Model):
|
class Kern_check_model(Model):
|
||||||
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
|
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||||
|
Model.__init__(self, 'kernel_test_model')
|
||||||
num_samples = 20
|
num_samples = 20
|
||||||
num_samples2 = 10
|
num_samples2 = 10
|
||||||
if kernel==None:
|
if kernel==None:
|
||||||
|
|
@ -497,12 +499,10 @@ class Kern_check_model(Model):
|
||||||
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
||||||
|
|
||||||
self.kernel=kernel
|
self.kernel=kernel
|
||||||
|
self.add_parameter(kernel)
|
||||||
self.X = X
|
self.X = X
|
||||||
self.X2 = X2
|
self.X2 = X2
|
||||||
self.dL_dK = dL_dK
|
self.dL_dK = dL_dK
|
||||||
#self.constrained_indices=[]
|
|
||||||
#self.constraints=[]
|
|
||||||
Model.__init__(self, 'kernel_test_model')
|
|
||||||
|
|
||||||
def is_positive_definite(self):
|
def is_positive_definite(self):
|
||||||
v = np.linalg.eig(self.kernel.K(self.X))[0]
|
v = np.linalg.eig(self.kernel.K(self.X))[0]
|
||||||
|
|
@ -511,15 +511,6 @@ class Kern_check_model(Model):
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
return self.kernel._get_params()
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
return self.kernel._get_param_names()
|
|
||||||
|
|
||||||
def _set_params(self, x):
|
|
||||||
self.kernel._set_params(x)
|
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
|
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
|
||||||
|
|
||||||
|
|
@ -532,7 +523,7 @@ class Kern_check_dK_dtheta(Kern_check_model):
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
|
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
|
||||||
|
|
||||||
class Kern_check_dKdiag_dtheta(Kern_check_model):
|
class Kern_check_dKdiag_dtheta(Kern_check_model):
|
||||||
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
|
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
|
||||||
|
|
@ -540,6 +531,8 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||||
if dL_dK==None:
|
if dL_dK==None:
|
||||||
self.dL_dK = np.ones((self.X.shape[0]))
|
self.dL_dK = np.ones((self.X.shape[0]))
|
||||||
|
def parameters_changed(self):
|
||||||
|
self.kernel.update_gradients_full(self.dL_dK, self.X)
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
||||||
|
|
@ -551,23 +544,16 @@ class Kern_check_dK_dX(Kern_check_model):
|
||||||
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||||
|
self.remove_parameter(kernel)
|
||||||
|
self.X = Param('X', self.X)
|
||||||
|
self.add_parameter(self.X)
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
|
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
|
||||||
|
|
||||||
def _get_param_names(self):
|
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
||||||
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
return self.X.flatten()
|
|
||||||
|
|
||||||
def _set_params(self, x):
|
|
||||||
self.X=x.reshape(self.X.shape)
|
|
||||||
|
|
||||||
class Kern_check_dKdiag_dX(Kern_check_model):
|
|
||||||
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||||
if dL_dK==None:
|
if dL_dK==None:
|
||||||
self.dL_dK = np.ones((self.X.shape[0]))
|
self.dL_dK = np.ones((self.X.shape[0]))
|
||||||
|
|
||||||
|
|
@ -577,15 +563,6 @@ class Kern_check_dKdiag_dX(Kern_check_model):
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
|
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
return self.X.flatten()
|
|
||||||
|
|
||||||
def _set_params(self, x):
|
|
||||||
self.X=x.reshape(self.X.shape)
|
|
||||||
|
|
||||||
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
||||||
"""
|
"""
|
||||||
This function runs on kernels to check the correctness of their
|
This function runs on kernels to check the correctness of their
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ class Brownian(Kernpart):
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
target += self.variance*X.flatten()
|
target += self.variance*X.flatten()
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
X2 = X
|
X2 = X
|
||||||
target += np.sum(np.fmin(X,X2.T)*dL_dK)
|
target += np.sum(np.fmin(X,X2.T)*dL_dK)
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ class Matern32(Kernpart):
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||||
np.add(target, self.variance, target)
|
np.add(target, self.variance, target)
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ class Matern52(Kernpart):
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||||
np.add(target,self.variance,target)
|
np.add(target,self.variance,target)
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
|
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,7 @@ class ODE_1(Kernpart):
|
||||||
|
|
||||||
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
|
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
dist = np.abs(X - X2.T)
|
dist = np.abs(X - X2.T)
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,7 @@ class Eq_ode1(Kernpart):
|
||||||
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
|
|
||||||
# First extract times and indices.
|
# First extract times and indices.
|
||||||
self._extract_t_indices(X, X2, dL_dK=dL_dK)
|
self._extract_t_indices(X, X2, dL_dK=dL_dK)
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ class Exponential(Kernpart):
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||||
np.add(target, self.variance, target)
|
np.add(target, self.variance, target)
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ class FiniteDimensional(Kernpart):
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
product = np.diag(self.K(X, X))
|
product = np.diag(self.K(X, X))
|
||||||
np.add(target,product,target)
|
np.add(target,product,target)
|
||||||
def dK_dtheta(self,X,X2,target):
|
def _param_grad_helper(self,X,X2,target):
|
||||||
"""Return shape is NxMx(Ntheta)"""
|
"""Return shape is NxMx(Ntheta)"""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
FX = np.column_stack([f(X) for f in self.F])
|
FX = np.column_stack([f(X) for f in self.F])
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ class Fixed(Kernpart):
|
||||||
def K(self, X, X2, target):
|
def K(self, X, X2, target):
|
||||||
target += self.variance * self.fixed_K
|
target += self.variance * self.fixed_K
|
||||||
|
|
||||||
def dK_dtheta(self, partial, X, X2, target):
|
def _param_grad_helper(self, partial, X, X2, target):
|
||||||
target += (partial * self.fixed_K).sum()
|
target += (partial * self.fixed_K).sum()
|
||||||
|
|
||||||
def gradients_X(self, partial, X, X2, target):
|
def gradients_X(self, partial, X, X2, target):
|
||||||
|
|
|
||||||
|
|
@ -85,7 +85,7 @@ class Gibbs(Kernpart):
|
||||||
"""Compute the diagonal of the covariance matrix for X."""
|
"""Compute the diagonal of the covariance matrix for X."""
|
||||||
np.add(target, self.variance, target)
|
np.add(target, self.variance, target)
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
"""Derivative of the covariance with respect to the parameters."""
|
"""Derivative of the covariance with respect to the parameters."""
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
self._dK_computations(dL_dK)
|
self._dK_computations(dL_dK)
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ class Hetero(Kernpart):
|
||||||
"""Helper function for computing the diagonal elements of the covariance."""
|
"""Helper function for computing the diagonal elements of the covariance."""
|
||||||
return self.mapping.f(X).flatten()**2
|
return self.mapping.f(X).flatten()**2
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
"""Derivative of the covariance with respect to the parameters."""
|
"""Derivative of the covariance with respect to the parameters."""
|
||||||
if (X2 is None) or (X2 is X):
|
if (X2 is None) or (X2 is X):
|
||||||
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
|
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
|
||||||
|
|
|
||||||
|
|
@ -50,9 +50,9 @@ class Hierarchical(Kernpart):
|
||||||
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||||
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
X, X2, slices, slices2 = self._sort_slices(X,X2)
|
X, X2, slices, slices2 = self._sort_slices(X,X2)
|
||||||
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
|
[[[[k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
|
||||||
|
|
||||||
|
|
||||||
def gradients_X(self,dL_dK,X,X2,target):
|
def gradients_X(self,dL_dK,X,X2,target):
|
||||||
|
|
|
||||||
|
|
@ -70,13 +70,13 @@ class IndependentOutputs(Kernpart):
|
||||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||||
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
X2,slices2 = X,slices
|
X2,slices2 = X,slices
|
||||||
else:
|
else:
|
||||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||||
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
[[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||||
|
|
||||||
|
|
||||||
def gradients_X(self,dL_dK,X,X2,target):
|
def gradients_X(self,dL_dK,X,X2,target):
|
||||||
|
|
|
||||||
|
|
@ -75,14 +75,14 @@ class Kernpart(Parameterized):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||||
# In the base case compute this by calling dK_dtheta. Need to
|
# In the base case compute this by calling _param_grad_helper. Need to
|
||||||
# override for stationary covariances (for example) to save
|
# override for stationary covariances (for example) to save
|
||||||
# time.
|
# time.
|
||||||
for i in range(X.shape[0]):
|
for i in range(X.shape[0]):
|
||||||
self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
|
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
|
||||||
def psi0(self,Z,mu,S,target):
|
def psi0(self,Z,mu,S,target):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
|
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
|
||||||
|
|
@ -109,8 +109,15 @@ class Kernpart(Parameterized):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def dKdiag_dX(self, dL_dK, X, target):
|
def dKdiag_dX(self, dL_dK, X, target):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
def update_gradients_full(self, dL_dK, X):
|
||||||
|
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||||
|
raise NotImplementedError
|
||||||
|
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||||
|
"""Set the gradients of all parameters when doing sparse (M) inference."""
|
||||||
|
raise NotImplementedError
|
||||||
|
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
|
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
class Kernpart_stationary(Kernpart):
|
class Kernpart_stationary(Kernpart):
|
||||||
def __init__(self, input_dim, lengthscale=None, ARD=False):
|
def __init__(self, input_dim, lengthscale=None, ARD=False):
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from kernpart import Kernpart
|
||||||
from ...util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
from ...util.misc import fast_array_equal, param_to_array
|
from ...util.misc import fast_array_equal, param_to_array
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
class Linear(Kernpart):
|
class Linear(Kernpart):
|
||||||
"""
|
"""
|
||||||
|
|
@ -43,8 +44,9 @@ class Linear(Kernpart):
|
||||||
else:
|
else:
|
||||||
variances = np.ones(self.input_dim)
|
variances = np.ones(self.input_dim)
|
||||||
|
|
||||||
self.variances = Param('variances', variances)
|
self.variances = Param('variances', variances, Logexp())
|
||||||
self.add_parameters(self.variances)
|
self.variances.gradient = np.zeros(self.variances.shape)
|
||||||
|
self.add_parameter(self.variances)
|
||||||
self.variances.add_observer(self, self.update_variance)
|
self.variances.add_observer(self, self.update_variance)
|
||||||
|
|
||||||
# initialize cache
|
# initialize cache
|
||||||
|
|
@ -57,41 +59,34 @@ class Linear(Kernpart):
|
||||||
def on_input_change(self, X):
|
def on_input_change(self, X):
|
||||||
self._K_computations(X, None)
|
self._K_computations(X, None)
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X):
|
||||||
|
#self.variances.gradient[:] = 0
|
||||||
|
self._param_grad_helper(dL_dK, X, self.variances.gradient)
|
||||||
|
|
||||||
|
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||||
|
tmp = dL_dKdiag[:, None] * X ** 2
|
||||||
|
if self.ARD:
|
||||||
|
self.variances.gradient = tmp.sum(0)
|
||||||
|
else:
|
||||||
|
self.variances.gradient = tmp.sum()
|
||||||
|
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
|
||||||
|
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
|
||||||
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||||
self._psi_computations(Z, mu, S)
|
self._psi_computations(Z, mu, S)
|
||||||
|
|
||||||
# psi0:
|
# psi0:
|
||||||
tmp = dL_dpsi0[:, None] * self.mu2_S
|
tmp = dL_dpsi0[:, None] * self.mu2_S
|
||||||
if self.ARD: self.variances.gradient = tmp.sum(0)
|
if self.ARD: self.variances.gradient[:] = tmp.sum(0)
|
||||||
else: self.variances.gradient = tmp.sum()
|
else: self.variances.gradient[:] = tmp.sum()
|
||||||
|
|
||||||
#psi1
|
#psi1
|
||||||
self.dK_dtheta(dL_dpsi1, mu, Z, self.variances.gradient)
|
self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
|
||||||
|
#psi2
|
||||||
#from psi2
|
|
||||||
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
|
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
|
||||||
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
|
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
|
||||||
else: self.variances.gradient += tmp.sum()
|
else: self.variances.gradient += tmp.sum()
|
||||||
|
|
||||||
#from Kmm
|
#from Kmm
|
||||||
self._K_computations(Z, None)
|
self._K_computations(Z, None)
|
||||||
self.dK_dtheta(dL_dKmm, Z, None, self.variances.gradient)
|
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
|
||||||
|
|
||||||
|
|
||||||
# def _get_params(self):
|
|
||||||
# return self.variances
|
|
||||||
#
|
|
||||||
# def _set_params(self, x):
|
|
||||||
# assert x.size == (self.num_params)
|
|
||||||
# self.variances = x
|
|
||||||
#def parameters_changed(self):
|
|
||||||
# self.variances2 = np.square(self.variances)
|
|
||||||
#
|
|
||||||
# def _get_param_names(self):
|
|
||||||
# if self.num_params == 1:
|
|
||||||
# return ['variance']
|
|
||||||
# else:
|
|
||||||
# return ['variance_%i' % i for i in range(self.variances.size)]
|
|
||||||
|
|
||||||
def K(self, X, X2, target):
|
def K(self, X, X2, target):
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
|
|
@ -109,7 +104,7 @@ class Linear(Kernpart):
|
||||||
def Kdiag(self, X, target):
|
def Kdiag(self, X, target):
|
||||||
np.add(target, np.sum(self.variances * np.square(X), -1), target)
|
np.add(target, np.sum(self.variances * np.square(X), -1), target)
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
|
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
|
||||||
|
|
@ -121,13 +116,6 @@ class Linear(Kernpart):
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
target += np.sum(self._dot_product * dL_dK)
|
target += np.sum(self._dot_product * dL_dK)
|
||||||
|
|
||||||
def dKdiag_dtheta(self, dL_dKdiag, X, target):
|
|
||||||
tmp = dL_dKdiag[:, None] * X ** 2
|
|
||||||
if self.ARD:
|
|
||||||
target += tmp.sum(0)
|
|
||||||
else:
|
|
||||||
target += tmp.sum()
|
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2, target):
|
def gradients_X(self, dL_dK, X, X2, target):
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||||
|
|
@ -145,14 +133,6 @@ class Linear(Kernpart):
|
||||||
self._psi_computations(Z, mu, S)
|
self._psi_computations(Z, mu, S)
|
||||||
target += np.sum(self.variances * self.mu2_S, 1)
|
target += np.sum(self.variances * self.mu2_S, 1)
|
||||||
|
|
||||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
tmp = dL_dpsi0[:, None] * self.mu2_S
|
|
||||||
if self.ARD:
|
|
||||||
target += tmp.sum(0)
|
|
||||||
else:
|
|
||||||
target += tmp.sum()
|
|
||||||
|
|
||||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
||||||
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
|
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
|
||||||
target_S += dL_dpsi0[:, None] * self.variances
|
target_S += dL_dpsi0[:, None] * self.variances
|
||||||
|
|
@ -161,10 +141,6 @@ class Linear(Kernpart):
|
||||||
"""the variance, it does nothing"""
|
"""the variance, it does nothing"""
|
||||||
self._psi1 = self.K(mu, Z, target)
|
self._psi1 = self.K(mu, Z, target)
|
||||||
|
|
||||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
|
|
||||||
"""the variance, it does nothing"""
|
|
||||||
self.dK_dtheta(dL_dpsi1, mu, Z, target)
|
|
||||||
|
|
||||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
||||||
"""Do nothing for S, it does not affect psi1"""
|
"""Do nothing for S, it does not affect psi1"""
|
||||||
self._psi_computations(Z, mu, S)
|
self._psi_computations(Z, mu, S)
|
||||||
|
|
@ -185,21 +161,13 @@ class Linear(Kernpart):
|
||||||
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
|
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
self.K(mu,Z,tmp)
|
self.K(mu,Z,tmp)
|
||||||
self.dK_dtheta(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
|
self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
|
||||||
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
|
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
target += result.sum(0).sum(0).sum(0)
|
target += result.sum(0).sum(0).sum(0)
|
||||||
else:
|
else:
|
||||||
target += result.sum()
|
target += result.sum()
|
||||||
|
|
||||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
|
|
||||||
if self.ARD:
|
|
||||||
target += tmp.sum(0).sum(0).sum(0)
|
|
||||||
else:
|
|
||||||
target += tmp.sum()
|
|
||||||
|
|
||||||
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
||||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
self.K(mu,Z,tmp)
|
self.K(mu,Z,tmp)
|
||||||
|
|
@ -309,11 +277,11 @@ class Linear(Kernpart):
|
||||||
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
|
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
|
||||||
self._X = X.copy()
|
self._X = X.copy()
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
self._dot_product = tdot(X)
|
self._dot_product = tdot(param_to_array(X))
|
||||||
self._X2 = None
|
self._X2 = None
|
||||||
else:
|
else:
|
||||||
self._X2 = X2.copy()
|
self._X2 = X2.copy()
|
||||||
self._dot_product = np.dot(X, X2.T)
|
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
|
||||||
|
|
||||||
def _psi_computations(self, Z, mu, S):
|
def _psi_computations(self, Z, mu, S):
|
||||||
# here are the "statistics" for psi1 and psi2
|
# here are the "statistics" for psi1 and psi2
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ class MLP(Kernpart):
|
||||||
self._K_diag_computations(X)
|
self._K_diag_computations(X)
|
||||||
target+= self.variance*self._K_diag_dvar
|
target+= self.variance*self._K_diag_dvar
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
"""Derivative of the covariance with respect to the parameters."""
|
"""Derivative of the covariance with respect to the parameters."""
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
denom3 = self._K_denom*self._K_denom*self._K_denom
|
denom3 = self._K_denom*self._K_denom*self._K_denom
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,7 @@ class PeriodicMatern32(Kernpart):
|
||||||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||||
|
|
||||||
@silence_errors
|
@silence_errors
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||||
|
|
|
||||||
|
|
@ -114,7 +114,7 @@ class PeriodicMatern52(Kernpart):
|
||||||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||||
|
|
||||||
@silence_errors
|
@silence_errors
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||||
|
|
|
||||||
|
|
@ -110,7 +110,7 @@ class PeriodicExponential(Kernpart):
|
||||||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||||
|
|
||||||
@silence_errors
|
@silence_errors
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
|
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,7 @@ class POLY(Kernpart):
|
||||||
self._K_diag_computations(X)
|
self._K_diag_computations(X)
|
||||||
target+= self.variance*self._K_diag_dvar
|
target+= self.variance*self._K_diag_dvar
|
||||||
|
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
"""Derivative of the covariance with respect to the parameters."""
|
"""Derivative of the covariance with respect to the parameters."""
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
|
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
|
||||||
|
|
|
||||||
|
|
@ -54,15 +54,15 @@ class Prod(Kernpart):
|
||||||
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
|
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
|
||||||
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
|
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
"""Derivative of the covariance matrix with respect to the parameters."""
|
"""Derivative of the covariance matrix with respect to the parameters."""
|
||||||
self._K_computations(X,X2)
|
self._K_computations(X,X2)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
|
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
|
||||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
|
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
|
||||||
else:
|
else:
|
||||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
|
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
|
||||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
|
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||||
|
|
|
||||||
|
|
@ -41,15 +41,15 @@ class prod_orthogonal(Kernpart):
|
||||||
self._K_computations(X,X2)
|
self._K_computations(X,X2)
|
||||||
target += self._K1 * self._K2
|
target += self._K1 * self._K2
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
self._K_computations(X,X2)
|
self._K_computations(X,X2)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
|
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
|
||||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
|
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
|
||||||
else:
|
else:
|
||||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
|
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
|
||||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
|
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ class RationalQuadratic(Kernpart):
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
target += self.variance
|
target += self.variance
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
if X2 is None: X2 = X
|
if X2 is None: X2 = X
|
||||||
dist2 = np.square((X-X2.T)/self.lengthscale)
|
dist2 = np.square((X-X2.T)/self.lengthscale)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from kernpart import Kernpart
|
||||||
from ...util.linalg import tdot
|
from ...util.linalg import tdot
|
||||||
from ...util.misc import fast_array_equal, param_to_array
|
from ...util.misc import fast_array_equal, param_to_array
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
class RBF(Kernpart):
|
class RBF(Kernpart):
|
||||||
"""
|
"""
|
||||||
|
|
@ -50,7 +51,7 @@ class RBF(Kernpart):
|
||||||
else:
|
else:
|
||||||
lengthscale = np.ones(self.input_dim)
|
lengthscale = np.ones(self.input_dim)
|
||||||
|
|
||||||
self.variance = Param('variance', variance)
|
self.variance = Param('variance', variance, Logexp())
|
||||||
|
|
||||||
self.lengthscale = Param('lengthscale', lengthscale)
|
self.lengthscale = Param('lengthscale', lengthscale)
|
||||||
self.lengthscale.add_observer(self, self.update_lengthscale)
|
self.lengthscale.add_observer(self, self.update_lengthscale)
|
||||||
|
|
@ -141,7 +142,7 @@ class RBF(Kernpart):
|
||||||
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
||||||
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
||||||
if not self.ARD:
|
if not self.ARD:
|
||||||
self.lengthscale.gradeint = dpsi1_dlength.sum()
|
self.lengthscale.gradient = dpsi1_dlength.sum()
|
||||||
else:
|
else:
|
||||||
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
|
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,7 @@ class RBFInv(RBF):
|
||||||
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
|
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
|
||||||
|
|
||||||
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
|
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
|
||||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
target[0] += np.sum(self._K_dvar * dL_dK)
|
target[0] += np.sum(self._K_dvar * dL_dK)
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@ class RBFCos(Kernpart):
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
np.add(target,self.variance,target)
|
np.add(target,self.variance,target)
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
self._K_computations(X,X2)
|
self._K_computations(X,X2)
|
||||||
target[0] += np.sum(dL_dK*self._dvar)
|
target[0] += np.sum(dL_dK*self._dvar)
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@ class Spline(Kernpart):
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
target += self.variance*X.flatten()**3/3.
|
target += self.variance*X.flatten()**3/3.
|
||||||
|
|
||||||
def dK_dtheta(self,X,X2,target):
|
def _param_grad_helper(self,X,X2,target):
|
||||||
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
|
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
|
||||||
|
|
||||||
def dKdiag_dtheta(self,X,target):
|
def dKdiag_dtheta(self,X,target):
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ class Symmetric(Kernpart):
|
||||||
self.k.K(X,AX2,target)
|
self.k.K(X,AX2,target)
|
||||||
self.k.K(AX,AX2,target)
|
self.k.K(AX,AX2,target)
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
AX = np.dot(X,self.transform)
|
AX = np.dot(X,self.transform)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
|
|
@ -48,10 +48,10 @@ class Symmetric(Kernpart):
|
||||||
ZX2 = AX
|
ZX2 = AX
|
||||||
else:
|
else:
|
||||||
AX2 = np.dot(X2, self.transform)
|
AX2 = np.dot(X2, self.transform)
|
||||||
self.k.dK_dtheta(dL_dK,X,X2,target)
|
self.k._param_grad_helper(dL_dK,X,X2,target)
|
||||||
self.k.dK_dtheta(dL_dK,AX,X2,target)
|
self.k._param_grad_helper(dL_dK,AX,X2,target)
|
||||||
self.k.dK_dtheta(dL_dK,X,AX2,target)
|
self.k._param_grad_helper(dL_dK,X,AX2,target)
|
||||||
self.k.dK_dtheta(dL_dK,AX,AX2,target)
|
self.k._param_grad_helper(dL_dK,AX,AX2,target)
|
||||||
|
|
||||||
|
|
||||||
def gradients_X(self,dL_dK,X,X2,target):
|
def gradients_X(self,dL_dK,X,X2,target):
|
||||||
|
|
|
||||||
|
|
@ -348,7 +348,7 @@ class spkern(Kernpart):
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
self._weave_inline(self._Kdiag_code, X, target)
|
self._weave_inline(self._Kdiag_code, X, target)
|
||||||
|
|
||||||
def dK_dtheta(self,partial,X,Z,target):
|
def _param_grad_helper(self,partial,X,Z,target):
|
||||||
if Z is None:
|
if Z is None:
|
||||||
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
|
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import unittest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import GPy
|
import GPy
|
||||||
|
|
||||||
verbose = False
|
verbose = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import sympy
|
import sympy
|
||||||
|
|
|
||||||
|
|
@ -9,42 +9,44 @@ import numpy
|
||||||
import GPy
|
import GPy
|
||||||
import itertools
|
import itertools
|
||||||
from GPy.core import Model
|
from GPy.core import Model
|
||||||
|
from GPy.core.parameterization.param import Param
|
||||||
|
from GPy.core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
class PsiStatModel(Model):
|
class PsiStatModel(Model):
|
||||||
def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
|
def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
|
||||||
|
super(PsiStatModel, self).__init__(name='psi stat test')
|
||||||
self.which = which
|
self.which = which
|
||||||
self.X = X
|
self.X = Param("X", X)
|
||||||
self.X_variance = X_variance
|
self.X_variance = Param('X_variance', X_variance, Logexp())
|
||||||
self.Z = Z
|
self.Z = Param("Z", Z)
|
||||||
self.N, self.input_dim = X.shape
|
self.N, self.input_dim = X.shape
|
||||||
self.num_inducing, input_dim = Z.shape
|
self.num_inducing, input_dim = Z.shape
|
||||||
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
|
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
|
||||||
self.kern = kernel
|
self.kern = kernel
|
||||||
super(PsiStatModel, self).__init__()
|
|
||||||
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
|
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
|
||||||
def _get_param_names(self):
|
self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
|
||||||
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
|
|
||||||
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
|
|
||||||
return Xnames + Znames + self.kern._get_param_names()
|
|
||||||
def _get_params(self):
|
|
||||||
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
|
|
||||||
def _set_params(self, x, save_old=True, save_count=0):
|
|
||||||
start, end = 0, self.X.size
|
|
||||||
self.X = x[start:end].reshape(self.N, self.input_dim)
|
|
||||||
start, end = end, end + self.X_variance.size
|
|
||||||
self.X_variance = x[start: end].reshape(self.N, self.input_dim)
|
|
||||||
start, end = end, end + self.Z.size
|
|
||||||
self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
|
|
||||||
self.kern._set_params(x[end:])
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
|
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
|
||||||
def _log_likelihood_gradients(self):
|
|
||||||
|
def parameters_changed(self):
|
||||||
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
|
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
|
||||||
|
self.X.gradient = psimu
|
||||||
|
self.X_variance.gradient = psiS
|
||||||
#psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
|
#psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
|
||||||
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
|
try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
|
||||||
|
except AttributeError: psiZ = numpy.zeros_like(self.Z)
|
||||||
|
self.Z.gradient = psiZ
|
||||||
#psiZ = numpy.ones(self.num_inducing * self.input_dim)
|
#psiZ = numpy.ones(self.num_inducing * self.input_dim)
|
||||||
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
|
N,M = self.X.shape[0], self.Z.shape[0]
|
||||||
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
|
dL_dpsi0, dL_dpsi1, dL_dpsi2 = numpy.zeros([N]), numpy.zeros([N,M]), numpy.zeros([N,M,M])
|
||||||
|
if self.which == 'psi0': dL_dpsi0 += 1
|
||||||
|
if self.which == 'psi1': dL_dpsi1 += 1
|
||||||
|
if self.which == 'psi2': dL_dpsi2 += 1
|
||||||
|
self.kern.update_gradients_variational(numpy.zeros([1,1]),
|
||||||
|
dL_dpsi0,
|
||||||
|
dL_dpsi1,
|
||||||
|
dL_dpsi2, self.X, self.X_variance, self.Z)
|
||||||
|
|
||||||
class DPsiStatTest(unittest.TestCase):
|
class DPsiStatTest(unittest.TestCase):
|
||||||
input_dim = 5
|
input_dim = 5
|
||||||
|
|
@ -57,61 +59,66 @@ class DPsiStatTest(unittest.TestCase):
|
||||||
Y = X.dot(numpy.random.randn(input_dim, input_dim))
|
Y = X.dot(numpy.random.randn(input_dim, input_dim))
|
||||||
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
|
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
|
||||||
|
|
||||||
kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim),
|
kernels = [
|
||||||
GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
|
GPy.kern.linear(input_dim),
|
||||||
GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)]
|
GPy.kern.rbf(input_dim),
|
||||||
|
#GPy.kern.bias(input_dim),
|
||||||
|
#GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
|
||||||
|
#GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)
|
||||||
|
]
|
||||||
|
|
||||||
def testPsi0(self):
|
def testPsi0(self):
|
||||||
for k in self.kernels:
|
for k in self.kernels:
|
||||||
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\
|
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\
|
||||||
num_inducing=self.num_inducing, kernel=k)
|
num_inducing=self.num_inducing, kernel=k)
|
||||||
m.ensure_default_constraints()
|
#m.ensure_default_constraints(warning=0)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
|
import ipdb;ipdb.set_trace()
|
||||||
|
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||||
|
|
||||||
def testPsi1(self):
|
def testPsi1(self):
|
||||||
for k in self.kernels:
|
for k in self.kernels:
|
||||||
m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
|
m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||||
num_inducing=self.num_inducing, kernel=k)
|
num_inducing=self.num_inducing, kernel=k)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints(warning=0)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
|
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||||
|
|
||||||
def testPsi2_lin(self):
|
def testPsi2_lin(self):
|
||||||
k = self.kernels[0]
|
k = self.kernels[0]
|
||||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||||
num_inducing=self.num_inducing, kernel=k)
|
num_inducing=self.num_inducing, kernel=k)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints(warning=0)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||||
def testPsi2_lin_bia(self):
|
def testPsi2_lin_bia(self):
|
||||||
k = self.kernels[3]
|
k = self.kernels[3]
|
||||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||||
num_inducing=self.num_inducing, kernel=k)
|
num_inducing=self.num_inducing, kernel=k)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints(warning=0)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||||
def testPsi2_rbf(self):
|
def testPsi2_rbf(self):
|
||||||
k = self.kernels[1]
|
k = self.kernels[1]
|
||||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||||
num_inducing=self.num_inducing, kernel=k)
|
num_inducing=self.num_inducing, kernel=k)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints(warning=0)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||||
def testPsi2_rbf_bia(self):
|
def testPsi2_rbf_bia(self):
|
||||||
k = self.kernels[-1]
|
k = self.kernels[-1]
|
||||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||||
num_inducing=self.num_inducing, kernel=k)
|
num_inducing=self.num_inducing, kernel=k)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints(warning=0)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||||
def testPsi2_bia(self):
|
def testPsi2_bia(self):
|
||||||
k = self.kernels[2]
|
k = self.kernels[2]
|
||||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||||
num_inducing=self.num_inducing, kernel=k)
|
num_inducing=self.num_inducing, kernel=k)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints(warning=0)
|
||||||
m.randomize()
|
m.randomize()
|
||||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue