Merge branch 'params' of github.com:SheffieldML/GPy into params

This commit is contained in:
Alan Saul 2014-02-11 14:06:45 +00:00
commit 835934ff51
39 changed files with 262 additions and 295 deletions

View file

@ -260,17 +260,18 @@ class Model(Parameterized):
""" """
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
# param_names = self._get_param_names() # param_names = self._get_param_names()
for s in positive_strings:
paramlist = self.grep_param_names(".*"+s) # for s in positive_strings:
for param in paramlist: # paramlist = self.grep_param_names(".*"+s)
for p in param.flattened_parameters: # for param in paramlist:
rav_i = set(self._raveled_index_for(p)) # for p in param.flattened_parameters:
for constraint in self.constraints.iter_properties(): # rav_i = set(self._raveled_index_for(p))
rav_i -= set(self._constraint_indices(p, constraint)) # for constraint in self.constraints.iter_properties():
rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0]) # rav_i -= set(self._constraint_indices(p, constraint))
ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int)) # rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
if ind.size != 0: # ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
p[np.unravel_index(ind, p.shape)].constrain_positive() # if ind.size != 0:
# p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
# if paramlist: # if paramlist:
# self.__getitem__(None, paramlist).constrain_positive(warning=warning) # self.__getitem__(None, paramlist).constrain_positive(warning=warning)
# currently_constrained = self.all_constrained_indices() # currently_constrained = self.all_constrained_indices()
@ -405,18 +406,18 @@ class Model(Parameterized):
dx = step * np.sign(np.random.uniform(-1, 1, x.size)) dx = step * np.sign(np.random.uniform(-1, 1, x.size))
# evaluate around the point x # evaluate around the point x
f1, g1 = self.objective_and_gradients(x + dx) f1 = self.objective_function(x + dx)
f2, g2 = self.objective_and_gradients(x - dx) f2 = self.objective_function(x - dx)
gradient = self.objective_function_gradients(x) gradient = self.objective_function_gradients(x)
numerical_gradient = (f1 - f2) / (2 * dx) numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient))) global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
else: else:
# check the gradient of each parameter individually, and do some pretty printing # check the gradient of each parameter individually, and do some pretty printing
try: try:
names = self._get_param_names_transformed() names = self._get_param_names()
except NotImplementedError: except NotImplementedError:
names = ['Variable %i' % i for i in range(len(x))] names = ['Variable %i' % i for i in range(len(x))]
# Prepare for pretty-printing # Prepare for pretty-printing
@ -430,42 +431,46 @@ class Model(Parameterized):
header_string = map(lambda x: '|'.join(x), [header_string]) header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-' * len(header_string[0]) separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator]) print '\n'.join([header_string[0], separator])
if target_param is None: if target_param is None:
param_list = range(len(x)) param_list = range(len(x))
else: else:
param_list = self._raveled_index_for(target_param) param_list = self._raveled_index_for(target_param)
if self._has_fixes(): if self._has_fixes():
param_list = param_list[self._fixes_] param_list = np.intersect1d(param_list, np.r_[:self.size][self._fixes_], True)
if not np.any(param_list):
if param_list.size == 0:
print "No free parameters to check" print "No free parameters to check"
return return
gradient = self.objective_function_gradients(x) gradient = self.objective_function_gradients(x)
np.where(gradient==0, 1e-312, gradient) np.where(gradient==0, 1e-312, gradient)
ret = True
for i in param_list: for i, ind in enumerate(param_list):
xx = x.copy() xx = x.copy()
xx[i] += step xx[ind] += step
f1, g1 = self.objective_and_gradients(xx) f1 = self.objective_function(xx)
xx[i] -= 2.*step xx[ind] -= 2.*step
f2, g2 = self.objective_and_gradients(xx) f2 = self.objective_function(xx)
numerical_gradient = (f1 - f2) / (2 * step) numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient[i]) ratio = (f1 - f2) / (2 * step * gradient[ind])
difference = np.abs((f1 - f2) / 2 / step - gradient[i]) difference = np.abs((f1 - f2) / 2 / step - gradient[ind])
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
formatted_name = "\033[92m {0} \033[0m".format(names[i]) formatted_name = "\033[92m {0} \033[0m".format(names[ind])
ret &= True
else: else:
formatted_name = "\033[91m {0} \033[0m".format(names[i]) formatted_name = "\033[91m {0} \033[0m".format(names[ind])
ret &= False
r = '%.6f' % float(ratio) r = '%.6f' % float(ratio)
d = '%.6f' % float(difference) d = '%.6f' % float(difference)
g = '%.6f' % gradient[i] g = '%.6f' % gradient[ind]
ng = '%.6f' % float(numerical_gradient) ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4]) grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string print grad_string
return ret
def input_sensitivity(self): def input_sensitivity(self):
""" """
return an array describing the sensitivity of the model to each input
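The checkgrad changes above swap objective_and_gradients for plain objective_function calls when forming the finite differences, and compare the analytic gradient against a central-difference estimate via a ratio test and an absolute-difference test. A minimal sketch of that check on a toy quadratic objective (the objective, step and tolerance below are made up for illustration; GPy's version also handles fixed parameters and pretty printing):

import numpy as np

def objective(x):
    return 0.5 * np.sum(x ** 2)

def objective_gradients(x):
    return x

def checkgrad(x, step=1e-6, tolerance=1e-3):
    ok = True
    analytic = objective_gradients(x)
    for i in range(x.size):
        xx = x.copy()
        xx[i] += step
        f1 = objective(xx)
        xx[i] -= 2. * step
        f2 = objective(xx)
        numerical = (f1 - f2) / (2. * step)
        ratio = numerical / analytic[i]
        ok &= (abs(1. - ratio) < tolerance) or (abs(numerical - analytic[i]) < tolerance)
    return ok

print(checkgrad(np.random.randn(4)))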

View file

@ -27,6 +27,24 @@ class ParamList(list):
return False return False
pass pass
class C(np.ndarray):
__array_priority__ = 1.
def __new__(cls, array):
obj = array.view(cls)
return obj
#def __array_finalize__(self, obj):
# #print 'finalize'
# return obj
def __array_prepare__(self, out_arr, context):
#print 'prepare'
while type(out_arr) is C:
out_arr = out_arr.base
return out_arr
def __array_wrap__(self, out_arr, context):
#print 'wrap', type(self), type(out_arr), context
while type(out_arr) is C:
out_arr = out_arr.base
return out_arr
class ObservableArray(ListArray, Observable): class ObservableArray(ListArray, Observable):
""" """
@ -35,7 +53,7 @@ class ObservableArray(ListArray, Observable):
will be called every time this array changes. The callable will be called every time this array changes. The callable
takes exactly one argument, which is this array itself. takes exactly one argument, which is this array itself.
""" """
__array_priority__ = 0 # Never give back Param __array_priority__ = -1 # Never give back ObservableArray
def __new__(cls, input_array): def __new__(cls, input_array):
cls.__name__ = "ObservableArray\n " cls.__name__ = "ObservableArray\n "
obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls) obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls)
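The throwaway class C above shows the mechanism behind lowering __array_priority__ to -1: together with an __array_wrap__ that hands results back as the base class, ufunc outputs stop being wrapped in the subclass. A self-contained sketch of that behaviour (the Plain subclass is invented, and the unwrapping is simplified to np.asarray rather than walking .base as C does):

import numpy as np

class Plain(np.ndarray):
    __array_priority__ = -1  # never win the wrapping contest against other operands
    def __new__(cls, input_array):
        return np.asarray(input_array).view(cls)
    def __array_wrap__(self, out_arr, context=None, return_scalar=False):
        # hand back a plain ndarray so arithmetic results drop the subclass
        return np.asarray(out_arr)

a = Plain(np.arange(3.0))
print(type(a))        # Plain
print(type(a + 1.0))  # numpy.ndarray -- the result has been unwrapped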

View file

@ -24,8 +24,11 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
""" """
Parameter object for GPy models. Parameter object for GPy models.
:param name: name of the parameter to be printed :param str name: name of the parameter to be printed
:param input_array: array which this parameter handles :param input_array: array which this parameter handles
:type input_array: numpy.ndarray
:param default_constraint: The default constraint for this parameter
:type default_constraint:
You can add/remove constraints by calling constrain on the parameter itself, e.g: You can add/remove constraints by calling constrain on the parameter itself, e.g:
@ -40,19 +43,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc. See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
This ndarray can be stored in lists and checked if it is in.
>>> import numpy as np
>>> x = np.random.normal(size=(10,3))
>>> x in [[1], x, [3]]
True
WARNING: This overrides the functionality of x==y!!!
Use numpy.equal(x,y) for element-wise equality testing.
""" """
__array_priority__ = 0 # Never give back Param __array_priority__ = -1 # Never give back Param
_fixes_ = None _fixes_ = None
def __new__(cls, name, input_array, *args, **kwargs): def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
obj._current_slice_ = (slice(obj.shape[0]),) obj._current_slice_ = (slice(obj.shape[0]),)
obj._realshape_ = obj.shape obj._realshape_ = obj.shape
@ -66,8 +60,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
obj.gradient = None obj.gradient = None
return obj return obj
def __init__(self, name, input_array): def __init__(self, name, input_array, default_constraint=None):
super(Param, self).__init__(name=name) super(Param, self).__init__(name=name, default_constraint=default_constraint)
def __array_finalize__(self, obj): def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments # see InfoArray.__array_finalize__ for comments
@ -75,6 +69,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
super(Param, self).__array_finalize__(obj) super(Param, self).__array_finalize__(obj)
self._direct_parent_ = getattr(obj, '_direct_parent_', None) self._direct_parent_ = getattr(obj, '_direct_parent_', None)
self._parent_index_ = getattr(obj, '_parent_index_', None) self._parent_index_ = getattr(obj, '_parent_index_', None)
self._default_constraint_ = getattr(obj, '_default_constraint_', None)
self._current_slice_ = getattr(obj, '_current_slice_', None) self._current_slice_ = getattr(obj, '_current_slice_', None)
self._tied_to_me_ = getattr(obj, '_tied_to_me_', None) self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
self._tied_to_ = getattr(obj, '_tied_to_', None) self._tied_to_ = getattr(obj, '_tied_to_', None)
@ -97,6 +92,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
(self.name, (self.name,
self._direct_parent_, self._direct_parent_,
self._parent_index_, self._parent_index_,
self._default_constraint_,
self._current_slice_, self._current_slice_,
self._realshape_, self._realshape_,
self._realsize_, self._realsize_,
@ -117,6 +113,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable):
self._realsize_ = state.pop() self._realsize_ = state.pop()
self._realshape_ = state.pop() self._realshape_ = state.pop()
self._current_slice_ = state.pop() self._current_slice_ = state.pop()
self._default_constraint_ = state.pop()
self._parent_index_ = state.pop() self._parent_index_ = state.pop()
self._direct_parent_ = state.pop() self._direct_parent_ = state.pop()
self.name = state.pop() self.name = state.pop()
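The new _default_constraint_ entry is threaded through __reduce__ and __setstate__ above: attributes are pushed onto the state in one order and popped back in reverse. A minimal, hedged sketch of that numpy-subclass pickling pattern (NamedArray and its single extra attribute are invented for illustration):

import pickle
import numpy as np

class NamedArray(np.ndarray):
    def __new__(cls, name, input_array):
        obj = np.asarray(input_array).view(cls)
        obj.name = name
        return obj

    def __array_finalize__(self, obj):
        if obj is None:
            return
        self.name = getattr(obj, 'name', None)

    def __reduce__(self):
        # append the extra state to the ndarray state, mirroring the push order above
        func, args, state = super(NamedArray, self).__reduce__()
        return func, args, (state, [self.name])

    def __setstate__(self, state):
        nd_state, extra = state
        self.name = extra.pop()          # pop in reverse push order
        super(NamedArray, self).__setstate__(nd_state)

x = NamedArray('lengthscale', np.arange(3.0))
y = pickle.loads(pickle.dumps(x))
assert y.name == 'lengthscale' and np.allclose(x, y)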

View file

@ -91,8 +91,9 @@ class Gradcheckable(Parentable):
class Constrainable(Nameable): class Constrainable(Nameable):
def __init__(self, name): def __init__(self, name, default_constraint=None):
super(Constrainable,self).__init__(name) super(Constrainable,self).__init__(name)
self._default_constraint_ = default_constraint
#=========================================================================== #===========================================================================
# Fixing Parameters: # Fixing Parameters:
#=========================================================================== #===========================================================================

View file

@ -171,6 +171,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
for t, ind in param._constraints_.iteritems(): for t, ind in param._constraints_.iteritems():
self.constraints.add(t, ind+self._offset_for(param)) self.constraints.add(t, ind+self._offset_for(param))
param._constraints_.clear() param._constraints_.clear()
if param._default_constraint_ is not None:
self._add_constrain(param, param._default_constraint_, False)
if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED
self._fixes_= None self._fixes_= None
@ -312,8 +314,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
#=========================================================================== #===========================================================================
# Optimization handles: # Optimization handles:
#=========================================================================== #===========================================================================
def _get_param_names_transformed(self): def _get_param_names(self):
n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()]) n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()])
return n
def _get_param_names_transformed(self):
n = self._get_param_names()
if self._has_fixes(): if self._has_fixes():
return n[self._fixes_] return n[self._fixes_]
return n return n
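The refactor above splits name generation from the fixed-parameter filter: _get_param_names builds one name per raveled index, and _get_param_names_transformed then masks out fixed entries with self._fixes_. A toy sketch of that masking (the parameter list and mask are made up):

import numpy as np

params = [('rbf.variance', 1), ('rbf.lengthscale', 2), ('noise.variance', 1)]
names = np.array(['{}[{}]'.format(name, i) for name, size in params for i in range(size)])

fixes = np.array([True, True, False, True])   # True = free, False = fixed
print(names)          # every parameter name, one per raveled index
print(names[fixes])   # only the free parameters, as the transformed variant returns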

View file

@ -154,13 +154,13 @@ class SVIGP(GP):
self.psi2 = None self.psi2 = None
def dL_dtheta(self): def dL_dtheta(self):
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch) dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch) dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch) dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
else: else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z) dL_dtheta += self.kern._param_grad_helper(self.dL_dpsi1, self.X_batch, self.Z)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
return dL_dtheta return dL_dtheta

View file

@ -1,73 +1,54 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np class VarDTC(object):
from ...util.linalg import mdot, jitchol, chol_inv, tdot, dtrtrs def __init__(self):
from ...core import SparseGP self.const_jitter = 1e-6
class FITC(SparseGP): def inference(self, kern, X, X_variance, Z, likelihood, Y):
"""
Sparse FITC approximation num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
:param X: inputs #make sure we're not using variational uncertain inputs
:type X: np.ndarray (num_data x Q) assert = X_variance is None, "variational inducing inputs only for use with varDTC inference"
:param likelihood: a likelihood instance, containing the observed data #Note: we can't do the variational thing after making the GITC conditional approximation because K~ appears in the log determinant.
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
""" #see whether we've got a different noise variance for each datum
sigma2 = np.squeeze(likelihood.variance)
het_noise = False
if sigma2.size <1:
het_noise = True
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
def update_likelihood_approximation(self, **kwargs):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.restart()
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
self._set_params(self._get_params())
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z) Kmm = kern.K(Z)
self.psi0 = self.kern.Kdiag(self.X) Kdiag = kern.Kdiag(X)
self.psi1 = self.kern.K(self.Z, self.X) Kmn = kern.K(X, Z)
self.psi2 = None
def _computations(self):
#factor Kmm #factor Kmm
self.Lm = jitchol(self.Kmm) Lm = jitchol(Kmm)
self.Lmi,info = dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1) V = dtrtrs(Lm, Kmn, lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A #compute effective noise
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data))) g_sn2 = sigma2 + Kdiag - np.sum(V*V, 0)
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B #compute and factor B
self.B = np.eye(self.num_inducing) + self.A tmp = Kmn / np.sqrt(g_snd)
self.LB = jitchol(self.B) tmp, _ = dtrtrs(Lm, tmp, lower=1)
self.LBi = chol_inv(self.LB) A = tdot(tmp)
self.psi1V = np.dot(self.psi1, self.V_star) B = np.eye(num_inducing) + A
Bi, Lb, LBi, log_det_B = pdinv(B)
Lmi_psi1V, info = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) #compute posterior parameters
self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0) tmp, _ = dtrtrs()
woodbury_vec =
psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, _ = dtrtrs(Lm, self.psi1V, lower=1, trans=0)
LBi_Lmi_psi1V, _ = dtrtrs(LB, Lmi_psi1V, lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1) Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T b_psi1_Ki = self.beta_star * Kmmipsi1.T
@ -116,8 +97,8 @@ class FITC(SparseGP):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T) K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:] _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z) self._dpsi1_dtheta += self.kern._param_grad_helper(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z) self._dKmm_dtheta += self.kern._param_grad_helper(_dKmm,self.Z)
self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z) self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:]) self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
@ -163,8 +144,8 @@ class FITC(SparseGP):
def dL_dtheta(self): def dL_dtheta(self):
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X) dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z) dL_dtheta += self.kern._param_grad_helper(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z) dL_dtheta += self.kern._param_grad_helper(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta dL_dtheta += self._dpsi1_dtheta
return dL_dtheta return dL_dtheta
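The new inference sketched above works towards the quantities Lm, V, g_sn2, A and B. A rough, self-contained version of that algebra with scipy standing in for jitchol/dtrtrs/tdot and a hand-rolled RBF covariance standing in for kern (an illustration of the computation, not GPy's implementation):

import numpy as np
from scipy.linalg import cholesky, solve_triangular

def rbf(A, B, lengthscale=1.0, variance=1.0):
    sq = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2. * A.dot(B.T)
    return variance * np.exp(-0.5 * sq / lengthscale**2)

X = np.random.randn(50, 1)    # data inputs
Z = np.random.randn(6, 1)     # inducing inputs
sigma2 = 0.1                  # Gaussian noise variance
jitter = 1e-6

Kmm = rbf(Z, Z) + np.eye(Z.shape[0]) * jitter
Kmn = rbf(Z, X)
Kdiag = np.ones(X.shape[0])   # Kdiag of a unit-variance RBF

Lm = cholesky(Kmm, lower=True)                 # jitchol(Kmm)
V = solve_triangular(Lm, Kmn, lower=True)      # dtrtrs(Lm, Kmn)
g_sn2 = sigma2 + Kdiag - np.sum(V * V, 0)      # per-point effective noise
tmp = V / np.sqrt(g_sn2)
A = tmp.dot(tmp.T)                             # tdot(tmp)
B = np.eye(Z.shape[0]) + A                     # its log-determinant enters the bound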

View file

@ -8,6 +8,7 @@ from parts.prod import Prod as prod
from parts.linear import Linear from parts.linear import Linear
from parts.kernpart import Kernpart from parts.kernpart import Kernpart
from ..core.parameterization import Parameterized from ..core.parameterization import Parameterized
from GPy.core.parameterization.param import Param
class kern(Parameterized): class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None): def __init__(self, input_dim, parts=[], input_slices=None):
@ -84,7 +85,7 @@ class kern(Parameterized):
# represents the gradient in the transformed space (i.e. that given by # represents the gradient in the transformed space (i.e. that given by
# get_params_transformed()) # get_params_transformed())
# #
# :param g: the gradient vector for the current model, usually created by dK_dtheta # :param g: the gradient vector for the current model, usually created by _param_grad_helper
# """ # """
# x = self._get_params() # x = self._get_params()
# [np.place(g, index, g[index] * constraint.gradfactor(x[index])) # [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
@ -294,7 +295,7 @@ class kern(Parameterized):
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
def dK_dtheta(self, dL_dK, X, X2=None): def _param_grad_helper(self, dL_dK, X, X2=None):
""" """
Compute the gradient of the covariance function with respect to the parameters. Compute the gradient of the covariance function with respect to the parameters.
@ -310,9 +311,9 @@ class kern(Parameterized):
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
target = np.zeros(self.size) target = np.zeros(self.size)
if X2 is None: if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
else: else:
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target) return self._transform_gradients(target)
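The list comprehensions above fan the computation out over the kernel's parts, each part accumulating into its own slice of a flat gradient vector. A toy illustration of that in-place slicing pattern (ToyPart and its fabricated gradients are not part of GPy):

import numpy as np

class ToyPart(object):
    num_params = 2
    def _param_grad_helper(self, dL_dK, X, X2, target):
        # fabricated gradients: one entry per parameter, accumulated in place
        target[0] += np.sum(dL_dK)
        target[1] += 0.5 * np.sum(dL_dK)

parts = [ToyPart(), ToyPart()]
param_slices = [slice(0, 2), slice(2, 4)]      # in the spirit of self._param_slices_
target = np.zeros(sum(p.num_params for p in parts))

X = np.random.randn(5, 2)
dL_dK = np.ones((5, 5))
[p._param_grad_helper(dL_dK, X, None, target[ps]) for p, ps in zip(parts, param_slices)]
print(target)   # each part's contribution lands in its own slice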
@ -484,6 +485,7 @@ from GPy.core.model import Model
class Kern_check_model(Model): class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20 num_samples = 20
num_samples2 = 10 num_samples2 = 10
if kernel==None: if kernel==None:
@ -495,14 +497,12 @@ class Kern_check_model(Model):
dL_dK = np.ones((X.shape[0], X.shape[0])) dL_dK = np.ones((X.shape[0], X.shape[0]))
else: else:
dL_dK = np.ones((X.shape[0], X2.shape[0])) dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel self.kernel=kernel
self.add_parameter(kernel)
self.X = X self.X = X
self.X2 = X2 self.X2 = X2
self.dL_dK = dL_dK self.dL_dK = dL_dK
#self.constrained_indices=[]
#self.constraints=[]
Model.__init__(self, 'kernel_test_model')
def is_positive_definite(self): def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0] v = np.linalg.eig(self.kernel.K(self.X))[0]
@ -511,15 +511,6 @@ class Kern_check_model(Model):
else: else:
return True return True
def _get_params(self):
return self.kernel._get_params()
def _get_param_names(self):
return self.kernel._get_param_names()
def _set_params(self, x):
self.kernel._set_params(x)
def log_likelihood(self): def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum() return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
@ -532,7 +523,7 @@ class Kern_check_dK_dtheta(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2) return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model): class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters.""" """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
@ -540,6 +531,8 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None: if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0])) self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self): def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
@ -551,41 +544,25 @@ class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """ """This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten() return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
def _get_param_names(self): class Kern_check_dKdiag_dX(Kern_check_dK_dX):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
class Kern_check_dKdiag_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None: if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0])) self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self): def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten() return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
""" """
This function runs on kernels to check the correctness of their This function runs on kernels to check the correctness of their

View file

@ -43,7 +43,7 @@ class Brownian(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance*X.flatten() target += self.variance*X.flatten()
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None: if X2 is None:
X2 = X X2 = X
target += np.sum(np.fmin(X,X2.T)*dL_dK) target += np.sum(np.fmin(X,X2.T)*dL_dK)

View file

@ -74,7 +74,7 @@ class Matern32(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target) np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))

View file

@ -74,7 +74,7 @@ class Matern52(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))

View file

@ -90,7 +90,7 @@ class ODE_1(Kernpart):
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target) np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.abs(X - X2.T) dist = np.abs(X - X2.T)

View file

@ -124,7 +124,7 @@ class Eq_ode1(Kernpart):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass pass
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
# First extract times and indices. # First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK) self._extract_t_indices(X, X2, dL_dK=dL_dK)

View file

@ -75,7 +75,7 @@ class Exponential(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target) np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))

View file

@ -50,7 +50,7 @@ class FiniteDimensional(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
product = np.diag(self.K(X, X)) product = np.diag(self.K(X, X))
np.add(target,product,target) np.add(target,product,target)
def dK_dtheta(self,X,X2,target): def _param_grad_helper(self,X,X2,target):
"""Return shape is NxMx(Ntheta)""" """Return shape is NxMx(Ntheta)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F]) FX = np.column_stack([f(X) for f in self.F])

View file

@ -31,7 +31,7 @@ class Fixed(Kernpart):
def K(self, X, X2, target): def K(self, X, X2, target):
target += self.variance * self.fixed_K target += self.variance * self.fixed_K
def dK_dtheta(self, partial, X, X2, target): def _param_grad_helper(self, partial, X, X2, target):
target += (partial * self.fixed_K).sum() target += (partial * self.fixed_K).sum()
def gradients_X(self, partial, X, X2, target): def gradients_X(self, partial, X, X2, target):

View file

@ -85,7 +85,7 @@ class Gibbs(Kernpart):
"""Compute the diagonal of the covariance matrix for X.""" """Compute the diagonal of the covariance matrix for X."""
np.add(target, self.variance, target) np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2) self._K_computations(X, X2)
self._dK_computations(dL_dK) self._dK_computations(dL_dK)

View file

@ -80,7 +80,7 @@ class Hetero(Kernpart):
"""Helper function for computing the diagonal elements of the covariance.""" """Helper function for computing the diagonal elements of the covariance."""
return self.mapping.f(X).flatten()**2 return self.mapping.f(X).flatten()**2
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
if (X2 is None) or (X2 is X): if (X2 is None) or (X2 is X):
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1] dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]

View file

@ -50,9 +50,9 @@ class Hierarchical(Kernpart):
#X,slices = X[:,:-1],index_to_slices(X[:,-1]) #X,slices = X[:,:-1],index_to_slices(X[:,-1])
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices] #[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
X, X2, slices, slices2 = self._sort_slices(X,X2) X, X2, slices, slices2 = self._sort_slices(X,X2)
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)] [[[[k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
def gradients_X(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):

View file

@ -70,13 +70,13 @@ class IndependentOutputs(Kernpart):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices] [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: if X2 is None:
X2,slices2 = X,slices X2,slices2 = X,slices
else: else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def gradients_X(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):

View file

@ -75,14 +75,14 @@ class Kernpart(Parameterized):
raise NotImplementedError raise NotImplementedError
def Kdiag(self,X,target): def Kdiag(self,X,target):
raise NotImplementedError raise NotImplementedError
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
raise NotImplementedError raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
# In the base case compute this by calling dK_dtheta. Need to # In the base case compute this by calling _param_grad_helper. Need to
# override for stationary covariances (for example) to save # override for stationary covariances (for example) to save
# time. # time.
for i in range(X.shape[0]): for i in range(X.shape[0]):
self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
def psi0(self,Z,mu,S,target): def psi0(self,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
@ -109,8 +109,15 @@ class Kernpart(Parameterized):
raise NotImplementedError raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target): def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
"""Set the gradients of all parameters when doing sparse (M) inference."""
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
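These three hooks describe the interface the commit moves the kernels towards: rather than returning a gradient vector through _param_grad_helper, a kernel sets the gradients of its own parameters. A hedged sketch of what update_gradients_full can look like for a toy constant (bias) covariance (ToyBias is invented; a real Kernpart would store the value on its Param object):

import numpy as np

class ToyBias(object):
    def __init__(self, variance=1.0):
        self.variance = variance
        self.variance_gradient = 0.0
    def K(self, X, X2=None):
        X2 = X if X2 is None else X2
        return self.variance * np.ones((X.shape[0], X2.shape[0]))
    def update_gradients_full(self, dL_dK, X):
        # dK/d(variance) is a matrix of ones, so the chain rule reduces to a sum
        self.variance_gradient = np.sum(dL_dK)

k = ToyBias(0.5)
k.update_gradients_full(np.ones((4, 4)), np.random.randn(4, 1))
print(k.variance_gradient)   # 16.0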
class Kernpart_stationary(Kernpart): class Kernpart_stationary(Kernpart):
def __init__(self, input_dim, lengthscale=None, ARD=False): def __init__(self, input_dim, lengthscale=None, ARD=False):

View file

@ -8,6 +8,7 @@ from kernpart import Kernpart
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Linear(Kernpart): class Linear(Kernpart):
""" """
@ -43,8 +44,9 @@ class Linear(Kernpart):
else: else:
variances = np.ones(self.input_dim) variances = np.ones(self.input_dim)
self.variances = Param('variances', variances) self.variances = Param('variances', variances, Logexp())
self.add_parameters(self.variances) self.variances.gradient = np.zeros(self.variances.shape)
self.add_parameter(self.variances)
self.variances.add_observer(self, self.update_variance) self.variances.add_observer(self, self.update_variance)
# initialize cache # initialize cache
@ -57,42 +59,35 @@ class Linear(Kernpart):
def on_input_change(self, X): def on_input_change(self, X):
self._K_computations(X, None) self._K_computations(X, None)
def update_gradients_full(self, dL_dK, X):
#self.variances.gradient[:] = 0
self._param_grad_helper(dL_dK, X, self.variances.gradient)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = tmp.sum()
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
# psi0: # psi0:
tmp = dL_dpsi0[:, None] * self.mu2_S tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: self.variances.gradient = tmp.sum(0) if self.ARD: self.variances.gradient[:] = tmp.sum(0)
else: self.variances.gradient = tmp.sum() else: self.variances.gradient[:] = tmp.sum()
#psi1 #psi1
self.dK_dtheta(dL_dpsi1, mu, Z, self.variances.gradient) self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
#psi2
#from psi2
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :]) tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: self.variances.gradient += tmp.sum() else: self.variances.gradient += tmp.sum()
#from Kmm #from Kmm
self._K_computations(Z, None) self._K_computations(Z, None)
self.dK_dtheta(dL_dKmm, Z, None, self.variances.gradient) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
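One reading of the switch from self.variances.gradient = ... to self.variances.gradient[:] = ... (together with the gradient buffer preallocated in __init__ above) is that writes should go into the existing array rather than rebind the attribute, so other references to the buffer stay valid. A small sketch of the difference, with nothing GPy-specific in it:

import numpy as np

grad = np.zeros(3)          # preallocated buffer, like self.variances.gradient
held = grad                 # another reference held elsewhere

grad[:] = np.arange(3.0)    # in-place write: 'held' sees the new values
print(held)                 # [0. 1. 2.]

grad = 10. * np.arange(3.0) # rebinding: 'held' still points at the old buffer
print(held)                 # [0. 1. 2.] -- unchanged, now stale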
# def _get_params(self):
# return self.variances
#
# def _set_params(self, x):
# assert x.size == (self.num_params)
# self.variances = x
#def parameters_changed(self):
# self.variances2 = np.square(self.variances)
#
# def _get_param_names(self):
# if self.num_params == 1:
# return ['variance']
# else:
# return ['variance_%i' % i for i in range(self.variances.size)]
def K(self, X, X2, target): def K(self, X, X2, target):
if self.ARD: if self.ARD:
XX = X * np.sqrt(self.variances) XX = X * np.sqrt(self.variances)
@ -109,7 +104,7 @@ class Linear(Kernpart):
def Kdiag(self, X, target): def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target) np.add(target, np.sum(self.variances * np.square(X), -1), target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
if self.ARD: if self.ARD:
if X2 is None: if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)] [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
@ -121,13 +116,6 @@ class Linear(Kernpart):
self._K_computations(X, X2) self._K_computations(X, X2)
target += np.sum(self._dot_product * dL_dK) target += np.sum(self._dot_product * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def gradients_X(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
if X2 is None: if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
@ -145,14 +133,6 @@ class Linear(Kernpart):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
target += np.sum(self.variances * self.mu2_S, 1) target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances target_S += dL_dpsi0[:, None] * self.variances
@ -161,10 +141,6 @@ class Linear(Kernpart):
"""the variance, it does nothing""" """the variance, it does nothing"""
self._psi1 = self.K(mu, Z, target) self._psi1 = self.K(mu, Z, target)
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
"""the variance, it does nothing"""
self.dK_dtheta(dL_dpsi1, mu, Z, target)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1""" """Do nothing for S, it does not affect psi1"""
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
@ -185,21 +161,13 @@ class Linear(Kernpart):
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target): def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
tmp = np.zeros((mu.shape[0], Z.shape[0])) tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp) self.K(mu,Z,tmp)
self.dK_dtheta(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target) self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0) result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
if self.ARD: if self.ARD:
target += result.sum(0).sum(0).sum(0) target += result.sum(0).sum(0).sum(0)
else: else:
target += result.sum() target += result.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD:
target += tmp.sum(0).sum(0).sum(0)
else:
target += tmp.sum()
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S): def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
tmp = np.zeros((mu.shape[0], Z.shape[0])) tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp) self.K(mu,Z,tmp)
@ -309,11 +277,11 @@ class Linear(Kernpart):
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
self._X = X.copy() self._X = X.copy()
if X2 is None: if X2 is None:
self._dot_product = tdot(X) self._dot_product = tdot(param_to_array(X))
self._X2 = None self._X2 = None
else: else:
self._X2 = X2.copy() self._X2 = X2.copy()
self._dot_product = np.dot(X, X2.T) self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
def _psi_computations(self, Z, mu, S): def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2 # here are the "statistics" for psi1 and psi2

View file

@ -77,7 +77,7 @@ class MLP(Kernpart):
self._K_diag_computations(X) self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2) self._K_computations(X, X2)
denom3 = self._K_denom*self._K_denom*self._K_denom denom3 = self._K_denom*self._K_denom*self._K_denom

View file

@ -112,7 +112,7 @@ class PeriodicMatern32(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

View file

@ -114,7 +114,7 @@ class PeriodicMatern52(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

View file

@ -110,7 +110,7 @@ class PeriodicExponential(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

View file

@ -86,7 +86,7 @@ class POLY(Kernpart):
self._K_diag_computations(X) self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2) self._K_computations(X, X2)
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1) base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)

View file

@ -54,15 +54,15 @@ class Prod(Kernpart):
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""Derivative of the covariance matrix with respect to the parameters.""" """Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else: else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""

View file

@ -41,15 +41,15 @@ class prod_orthogonal(Kernpart):
self._K_computations(X,X2) self._K_computations(X,X2)
target += self._K1 * self._K2 target += self._K1 * self._K2
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
else: else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""

View file

@ -52,7 +52,7 @@ class RationalQuadratic(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None: X2 = X if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale) dist2 = np.square((X-X2.T)/self.lengthscale)

View file

@ -8,6 +8,7 @@ from kernpart import Kernpart
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class RBF(Kernpart): class RBF(Kernpart):
""" """
@ -50,7 +51,7 @@ class RBF(Kernpart):
else: else:
lengthscale = np.ones(self.input_dim) lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance) self.variance = Param('variance', variance, Logexp())
self.lengthscale = Param('lengthscale', lengthscale) self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale) self.lengthscale.add_observer(self, self.update_lengthscale)
@ -141,7 +142,7 @@ class RBF(Kernpart):
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale) d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None] dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD: if not self.ARD:
self.lengthscale.gradeint = dpsi1_dlength.sum() self.lengthscale.gradient = dpsi1_dlength.sum()
else: else:
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0) self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
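Param('variance', variance, Logexp()) above attaches a positivity constraint at construction time. The exact transformation Logexp applies is not shown in this diff, so the softplus-style map below is an assumption about its general shape rather than GPy's definition:

import numpy as np

class Softplus(object):
    def f(self, x):       # unconstrained x -> positive y
        return np.log1p(np.exp(x))
    def finv(self, y):    # positive y -> unconstrained x
        return np.log(np.expm1(y))
    def gradfactor(self, y):
        # dy/dx written in terms of the constrained value y
        return 1. - np.exp(-y)

t = Softplus()
y = t.f(np.array([-2.0, 0.0, 3.0]))
assert np.all(y > 0) and np.allclose(t.f(t.finv(y)), y)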

View file

@ -97,7 +97,7 @@ class RBFInv(RBF):
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)] # return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale) # TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
self._K_computations(X, X2) self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK) target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD: if self.ARD:

View file

@ -73,7 +73,7 @@ class RBFCos(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
self._K_computations(X,X2) self._K_computations(X,X2)
target[0] += np.sum(dL_dK*self._dvar) target[0] += np.sum(dL_dK*self._dvar)
if self.ARD: if self.ARD:

View file

@ -50,7 +50,7 @@ class Spline(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance*X.flatten()**3/3. target += self.variance*X.flatten()**3/3.
def dK_dtheta(self,X,X2,target): def _param_grad_helper(self,X,X2,target):
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6. target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
def dKdiag_dtheta(self,X,target): def dKdiag_dtheta(self,X,target):

View file

@ -40,7 +40,7 @@ class Symmetric(Kernpart):
self.k.K(X,AX2,target) self.k.K(X,AX2,target)
self.k.K(AX,AX2,target) self.k.K(AX,AX2,target)
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform) AX = np.dot(X,self.transform)
if X2 is None: if X2 is None:
@ -48,10 +48,10 @@ class Symmetric(Kernpart):
ZX2 = AX ZX2 = AX
else: else:
AX2 = np.dot(X2, self.transform) AX2 = np.dot(X2, self.transform)
self.k.dK_dtheta(dL_dK,X,X2,target) self.k._param_grad_helper(dL_dK,X,X2,target)
self.k.dK_dtheta(dL_dK,AX,X2,target) self.k._param_grad_helper(dL_dK,AX,X2,target)
self.k.dK_dtheta(dL_dK,X,AX2,target) self.k._param_grad_helper(dL_dK,X,AX2,target)
self.k.dK_dtheta(dL_dK,AX,AX2,target) self.k._param_grad_helper(dL_dK,AX,AX2,target)
def gradients_X(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
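The symmetrised kernel sums the base covariance over the original and transformed inputs, so its parameter gradient is the base gradient accumulated over the same four input pairs; that is why _param_grad_helper is delegated four times above. A toy sketch with an RBF-like base covariance (all names and values here are illustrative):

import numpy as np

def base_k(X, X2, variance=1.0, lengthscale=1.0):
    # Toy 1-D squared-exponential covariance, for illustration only
    return variance * np.exp(-0.5 * np.square((X - X2.T) / lengthscale))

X = np.random.randn(5, 1)
A = -np.eye(1)                     # an example symmetrising transform
AX = X.dot(A)
K_sym = base_k(X, X) + base_k(AX, X) + base_k(X, AX) + base_k(AX, AX)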


@ -348,7 +348,7 @@ class spkern(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
self._weave_inline(self._Kdiag_code, X, target) self._weave_inline(self._Kdiag_code, X, target)
def dK_dtheta(self,partial,X,Z,target): def _param_grad_helper(self,partial,X,Z,target):
if Z is None: if Z is None:
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial) self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
else: else:


@ -3,7 +3,7 @@
#TODO #TODO
""" """
A lot of this code assumes that the link functio nis the identity. A lot of this code assumes that the link function is the identity.
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity. I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
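The comment above flags that the EP moments are only expected to be correct when the link function is the identity, i.e. the latent function value is used unchanged. Purely to illustrate that assumption (these names are hypothetical, not GPy's link-function API):

# Hypothetical illustration: under the identity link g(f) = f and dg/df = 1,
# so transforming the latent value is a no-op.
def identity_link(f):
    return f

def identity_link_grad(f):
    return 1.0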


@ -5,7 +5,7 @@ import unittest
import numpy as np import numpy as np
import GPy import GPy
verbose = False verbose = True
try: try:
import sympy import sympy


@ -9,42 +9,44 @@ import numpy
import GPy import GPy
import itertools import itertools
from GPy.core import Model from GPy.core import Model
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.transformations import Logexp
class PsiStatModel(Model): class PsiStatModel(Model):
def __init__(self, which, X, X_variance, Z, num_inducing, kernel): def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
super(PsiStatModel, self).__init__(name='psi stat test')
self.which = which self.which = which
self.X = X self.X = Param("X", X)
self.X_variance = X_variance self.X_variance = Param('X_variance', X_variance, Logexp())
self.Z = Z self.Z = Param("Z", Z)
self.N, self.input_dim = X.shape self.N, self.input_dim = X.shape
self.num_inducing, input_dim = Z.shape self.num_inducing, input_dim = Z.shape
assert self.input_dim == input_dim, "shape mismatch: Z:{!s} X:{!s}".format(Z.shape, X.shape) assert self.input_dim == input_dim, "shape mismatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
self.kern = kernel self.kern = kernel
super(PsiStatModel, self).__init__()
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance) self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
def _get_param_names(self): self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
return Xnames + Znames + self.kern._get_param_names()
def _get_params(self):
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
def _set_params(self, x, save_old=True, save_count=0):
start, end = 0, self.X.size
self.X = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + self.X_variance.size
self.X_variance = x[start: end].reshape(self.N, self.input_dim)
start, end = end, end + self.Z.size
self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(x[end:])
def log_likelihood(self): def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
def _log_likelihood_gradients(self):
def parameters_changed(self):
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
self.X.gradient = psimu
self.X_variance.gradient = psiS
#psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim) #psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
except AttributeError: psiZ = numpy.zeros_like(self.Z)
self.Z.gradient = psiZ
#psiZ = numpy.ones(self.num_inducing * self.input_dim) #psiZ = numpy.ones(self.num_inducing * self.input_dim)
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten() N,M = self.X.shape[0], self.Z.shape[0]
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad)) dL_dpsi0, dL_dpsi1, dL_dpsi2 = numpy.zeros([N]), numpy.zeros([N,M]), numpy.zeros([N,M,M])
if self.which == 'psi0': dL_dpsi0 += 1
if self.which == 'psi1': dL_dpsi1 += 1
if self.which == 'psi2': dL_dpsi2 += 1
self.kern.update_gradients_variational(numpy.zeros([1,1]),
dL_dpsi0,
dL_dpsi1,
dL_dpsi2, self.X, self.X_variance, self.Z)
class DPsiStatTest(unittest.TestCase): class DPsiStatTest(unittest.TestCase):
input_dim = 5 input_dim = 5
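The rewritten PsiStatModel above shows the new parameterization interface: arrays are wrapped in Param objects, registered with add_parameters, and gradients are written into each Param inside parameters_changed instead of being returned from _log_likelihood_gradients. A minimal, self-contained sketch of the same pattern on a toy model (assuming that interface; not part of the test suite):

import numpy as np
from GPy.core import Model
from GPy.core.parameterization.param import Param

class ToyQuadraticModel(Model):
    # Toy objective: log likelihood -0.5 * ||w||^2, so the gradient is -w
    def __init__(self, w):
        super(ToyQuadraticModel, self).__init__(name='toy quadratic')
        self.w = Param('w', w)
        self.add_parameters(self.w)

    def log_likelihood(self):
        return -0.5 * float(np.sum(np.square(self.w)))

    def parameters_changed(self):
        self.w.gradient = -1.0 * np.asarray(self.w)

Such a model can then be exercised the same way as the tests below, e.g. m = ToyQuadraticModel(np.random.randn(3)); m.randomize(); assert m.checkgrad().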
@ -57,61 +59,66 @@ class DPsiStatTest(unittest.TestCase):
Y = X.dot(numpy.random.randn(input_dim, input_dim)) Y = X.dot(numpy.random.randn(input_dim, input_dim))
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)] # kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), kernels = [
GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim), GPy.kern.linear(input_dim),
GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)] GPy.kern.rbf(input_dim),
#GPy.kern.bias(input_dim),
#GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
#GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)
]
def testPsi0(self): def testPsi0(self):
for k in self.kernels: for k in self.kernels:
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\ m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() #m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) import ipdb;ipdb.set_trace()
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi1(self): def testPsi1(self):
for k in self.kernels: for k in self.kernels:
m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_lin(self): def testPsi2_lin(self):
k = self.kernels[0] k = self.kernels[0]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_lin_bia(self): def testPsi2_lin_bia(self):
k = self.kernels[3] k = self.kernels[3]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_rbf(self): def testPsi2_rbf(self):
k = self.kernels[1] k = self.kernels[1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_rbf_bia(self): def testPsi2_rbf_bia(self):
k = self.kernels[-1] k = self.kernels[-1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_bia(self): def testPsi2_bia(self):
k = self.kernels[2] k = self.kernels[2]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
if __name__ == "__main__": if __name__ == "__main__":