Compare commits

...

17 commits

Author SHA1 Message Date
Yuri Victorovich
38bffb154c Fix lint issues related to comparisons with None 2025-06-19 11:42:32 +02:00
Martin Bubel
26a7f21655
Merge pull request #1116 from SheffieldML/limit-pytest-version
limit upper pytest version in workflows
2025-06-19 11:41:27 +02:00
Martin Bubel
156f5b738f
Update .github/workflows/test-and-deploy.yml
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-06-19 11:32:30 +02:00
Martin Bubel
6ff1e68699
Update .github/workflows/test-and-deploy.yml
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-06-19 11:32:18 +02:00
Martin Bubel
bb4c5ab7e2
Update .github/workflows/test-and-deploy.yml
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-06-19 11:32:11 +02:00
Martin Bubel
7d269c772d limit upper pytest version in workflows 2025-06-17 09:50:51 +02:00
Martin Bubel
acdd03d3ed
Merge pull request #1108 from olamarre/bugfix/ep-pickling
fix: pickle and deep copy classification models with EP
2025-01-15 19:22:47 +01:00
Martin Bubel
b6efa8e548 update changelog 2025-01-15 19:17:45 +01:00
Olivier Lamarre
ad9c507cde fix: pickle and deep copy classification models with EP
- Removed unnecessary code preventing pickling & copying
- Wrote a new test to check that pickling and copying works
2025-01-04 10:21:12 -05:00
Martin Bubel
9a31886085
Merge pull request #1104 from SheffieldML/1098-cant-instantiate-halft-prior
1098 cant instantiate halft prior
2024-10-28 23:32:56 +01:00
Martin Bubel
d7da7c617f remove dev-test-script 2024-10-28 23:28:18 +01:00
Martin Bubel
5a9e5e3cc4 update prior tests 2024-10-28 23:28:10 +01:00
Martin Bubel
200798eb48 draft: update prior __new__ functions 2024-10-28 23:06:31 +01:00
Martin Bubel
1fcb40845d
Merge pull request #1101 from janmayer/fix_invalid_escape_sequence
Fix invalid escape sequence
2024-10-27 13:51:56 +01:00
Martin Bubel
c04c9b328d update changelog 2024-10-27 13:51:41 +01:00
Jan Mayer
12d4c22ca7 Escape more strings 2024-10-26 09:36:11 +02:00
Jan Mayer
4fa858ee6d Fix invalid escape sequence 2024-10-21 20:15:11 +02:00
60 changed files with 563 additions and 426 deletions

View file

@ -39,7 +39,7 @@ jobs:
- name: Install test dependencies
run: |
pip install matplotlib
pip install pytest
pip install 'pytest<8'
- name: pytest
run: |
@ -71,7 +71,7 @@ jobs:
- name: Install test dependencies
run: |
pip install matplotlib
pip install pytest
pip install 'pytest<8'
- name: pytest
run: |
@ -103,7 +103,7 @@ jobs:
- name: Install test dependencies
run: |
pip install matplotlib
pip install pytest
pip install 'pytest<8'
- name: pytest
run: |

View file

@ -1,6 +1,11 @@
# Changelog
## Unreleased
* fix pickle and deep copy for classification models inheriting from EP #1108 [olamarre]
* update prior `__new__` methods #1098 [MartinBubel]
* fix invalid escape sequence #1101 [janmayer]
## v1.13.2 (2024-07-21)
* update string checks in initialization method for latent variable and put `empirical_samples` init-method on a deprecation path

View file

@ -194,7 +194,7 @@ class GP(Model):
# Make sure to name this variable and the predict functions will "just work"
# In maths the predictive variable is:
# K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
# W_{pp} := \texttt{Woodbury inv}
# W_{pp} := \\texttt{Woodbury inv}
# p := _predictive_variable
@property
@ -283,7 +283,7 @@ class GP(Model):
def log_likelihood(self):
"""
The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
The log marginal likelihood of the model, :math:`p(\\mathbf{y})`, this is the objective function of the model being optimised
"""
return self._log_marginal_likelihood
@ -296,9 +296,9 @@ class GP(Model):
diagonal of the covariance is returned.
.. math::
p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
= N(f*| K_{x*x}(K_{xx} + \\Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \\Sigma)^{-1}K_{xx*}
\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
"""
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
if self.mean_function is not None:
@ -702,7 +702,7 @@ class GP(Model):
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
@ -718,7 +718,7 @@ class GP(Model):
Calculation of the log predictive density by sampling
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
@ -734,24 +734,24 @@ class GP(Model):
def _raw_posterior_covariance_between_points(self, X1, X2):
"""
Computes the posterior covariance between points. Does not account for
Computes the posterior covariance between points. Does not account for
normalization or likelihood
:param X1: some input observations
:param X2: other input observations
:returns:
:returns:
cov: raw posterior covariance: k(X1,X2) - k(X1,X) G^{-1} K(X,X2)
"""
return self.posterior.covariance_between_points(self.kern, self.X, X1, X2)
def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
likelihood=None,
def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
likelihood=None,
include_likelihood=True):
"""
Computes the posterior covariance between points. Includes likelihood
variance as well as normalization so that evaluation at (x,x) is consistent
Computes the posterior covariance between points. Includes likelihood
variance as well as normalization so that evaluation at (x,x) is consistent
with model.predict
:param X1: some input observations
@ -762,8 +762,8 @@ class GP(Model):
the predicted underlying latent function f.
:type include_likelihood: bool
:returns:
cov: posterior covariance, a Numpy array, Nnew x Nnew if
:returns:
cov: posterior covariance, a Numpy array, Nnew x Nnew if
self.output_dim == 1, and Nnew x Nnew x self.output_dim otherwise.
"""
@ -774,7 +774,7 @@ class GP(Model):
mean, _ = self._raw_predict(X1, full_cov=True)
if likelihood is None:
likelihood = self.likelihood
_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
Y_metadata=Y_metadata)
if self.normalizer is not None:

View file

@ -580,7 +580,11 @@ class DGPLVM(Prior):
domain = _REAL
def __new__(cls, sigma2, lbl, x_shape):
return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
return newfunc(cls)
else:
return newfunc(cls, sigma2, lbl, x_shape)
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
@ -1275,7 +1279,12 @@ class HalfT(Prior):
for instance in cls._instances:
if instance().A == A and instance().nu == nu:
return instance()
o = super(Prior, cls).__new__(cls, A, nu)
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, A, nu)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()

View file

@ -38,7 +38,7 @@ class SparseGP_MPI(SparseGP):
mean_function=None, inference_method=None, name='sparse gp',
Y_metadata=None, mpi_comm=None, normalizer=False):
self._IN_OPTIMIZATION_ = False
if mpi_comm != None:
if mpi_comm is not None:
if inference_method is None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
@ -52,7 +52,7 @@ class SparseGP_MPI(SparseGP):
self.mpi_comm = mpi_comm
# Manage the data (Y) division
if mpi_comm != None:
if mpi_comm is not None:
from ..util.parallel import divide_data
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm.rank, mpi_comm.size)
self.N_range = (N_start, N_end)
@ -65,7 +65,7 @@ class SparseGP_MPI(SparseGP):
def __getstate__(self):
dc = super(SparseGP_MPI, self).__getstate__()
dc['mpi_comm'] = None
if self.mpi_comm != None:
if self.mpi_comm is not None:
del dc['N_range']
del dc['N_list']
del dc['Y_local']
@ -81,7 +81,7 @@ class SparseGP_MPI(SparseGP):
@SparseGP.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self.mpi_comm is not None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)

View file

@ -44,7 +44,7 @@ class Symbolic_core():
self._set_derivatives(derivatives)
self._set_parameters(parameters)
# Convert the expressions to a list for common sub expression elimination
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
self.update_expression_list()
# Apply any global stabilisation operations to expressions.
@ -86,7 +86,7 @@ class Symbolic_core():
# object except as cached. For covariance functions this is X
# and Z, for likelihoods F and for mapping functions X.
self.cacheable_vars = [] # list of everything that's cacheable
for var in cacheable:
for var in cacheable:
self.variables[var] = [e for e in vars if e.name.split('_')[0]==var.lower()]
self.cacheable_vars += self.variables[var]
for var in cacheable:
@ -105,7 +105,7 @@ class Symbolic_core():
for derivative in derivatives:
derivative_arguments += self.variables[derivative]
# Do symbolic work to compute derivatives.
# Do symbolic work to compute derivatives.
for key, func in self.expressions.items():
# if func['function'].is_Matrix:
# rows = func['function'].shape[0]
@ -126,7 +126,7 @@ class Symbolic_core():
if theta.name in parameters:
val = parameters[theta.name]
# Add parameter.
self.link_parameters(Param(theta.name, val, None))
#self._set_attribute(theta.name, )
@ -174,7 +174,7 @@ class Symbolic_core():
code = self.code[function]['derivative'][theta.name]
gradient[theta.name] = (partial*eval(code, self.namespace)).sum()
return gradient
def eval_gradients_X(self, function, partial, **kwargs):
if 'X' in kwargs:
gradients_X = np.zeros_like(kwargs['X'])
@ -194,7 +194,7 @@ class Symbolic_core():
for variable, code in self.variable_sort(self.code['parameters_changed']):
lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
return lcode
def code_update_cache(self):
lcode = ''
for var in self.cacheable:
@ -208,7 +208,7 @@ class Symbolic_core():
for i, theta in enumerate(self.variables[var]):
lcode+= "\t" + var + '= np.atleast_2d(' + var + ')\n'
lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n"
for variable, code in self.variable_sort(self.code['update_cache']):
lcode+= self._print_code(variable) + ' = ' + self._print_code(code) + "\n"
@ -250,7 +250,7 @@ class Symbolic_core():
"""Make sure namespace gets updated when setting attributes."""
setattr(self, name, value)
self.namespace.update({name: getattr(self, name)})
def update_expression_list(self):
"""Extract a list of expressions from the dictionary of expressions."""
@ -260,9 +260,9 @@ class Symbolic_core():
for fname, fexpressions in self.expressions.items():
for type, texpressions in fexpressions.items():
if type == 'function':
self.expression_list.append(texpressions)
self.expression_list.append(texpressions)
self.expression_keys.append([fname, type])
self.expression_order.append(1)
self.expression_order.append(1)
elif type[-10:] == 'derivative':
for dtype, expression in texpressions.items():
self.expression_list.append(expression)
@ -274,9 +274,9 @@ class Symbolic_core():
elif type[:-10] == 'third_':
self.expression_order.append(5) #sym.count_ops(self.expressions[type][dtype]))
else:
self.expression_list.append(fexpressions[type])
self.expression_list.append(fexpressions[type])
self.expression_keys.append([fname, type])
self.expression_order.append(2)
self.expression_order.append(2)
# This step may be unnecessary.
# Not 100% sure if the sub expression elimination is order sensitive. This step orders the list with the 'function' code first and derivatives after.
@ -313,7 +313,7 @@ class Symbolic_core():
sym_var = sym.var(cache_prefix + str(i))
self.variables[cache_prefix].append(sym_var)
replace_dict[expr.name] = sym_var
for i, expr in enumerate(params_change_list):
sym_var = sym.var(sub_prefix + str(i))
self.variables[sub_prefix].append(sym_var)
@ -329,7 +329,7 @@ class Symbolic_core():
for keys in self.expression_keys:
for replace, void in common_sub_expressions:
setInDict(self.expressions, keys, getFromDict(self.expressions, keys).subs(replace, replace_dict[replace.name]))
self.expressions['parameters_changed'] = {}
self.expressions['update_cache'] = {}
for var, expr in common_sub_expressions:
@ -339,7 +339,7 @@ class Symbolic_core():
self.expressions['update_cache'][replace_dict[var.name].name] = expr
else:
self.expressions['parameters_changed'][replace_dict[var.name].name] = expr
def _gen_code(self):
"""Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""
@ -357,8 +357,8 @@ class Symbolic_core():
return code
self.code = match_key(self.expressions)
def _expr2code(self, arg_list, expr):
"""Convert the given symbolic expression into code."""
code = lambdastr(arg_list, expr)
@ -379,7 +379,7 @@ class Symbolic_core():
def _display_expression(self, keys, user_substitutes={}):
"""Helper function for human friendly display of the symbolic components."""
# Create some pretty maths symbols for the display.
sigma, alpha, nu, omega, l, variance = sym.var('\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
sigma, alpha, nu, omega, l, variance = sym.var(r'\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
substitutes = {'scale': sigma, 'shape': alpha, 'lengthscale': l, 'variance': variance}
substitutes.update(user_substitutes)
@ -416,5 +416,5 @@ class Symbolic_core():
return int(digits[0])
else:
return x[0]
return sorted(var_dict.items(), key=sort_key, reverse=reverse)

View file

@ -134,10 +134,10 @@ class posteriorParams(posteriorParamsBase):
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
L = jitchol(B)
V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \Sigma^(-1))^(-1)
Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \\Sigma^(-1))^(-1)
aux_alpha , _ = dpotrs(L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1)
alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p)
alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p)
mu = np.dot(K, alpha) + mean_prior
return posteriorParams(mu=mu, Sigma=Sigma, L=L)
@ -151,8 +151,8 @@ class posteriorParamsDTC(posteriorParamsBase):
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn
si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\\top)^(-1)) K_mn
si = np.sum(V.T*V[:,i],-1) #(V V^\\top)[:,i]
self.mu += (delta_v-delta_tau*self.mu[i])*si
#mu = np.dot(Sigma, v_tilde)
@ -229,24 +229,17 @@ class EPBase(object):
v_diff = np.mean(np.square(ga_approx.v-self.ga_approx_old.v))
return ((tau_diff < self.epsilon) and (v_diff < self.epsilon))
def __setstate__(self, state):
super(EPBase, self).__setstate__(state[0])
self.epsilon, self.eta, self.delta = state[1]
self.reset()
def __getstate__(self):
return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]
def _save_to_input_dict(self):
input_dict = super(EPBase, self)._save_to_input_dict()
input_dict["epsilon"]=self.epsilon
input_dict["eta"]=self.eta
input_dict["delta"]=self.delta
input_dict["always_reset"]=self.always_reset
input_dict["max_iters"]=self.max_iters
input_dict["ep_mode"]=self.ep_mode
input_dict["parallel_updates"]=self.parallel_updates
input_dict["loading"]=True
input_dict = {
"epsilon": self.epsilon,
"eta": self.eta,
"delta": self.delta,
"always_reset": self.always_reset,
"max_iters": self.max_iters,
"ep_mode": self.ep_mode,
"parallel_updates": self.parallel_updates,
"loading": True
}
return input_dict
class EP(EPBase, ExactGaussianInference):
@ -391,11 +384,11 @@ class EP(EPBase, ExactGaussianInference):
aux_alpha , _ = dpotrs(post_params.L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1)
alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p)
alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p)
LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
Wi = np.dot(LWi.T,LWi)
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
symmetrify(Wi) #(K + Sigma^(\\tilde))^(-1)
dL_dK = 0.5 * (tdot(alpha) - Wi)
dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
@ -530,7 +523,7 @@ class EPDTC(EPBase, VarDTC):
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT0 = Kmm.copy()
Lm = jitchol(LLT0) #K_m = L_m L_m^\top
Lm = jitchol(LLT0) #K_m = L_m L_m^\\top
Vm,info = dtrtrs(Lm, Kmn,lower=1)
# Lmi = dtrtri(Lm)
# Kmmi = np.dot(Lmi.T,Lmi)

View file

@ -27,7 +27,7 @@ class Laplace(LatentFunctionInference):
"""
Laplace Approximation
Find the moments \hat{f} and the hessian at this point
Find the moments \\hat{f} and the hessian at this point
(using Newton-Raphson) of the unnormalised posterior
"""

View file

@ -8,14 +8,14 @@ log_2_pi = np.log(2*np.pi)
class PEP(LatentFunctionInference):
'''
Sparse Gaussian processes using Power-Expectation Propagation
for regression: alpha \approx 0 gives VarDTC and alpha = 1 gives FITC
Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
for regression: alpha \\approx 0 gives VarDTC and alpha = 1 gives FITC
Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
Power Expectation Propagation, https://arxiv.org/abs/1605.07066
'''
const_jitter = 1e-6
def __init__(self, alpha):
super(PEP, self).__init__()
self.alpha = alpha
@ -69,7 +69,7 @@ class PEP(LatentFunctionInference):
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - (1.0+alpha_const_term)/beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) \
+ np.sum(np.square(Uv), 1))*beta_star**2
+ np.sum(np.square(Uv), 1))*beta_star**2
# Compute dL_dKmm
vvT_P = tdot(v.reshape(-1,1)) + P

View file

@ -82,7 +82,7 @@ class Posterior(object):
Posterior mean
$$
K_{xx}v
v := \texttt{Woodbury vector}
v := \\texttt{Woodbury vector}
$$
"""
if self._mean is None:
@ -95,7 +95,7 @@ class Posterior(object):
Posterior covariance
$$
K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
W_{xx} := \texttt{Woodbury inv}
W_{xx} := \\texttt{Woodbury inv}
$$
"""
if self._covariance is None:
@ -146,8 +146,8 @@ class Posterior(object):
"""
return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
$$
L_{W}L_{W}^{\top} = W^{-1}
W^{-1} := \texttt{Woodbury inv}
L_{W}L_{W}^{\\top} = W^{-1}
W^{-1} := \\texttt{Woodbury inv}
$$
"""
if self._woodbury_chol is None:
@ -178,8 +178,8 @@ class Posterior(object):
"""
The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
$$
(K_{xx} + \Sigma_{xx})^{-1}
\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
(K_{xx} + \\Sigma_{xx})^{-1}
\\Sigma_{xx} := \\texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_inv is None:
@ -200,8 +200,8 @@ class Posterior(object):
"""
Woodbury vector in the gaussian likelihood case only is defined as
$$
(K_{xx} + \Sigma)^{-1}Y
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
(K_{xx} + \\Sigma)^{-1}Y
\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_vector is None:

View file

@ -118,7 +118,7 @@ class VarDTC_minibatch(LatentFunctionInference):
if not het_noise:
YRY_full = trYYT*beta
if self.mpi_comm != None:
if self.mpi_comm is not None:
from mpi4py import MPI
psi0_all = np.array(psi0_full)
psi1Y_all = psi1Y_full.copy()
@ -142,7 +142,7 @@ class VarDTC_minibatch(LatentFunctionInference):
num_data, output_dim = Y.shape
input_dim = Z.shape[0]
if self.mpi_comm != None:
if self.mpi_comm is not None:
from mpi4py import MPI
num_data_all = np.array(num_data,dtype=np.int32)
self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
@ -384,7 +384,7 @@ def update_gradients(model, mpi_comm=None):
dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes
if mpi_comm != None:
if mpi_comm is not None:
from mpi4py import MPI
if het_noise:
raise "het_noise not implemented!"
@ -407,7 +407,7 @@ def update_gradients(model, mpi_comm=None):
# update for the KL divergence
model.variational_prior.update_gradients_KL(X)
if mpi_comm != None:
if mpi_comm is not None:
from mpi4py import MPI
KL_div_all = np.array(KL_div)
mpi_comm.Allreduce([np.float64(KL_div), MPI.DOUBLE], [KL_div_all, MPI.DOUBLE])
@ -467,7 +467,7 @@ def update_gradients_sparsegp(model, mpi_comm=None):
dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes
if mpi_comm != None:
if mpi_comm is not None:
from mpi4py import MPI
if het_noise:
raise "het_noise not implemented!"

View file

@ -25,12 +25,12 @@ class Coregionalize(Kern):
This covariance has the form:
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\intercal + \mathrm{diag}(kappa)
\\mathbf{B} = \\mathbf{W}\\mathbf{W}^\\intercal + \\mathrm{diag}(kappa)
An intrinsic/linear coregionalization covariance function of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
k_2(x, y)=\\mathbf{B} k(x, y)
it is obtained as the tensor product between a covariance function
k(x, y) and B.

View file

@ -15,7 +15,7 @@ class EQ_ODE1(Kern):
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
\\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} u_i(t-\\delta_j) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.

View file

@ -15,7 +15,7 @@ class EQ_ODE2(Kern):
The outputs of this kernel have the form
.. math::
\frac{\text{d}^2y_j(t)}{\text{d}^2t} + C_j\frac{\text{d}y_j(t)}{\text{d}t} + B_jy_j(t) = \sum_{i=1}^R w_{j,i} u_i(t)
\\frac{\\text{d}^2y_j(t)}{\\text{d}^2t} + C_j\\frac{\\text{d}y_j(t)}{\\text{d}t} + B_jy_j(t) = \\sum_{i=1}^R w_{j,i} u_i(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.

View file

@ -45,7 +45,7 @@ class GridRBF(GridKern):
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
"""
_support_GPU = True

View file

@ -146,25 +146,25 @@ class Kern(Parameterized):
def psi0(self, Z, variational_posterior):
"""
.. math::
\psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
\\psi_0 = \\sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
"""
.. math::
\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
\\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
"""
.. math::
\psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
\\psi_2^{m,m'} = \\sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
def psi2n(self, Z, variational_posterior):
"""
.. math::
\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
\\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
Thus, we do not sum out n, compared to psi2
"""
@ -173,7 +173,7 @@ class Kern(Parameterized):
"""
.. math::
\\frac{\partial L}{\partial X} = \\frac{\partial L}{\partial K}\\frac{\partial K}{\partial X}
\\frac{\\partial L}{\\partial X} = \\frac{\\partial L}{\\partial K}\\frac{\\partial K}{\\partial X}
"""
raise NotImplementedError
def gradients_X_X2(self, dL_dK, X, X2):
@ -182,7 +182,7 @@ class Kern(Parameterized):
"""
.. math::
\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
\\frac{\\partial^2 L}{\\partial X\\partial X_2} = \\frac{\\partial L}{\\partial K}\\frac{\\partial^2 K}{\\partial X\\partial X_2}
"""
raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
@ -203,7 +203,7 @@ class Kern(Parameterized):
def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def reset_gradients(self):
raise NotImplementedError
@ -216,9 +216,9 @@ class Kern(Parameterized):
.. math::
\\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\
& \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\
& \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
\\frac{\\partial L}{\\partial \\theta_i} & = \\frac{\\partial L}{\\partial \\psi_0}\\frac{\\partial \\psi_0}{\\partial \\theta_i}\\
& \\quad + \\frac{\\partial L}{\\partial \\psi_1}\\frac{\\partial \\psi_1}{\\partial \\theta_i}\\
& \\quad + \\frac{\\partial L}{\\partial \\psi_2}\\frac{\\partial \\psi_2}{\\partial \\theta_i}
Thus, we push the different derivatives through the gradients of the psi
statistics. Be sure to set the gradients for all kernel

View file

@ -16,15 +16,15 @@ class Linear(Kern):
.. math::
k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i
k(x,y) = \\sum_{i=1}^{\\text{input_dim}} \\sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:param variances: the vector of variances :math:`\\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
variance parameter \\sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
@ -121,7 +121,7 @@ class Linear(Kern):
the returned array is of shape [NxNxQxQ].
..math:
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
\\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}
..returns:
dL2_dXdX2: [NxMxQxQ] for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)

View file

@ -20,12 +20,12 @@ class MLP(Kern):
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:param variance: the variance :math:`\\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:param bias_variance: the variance of the prior over bias parameters :math:`\\sigma^2_b`
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \\sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object

View file

@ -16,7 +16,7 @@ class RBF(Stationary):
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
"""
_support_GPU = True

View file

@ -12,48 +12,48 @@ import numpy as np
class sde_Brownian(Brownian):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sigma^2 min(x,y)
k(x,y) = \\sigma^2 min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # this is initial variancve in Bayesian linear regression
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -11,15 +11,15 @@ import numpy as np
class sde_Linear(Linear):
"""
Class provide extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
k(x,y) = \\sum_{i=1}^{input dim} \\sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
@ -27,40 +27,40 @@ class sde_Linear(Linear):
Modify the init method, because one extra parameter is required. X - points
on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -11,15 +11,15 @@ import numpy as np
class sde_Matern32(Matern32):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
@ -27,59 +27,59 @@ class sde_Matern32(Matern32):
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Matern52(Matern52):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac{5}{3}r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
@ -87,51 +87,51 @@ class sde_Matern52(Matern52):
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -24,8 +24,8 @@ class sde_StdPeriodic(StdPeriodic):
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} {}\\sum_{i=1}^{input\\_dim}
\\left( \\frac{\\sin(\\frac{\\pi}{\\lambda_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] }
"""
@ -177,7 +177,7 @@ def seriescoeff(m=6, lengthScale=1.0, magnSigma2=1.0, true_covariance=False):
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
k(\\tau) = \\sum_{j=0}^{+\\infty} q_j^2 \\cos(j\\omega_0 \\tau)
Reference is:

View file

@ -12,63 +12,63 @@ import numpy as np
class sde_White(White):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
White kernel:
.. math::
k(x,y) = \alpha*\delta(x-y)
k(x,y) = \\alpha*\\delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class provide extra functionality to transfer this covariance function into
SDE forrm.
Bias kernel:
.. math::
k(x,y) = \alpha
k(x,y) = \\alpha
"""
def sde_update_gradient_full(self, gradients):
@ -76,28 +76,28 @@ class sde_Bias(Bias):
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -29,7 +29,7 @@ class sde_RBF(RBF):
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
"""
@ -102,7 +102,7 @@ class sde_RBF(RBF):
eps = 1e-12
if (float(Qc) > 1.0 / eps) or (float(Qc) < eps):
warnings.warn(
"""sde_RBF kernel: the noise variance Qc is either very large or very small.
"""sde_RBF kernel: the noise variance Qc is either very large or very small.
It influece conditioning of P_inf: {0:e}""".format(
float(Qc)
)
@ -204,7 +204,7 @@ class sde_Exponential(Exponential):
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
"""
@ -259,7 +259,7 @@ class sde_RatQuad(RatQuad):
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha} \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
"""

View file

@ -24,19 +24,19 @@ class StdPeriodic(Kern):
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} \\sum_{i=1}^{input\\_dim}
\\left( \\frac{\\sin(\\frac{\\pi}{T_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\theta_1` in the formula above
:param variance: the variance :math:`\\theta_1` in the formula above
:type variance: float
:param period: the vector of periods :math:`\T_i`. If None then 1.0 is assumed.
:param period: the vector of periods :math:`\\T_i`. If None then 1.0 is assumed.
:type period: array or list of the appropriate size (or float if there is only one period parameter)
:param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
:param lengthscale: the vector of lengthscale :math:`\\l_i`. If None then 1.0 is assumed.
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD1: Auto Relevance Determination with respect to period.
If equal to "False" one single period parameter :math:`\T_i` for
If equal to "False" one single period parameter :math:`\\T_i` for
each dimension is assumed, otherwise there is one lengthscale
parameter per dimension.
:type ARD1: Boolean
@ -177,7 +177,7 @@ class StdPeriodic(Kern):
Returns only diagonal elements.
"""
return np.zeros(X.shape[0])
def dK2_dXdX2(self, X, X2, dimX, dimX2):
"""
Compute the second derivative of K with respect to:
@ -578,9 +578,9 @@ class StdPeriodic(Kern):
X2 = X
dX = -np.pi*((dL_dK*K)[:,:,None]*np.sin(2*np.pi/self.period*(X[:,None,:] - X2[None,:,:]))/(2.*np.square(self.lengthscale)*self.period)).sum(1)
return dX
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2

View file

@ -35,7 +35,7 @@ class Stationary(Kern):
.. math::
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\\ell_q^2} }.
By default, there's only one lengthscale: seaprate lengthscales for each
dimension can be enables by setting ARD=True.
@ -153,7 +153,7 @@ class Stationary(Kern):
Efficiently compute the scaled distance, r.
..math::
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
r = \\sqrt( \\sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
Note that if thre is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate
@ -259,7 +259,7 @@ class Stationary(Kern):
the returned array is of shape [NxNxQxQ].
..math:
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
\\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}
..returns:
dL2_dXdX2: [NxMxQxQ] in the cov=True case, or [NxMxQ] in the cov=False case,
@ -295,7 +295,7 @@ class Stationary(Kern):
Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:
..math:
\frac{\partial^2 K}{\partial X\partial X}
\\frac{\\partial^2 K}{\\partial X\\partial X}
..returns:
dL2_dXdX: [NxQxQ]
@ -423,7 +423,7 @@ class OU(Stationary):
.. math::
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
"""
@ -460,7 +460,7 @@ class Matern32(Stationary):
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
"""
@ -559,7 +559,7 @@ class Matern52(Stationary):
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac53 r^2) \\exp(- \\sqrt{5} r)
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
@ -626,7 +626,7 @@ class ExpQuad(Stationary):
.. math::
k(r) = \sigma^2 \exp(- 0.5 r^2)
k(r) = \\sigma^2 \\exp(- 0.5 r^2)
notes::
- This is exactly the same as the RBF covariance function, but the
@ -664,10 +664,10 @@ class ExpQuad(Stationary):
class Cosine(Stationary):
"""
Cosine Covariance function
.. math::
k(r) = \sigma^2 \cos(r)
k(r) = \\sigma^2 \\cos(r)
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'):
@ -682,18 +682,18 @@ class Cosine(Stationary):
class ExpQuadCosine(Stationary):
"""
Exponentiated quadratic multiplied by cosine covariance function (spectral mixture kernel).
.. math::
k(r) = \sigma^2 \exp(-2\pi^2r^2)\cos(2\pi r/T)
k(r) = \\sigma^2 \\exp(-2\\pi^2r^2)\\cos(2\\pi r/T)
"""
def __init__(self, input_dim, variance=1., lengthscale=None, period=1., ARD=False, active_dims=None, name='ExpQuadCosine'):
super(ExpQuadCosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
self.period = Param('period', period, Logexp())
self.link_parameters(self.period)
def K_of_r(self, r):
return self.variance * np.exp(-2*np.pi**2*r**2)*np.cos(2*np.pi*r/self.period)
@ -712,18 +712,18 @@ class ExpQuadCosine(Stationary):
super(ExpQuadCosine, self).update_gradients_diag(dL_dKdiag, X)
self.period.gradient = 0.
class Sinc(Stationary):
"""
Sinc Covariance function
.. math::
k(r) = \sigma^2 \sinc(\pi r)
k(r) = \\sigma^2 \\sinc(\\pi r)
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Sinc'):
super(Sinc, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
@ -734,7 +734,7 @@ class Sinc(Stationary):
# small angle approximation to avoid divide by zero errors.
return np.where(r<1e-5, -self.variance*4/3*np.pi*np.pi*r, self.variance/r * (np.cos(2*np.pi*r)-np.sinc(2*r)))
class RatQuad(Stationary):
"""
@ -742,7 +742,7 @@ class RatQuad(Stationary):
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
"""

View file

@ -14,23 +14,23 @@ class Eq_ode1(Kernpart):
This outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
\\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} f_i(t-\\delta_j) +\\sqrt{\\kappa_j}g_j(t) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
@ -62,7 +62,7 @@ class Eq_ode1(Kernpart):
self.is_stationary = False
self.gaussian_initial = False
self._set_params(self._get_params())
def _get_params(self):
param_list = [self.W.flatten()]
if self.kappa is not None:
@ -103,11 +103,11 @@ class Eq_ode1(Kernpart):
param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
if self.delay is not None:
param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
param_names+= ['lengthscale']
param_names+= ['lengthscale']
return param_names
def K(self,X,X2,target):
if X.shape[1] > 2:
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
@ -123,13 +123,13 @@ class Eq_ode1(Kernpart):
def Kdiag(self,index,target):
#target += np.diag(self.B)[np.asarray(index,dtype=int).flatten()]
pass
def _param_grad_helper(self,dL_dK,X,X2,target):
# First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK)
self._dK_ode_dtheta(target)
def _dK_ode_dtheta(self, target):
"""Do all the computations for the ode parts of the covariance function."""
@ -138,7 +138,7 @@ class Eq_ode1(Kernpart):
index_ode = self._index[self._index>0]-1
if self._t2 is None:
if t_ode.size==0:
return
return
t2_ode = t_ode
dL_dK_ode = dL_dK_ode[:, self._index>0]
index2_ode = index_ode
@ -210,7 +210,7 @@ class Eq_ode1(Kernpart):
self._index = self._index[self._order]
self._t = self._t[self._order]
self._rorder = self._order.argsort() # rorder is for reversing the order
if X2 is None:
self._t2 = None
self._index2 = None
@ -229,7 +229,7 @@ class Eq_ode1(Kernpart):
if dL_dK is not None:
self._dL_dK = dL_dK[self._order, :]
self._dL_dK = self._dL_dK[:, self._order2]
def _K_computations(self, X, X2):
"""Perform main body of computations for the ode1 covariance function."""
# First extract times and indices.
@ -253,8 +253,8 @@ class Eq_ode1(Kernpart):
np.hstack((self._K_ode_eq, self._K_ode))))
self._K_dvar = self._K_dvar[self._rorder, :]
self._K_dvar = self._K_dvar[:, self._rorder2]
if X2 is None:
# Matrix giving scales of each output
self._scale = np.zeros((self._t.size, self._t.size))
@ -303,7 +303,7 @@ class Eq_ode1(Kernpart):
self._K_eq = np.zeros((t_eq.size, t2_eq.size))
return
self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
if self.is_normalized:
self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
@ -361,7 +361,7 @@ class Eq_ode1(Kernpart):
self._K_eq_ode = sK.T
else:
self._K_ode_eq = sK
def _K_compute_ode(self):
# Compute covariances between outputs of the ODE models.
@ -370,7 +370,7 @@ class Eq_ode1(Kernpart):
if self._t2 is None:
if t_ode.size==0:
self._K_ode = np.zeros((0, 0))
return
return
t2_ode = t_ode
index2_ode = index_ode
else:
@ -379,14 +379,14 @@ class Eq_ode1(Kernpart):
self._K_ode = np.zeros((t_ode.size, t2_ode.size))
return
index2_ode = self._index2[self._index2>0]-1
# When index is identical
h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
if self._t2 is None:
self._K_ode = 0.5 * (h + h.T)
else:
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
self._K_ode = 0.5 * (h + h2.T)
if not self.is_normalized:
@ -410,28 +410,28 @@ class Eq_ode1(Kernpart):
Decay = self.decay[index]
if self.delay is not None:
t = t - self.delay[index]
t_squared = t*t
half_sigma_decay = 0.5*self.sigma*Decay
[ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
half_sigma_decay)
[ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
half_sigma_decay - t/self.sigma)
h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
+ ln_part_1
- log(Decay + D_j))
- log(Decay + D_j))
- sign2*np.exp(half_sigma_decay*half_sigma_decay
- (Decay + D_j)*t
+ ln_part_2
+ ln_part_2
- log(Decay + D_j)))
sigma2 = self.sigma*self.sigma
if update_derivatives:
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
+ t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
)
@ -439,11 +439,11 @@ class Eq_ode1(Kernpart):
(-1 + np.exp(-t_squared/sigma2-Decay*t)
+ np.exp(-t_squared/sigma2-D_j*t)
- np.exp(-(Decay + D_j)*t)))
dh_dD_i = (dh_dD_i/(Decay+D_j)).real
dh_dD_j = (t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
)
@ -457,7 +457,7 @@ class Eq_ode1(Kernpart):
- (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
- Decay/2*np.exp(-(Decay+D_j)*t))"""
pass
def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
"""Helper function for computing part of the ode1 covariance function.
@ -491,7 +491,7 @@ class Eq_ode1(Kernpart):
inv_sigma_diff_t = 1./self.sigma*diff_t
half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
half_sigma_decay_i - inv_sigma_diff_t,
return_sign=True)
ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
@ -529,7 +529,7 @@ class Eq_ode1(Kernpart):
)
))
self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
# Update jth decay gradient
dh_ddecay2 = (t2_mat*sign2
*np.exp(
@ -539,7 +539,7 @@ class Eq_ode1(Kernpart):
)
-h)
self._dh_ddecay2 = (dh_ddecay/(Decay[:, None] + Decay2[None, :])).real
# Update sigma gradient
self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
+ 2/(np.sqrt(np.pi)
@ -547,10 +547,10 @@ class Eq_ode1(Kernpart):
*((-diff_t/sigma2-Decay[:, None]/2)
*np.exp(-diff_t*diff_t/sigma2)
+ (-t2_mat/sigma2+Decay[:, None]/2)
*np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
- (-t_mat/sigma2-Decay[:, None]/2)
*np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
*np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
- (-t_mat/sigma2-Decay[:, None]/2)
*np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
- Decay[:, None]/2
*np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
return h

View file

@ -9,13 +9,13 @@ import GPy
class Gibbs(Kernpart):
"""
Gibbs non-stationary covariance function.
Gibbs non-stationary covariance function.
.. math::
r = sqrt((x_i - x_j)'*(x_i - x_j))
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = (2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')^{q/2}
@ -25,18 +25,18 @@ class Gibbs(Kernpart):
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\sigma^2`, the process variance, and
The parameters are :math:`\\sigma^2`, the process variance, and
the parameters of l(x) which is a function that can be
specified by the user, by default an multi-layer peceptron is
used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type input_dim: int
:param variance: the variance :math:`\\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \\sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
@ -113,7 +113,7 @@ class Gibbs(Kernpart):
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
else:
target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
pass
@ -123,7 +123,7 @@ class Gibbs(Kernpart):
target[0] += np.sum(dL_dKdiag)
def _K_computations(self, X, X2=None):
"""Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
self._lengthscales=self.mapping.f(X)
@ -146,7 +146,7 @@ class Gibbs(Kernpart):
"""Pre-computations for the gradients of the covaraince function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
:param dL_dK: the gradient of the objective with respect to the covariance function.
:type dL_dK: ndarray"""
self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
if self._lengthscales_two is self._lengthscales:
self._dL_dl_two = None

View file

@ -19,11 +19,11 @@ class Hetero(Kernpart):
.. math::
k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)
k(x_i, x_j) = \\delta_{i,j} \\sigma^2(x_i)
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
where :math:`\\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\\delta_{i,j}` is the Kronecker delta function.
The parameters are the parameters of \sigma^2(x) which is a
The parameters are the parameters of \\sigma^2(x) which is a
function that can be specified by the user, by default an
multi-layer peceptron is used.

View file

@ -11,28 +11,28 @@ class POLY(Kernpart):
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
.. math::
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
k(x, y) = \\sigma^2\\*(\\sigma_w^2 x'y+\\sigma_b^b)^d
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
The kernel parameters are :math:`\\sigma^2` (variance), :math:`\\sigma^2_w`
(weight_variance), :math:`\\sigma^2_b` (bias_variance) and d
(degree). Only gradients of the first three are provided for
kernel optimisation, it is assumed that polynomial degree would
be set by hand.
The kernel is not recommended as it is badly behaved when the
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
:math:`\\sigma^2_w\\*x'\\*y + \\sigma^2_b` has a magnitude greater than one. For completeness
there is an automatic relevance determination version of this
kernel provided (NOTE YET IMPLEMENTED!).
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type input_dim: int
:param variance: the variance :math:`\\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param bias_variance: the variance of the prior over bias parameters :math:`\\sigma^2_b`
:param degree: the degree of the polynomial.
:type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
@ -93,7 +93,7 @@ class POLY(Kernpart):
base_cov_grad = base*dL_dK
target[0] += np.sum(self._K_dvar*dL_dK)
target[1] += (self._K_inner_prod*base_cov_grad).sum()
target[2] += base_cov_grad.sum()
@ -107,14 +107,14 @@ class POLY(Kernpart):
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
else:
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_poly_arg
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
def _K_computations(self, X, X2):
if self.ARD:
pass
@ -133,6 +133,6 @@ class POLY(Kernpart):
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_dvar = self._K_diag_poly_arg**self.degree

View file

@ -15,9 +15,9 @@ class RBFInv(RBF):
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \\ \\text{ where } r^2 = \\sum_{i=1}^d \\frac{ (x_i-x^\\prime_i)^2}{\\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
where \\ell_i is the lengthscale, \\sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
@ -25,7 +25,7 @@ class RBFInv(RBF):
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \\ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object

View file

@ -14,15 +14,15 @@ class TruncLinear(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
k(x,y) = \\sum_{i=1}^input_dim \\sigma^2_i \\max(0, x_iy_i - \\sigma_q)
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:param variances: the vector of variances :math:`\\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
variance parameter \\sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
@ -113,15 +113,15 @@ class TruncLinear_inf(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
k(x,y) = \\sum_{i=1}^input_dim \\sigma^2_i \\max(0, x_iy_i - \\sigma_q)
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:param variances: the vector of variances :math:`\\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
variance parameter \\sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object

View file

@ -243,7 +243,7 @@ class Bernoulli(Likelihood):
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
state = np.seterr(divide='ignore')
# TODO check y \in {0, 1} or {-1, 1}
# TODO check y \\in {0, 1} or {-1, 1}
d3logpdf_dlink3 = np.where(y==1, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
np.seterr(**state)
return d3logpdf_dlink3

View file

@ -14,7 +14,7 @@ class Exponential(Likelihood):
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
L(x) = \\exp(\\lambda) * \\lambda**Y_i / Y_i!
$$
"""
def __init__(self,gp_link=None):
@ -46,7 +46,7 @@ class Exponential(Likelihood):
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
\\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
@ -65,7 +65,7 @@ class Exponential(Likelihood):
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
:param link_f: latent variables (f)
:type link_f: Nx1 array
@ -87,7 +87,7 @@ class Exponential(Likelihood):
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
@ -110,7 +110,7 @@ class Exponential(Likelihood):
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array

View file

@ -54,7 +54,7 @@ class Gamma(Likelihood):
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
\\ln p(y_{i}|\\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables (link(f))
@ -101,7 +101,7 @@ class Gamma(Likelihood):
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables link(f)
@ -126,7 +126,7 @@ class Gamma(Likelihood):
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables link(f)

View file

@ -130,7 +130,7 @@ class Likelihood(Parameterized):
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
@ -199,7 +199,7 @@ class Likelihood(Parameterized):
.. math:
log p(y_{*}|D) = log 1/num_samples prod^{S}_{s=1} p(y_{*}|f_{*s})
f_{*s} ~ p(f_{*}|\mu_{*}\\sigma^{2}_{*})
f_{*s} ~ p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array

View file

@ -145,7 +145,7 @@ class ScaledProbit(Probit):
"""
def __init__(self, nu=1.):
self.nu = float(nu)
def transf(self,f):
return std_norm_cdf(f*self.nu)
@ -157,7 +157,7 @@ class ScaledProbit(Probit):
def d3transf_df3(self,f):
return (safe_square(f*self.nu)-1.)*std_norm_pdf(f*self.nu)*(self.nu**3)
def to_dict(self):
"""
Convert the object into a json serializable dictionary.
@ -180,7 +180,7 @@ class Cloglog(GPTransformation):
or
f = \log (-\log(1-p))
f = \\log (-\\log(1-p))
"""
def transf(self,f):

View file

@ -54,7 +54,7 @@ class Poisson(Likelihood):
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
\\ln p(y_{i}|\\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
@ -72,7 +72,7 @@ class Poisson(Likelihood):
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
:param link_f: latent variables (f)
:type link_f: Nx1 array
@ -92,7 +92,7 @@ class Poisson(Likelihood):
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
@ -113,7 +113,7 @@ class Poisson(Likelihood):
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array

View file

@ -78,7 +78,7 @@ class StudentT(Likelihood):
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
\\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param inv_link_f: latent variables (link(f))
:type inv_link_f: Nx1 array
@ -107,7 +107,7 @@ class StudentT(Likelihood):
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param inv_link_f: latent variables (f)
:type inv_link_f: Nx1 array
@ -129,7 +129,7 @@ class StudentT(Likelihood):
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param inv_link_f: latent variables inv_link(f)
:type inv_link_f: Nx1 array
@ -154,7 +154,7 @@ class StudentT(Likelihood):
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \\lambda(f_{i}))^3 - 3(y_{i} - \\lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \\lambda(f_{i})) + \\sigma^{2} v)^3}
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
@ -175,7 +175,7 @@ class StudentT(Likelihood):
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \\lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})}
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
@ -199,7 +199,7 @@ class StudentT(Likelihood):
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^2 + \\sigma^2 v)^2}
:param inv_link_f: latent variables inv_link_f
:type inv_link_f: Nx1 array
@ -220,7 +220,7 @@ class StudentT(Likelihood):
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \\lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})^{3}}
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array

View file

@ -54,7 +54,7 @@ class Weibull(Likelihood):
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
\\ln p(y_{i}|\\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables (link(f))
@ -117,7 +117,7 @@ class Weibull(Likelihood):
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables link(f)
@ -150,7 +150,7 @@ class Weibull(Likelihood):
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
\\alpha_{i} = \\beta y_{i}
:param link_f: latent variables link(f)

View file

@ -10,7 +10,7 @@ class Additive(Mapping):
.. math::
f(\mathbf{x}*) = f_1(\mathbf{x}*) + f_2(\mathbf(x)*)
f(\\mathbf{x}*) = f_1(\\mathbf{x}*) + f_2(\\mathbf(x)*)
:param mapping1: first mapping to add together.
:type mapping1: GPy.mappings.Mapping

View file

@ -9,7 +9,7 @@ class Compound(Mapping):
.. math::
f(\mathbf{x}) = f_2(f_1(\mathbf{x}))
f(\\mathbf{x}) = f_2(f_1(\\mathbf{x}))
:param mapping1: first mapping
:type mapping1: GPy.mappings.Mapping

View file

@ -9,7 +9,7 @@ class Constant(Mapping):
.. math::
F(\mathbf{x}) = c
F(\\mathbf{x}) = c
:param input_dim: dimension of input.

View file

@ -12,20 +12,20 @@ class Kernel(Mapping):
.. math::
f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
f(\\mathbf{x}) = \\sum_i \\alpha_i k(\\mathbf{z}_i, \\mathbf{x})
or for multple outputs
.. math::
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x})
f_i(\\mathbf{x}) = \\sum_j \\alpha_{i,j} k(\\mathbf{z}_i, \\mathbf{x})
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param Z: input observations containing :math:`\mathbf{Z}`
:param Z: input observations containing :math:`\\mathbf{Z}`
:type Z: ndarray
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern

View file

@ -12,7 +12,7 @@ class Linear(Mapping):
.. math::
F(\mathbf{x}) = \mathbf{A} \mathbf{x})
F(\\mathbf{x}) = \\mathbf{A} \\mathbf{x})
:param input_dim: dimension of input.

View file

@ -22,7 +22,7 @@ class GPKroneckerGaussianRegression(Model):
.. rubric:: References
.. [stegle_et_al_2011] Stegle, O.; Lippert, C.; Mooij, J.M.; Lawrence, N.D.; Borgwardt, K.:Efficient inference in matrix-variate Gaussian models with \iid observation noise. In: Advances in Neural Information Processing Systems, 2011, Pages 630-638
.. [stegle_et_al_2011] Stegle, O.; Lippert, C.; Mooij, J.M.; Lawrence, N.D.; Borgwardt, K.:Efficient inference in matrix-variate Gaussian models with \\iid observation noise. In: Advances in Neural Information Processing Systems, 2011, Pages 630-638
"""
def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):

View file

@ -183,7 +183,7 @@ class SSMRD(Model):
@Model.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self.mpi_comm is not None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank == 0:
self.mpi_comm.Bcast(np.int32(1), root=0)
self.mpi_comm.Bcast(p, root=0)

View file

@ -4002,10 +4002,10 @@ class ContDescrStateSpace(DescreteStateSpace):
"""
Linear Time-Invariant Stochastic Differential Equation (LTI SDE):
dx(t) = F x(t) dt + L d \beta ,where
dx(t) = F x(t) dt + L d \\beta ,where
x(t): (vector) stochastic process
\beta: (vector) Brownian motion process
\\beta: (vector) Brownian motion process
F, L: (time invariant) matrices of corresponding dimensions
Qc: covariance of noise.
@ -4022,7 +4022,7 @@ class ContDescrStateSpace(DescreteStateSpace):
F,L: LTI SDE matrices of corresponding dimensions
Qc: matrix (n,n)
Covarince between different dimensions of noise \beta.
Covarince between different dimensions of noise \\beta.
n is the dimensionality of the noise.
dt: double or iterable

View file

@ -171,7 +171,7 @@ class TPRegression(Model):
def log_likelihood(self):
"""
The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
The log marginal likelihood of the model, :math:`p(\\mathbf{y})`, this is the objective function of the model being optimised
"""
return self._log_marginal_likelihood or self.inference()[1]
@ -184,10 +184,10 @@ class TPRegression(Model):
diagonal of the covariance is returned.
.. math::
p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
= MVN\left(\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
\frac{\nu + \beta - 2}{\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\right)
\nu := \texttt{Degrees of freedom}
p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
= MVN\\left(\\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
\\frac{\\nu + \\beta - 2}{\\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\\right)
\\nu := \\texttt{Degrees of freedom}
"""
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew,
pred_var=self._predictive_variable, full_cov=full_cov)

View file

@ -146,7 +146,7 @@ class WarpedGP(GP):
the jacobian of the warping function here.
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array

View file

@ -167,7 +167,7 @@ class lvm(matplotlib_show):
def show_sensitivities(self):
# A click in the bar chart axis for selection a dimension.
if self.sense_axes != None:
if self.sense_axes is not None:
self.sense_axes.cla()
self.sense_axes.bar(np.arange(self.model.input_dim), self.model.input_sensitivity(), color='b')

View file

@ -159,7 +159,7 @@ def generate_brownian_data(
):
"""
Generate brownian data - data from Brownian motion.
First point is always 0, and \Beta(0) = 0 - standard conditions for Brownian motion.
First point is always 0, and \\Beta(0) = 0 - standard conditions for Brownian motion.
Input:
--------------------------------

View file

@ -5,6 +5,8 @@
The test cases for various inference algorithms
"""
import copy
import pickle
import numpy as np
import GPy
@ -146,6 +148,32 @@ class TestInferenceGPEP:
< 1e6
)
def test_pickle_copy_EP(self):
"""Pickling and deep-copying a classification model employing EP"""
# Dummy binary classification dataset
X = np.array([0, 1, 2, 3]).reshape(-1, 1)
Y = np.array([0, 0, 1, 1]).reshape(-1, 1)
# Some classification model
inf = GPy.inference.latent_function_inference.expectation_propagation.EP(
max_iters=30, delta=0.5
)
m = GPy.core.GP(
X=X,
Y=Y,
kernel=GPy.kern.RBF(input_dim=1, variance=1.0, lengthscale=1.0),
inference_method = inf,
likelihood=GPy.likelihoods.Bernoulli(),
mean_function=None
)
m.optimize()
m_pickled = pickle.dumps(m)
assert pickle.loads(m_pickled) is not None
assert copy.deepcopy(m) is not None
# NOTE: adding a test like above for parameterized likelihood- the above test is
# only for probit likelihood which does not have any tunable hyperparameter which is why
# the term in dictionary of gradients: dL_dthetaL will always be zero. So here we repeat tests for

View file

@ -269,8 +269,8 @@ class TestMisc:
from GPy.core.parameterization.variational import NormalPosterior
X_pred = NormalPosterior(X_pred_mu, X_pred_var)
# mu = \int f(x)q(x|mu,S) dx = \int 2x.q(x|mu,S) dx = 2.mu
# S = \int (f(x) - m)^2q(x|mu,S) dx = \int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S
# mu = \\int f(x)q(x|mu,S) dx = \\int 2x.q(x|mu,S) dx = 2.mu
# S = \\int (f(x) - m)^2q(x|mu,S) dx = \\int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S
Y_mu_true = 2 * X_pred_mu
Y_var_true = 4 * X_pred_var
Y_mu_pred, Y_var_pred = m.predict_noiseless(X_pred)
@ -684,7 +684,7 @@ class TestMisc:
warp_m = GPy.models.WarpedGP(
X, Y
) # , kernel=warp_k)#, warping_function=warp_f)
warp_m[".*\.d"].constrain_fixed(1.0)
warp_m[r".*\.d"].constrain_fixed(1.0)
warp_m.optimize_restarts(
parallel=False, robust=False, num_restarts=5, max_iters=max_iters
)

View file

@ -3,6 +3,21 @@
import pytest
import numpy as np
import GPy
from GPy.core.parameterization.priors import (
Gaussian,
Uniform,
LogGaussian,
MultivariateGaussian,
Gamma,
InverseGamma,
DGPLVM,
DGPLVM_KFDA,
DGPLVM_Lamda,
DGPLVM_T,
HalfT,
Exponential,
StudentT,
)
class TestPrior:
@ -178,3 +193,90 @@ class TestPrior:
# should raise an assertionerror.
with pytest.raises(AssertionError):
m.rbf.set_prior(gaussian)
def initialize_gaussian_prior() -> None:
return Gaussian(0, 1)
def initialize_uniform_prior() -> None:
return Uniform(0, 1)
def initialize_log_gaussian_prior() -> None:
return LogGaussian(0, 1)
def initialize_multivariate_gaussian_prior() -> None:
return MultivariateGaussian(np.zeros(2), np.eye(2))
def initialize_gamma_prior() -> None:
return Gamma(1, 1)
def initialize_inverse_gamma_prior() -> None:
return InverseGamma(1, 1)
def initialize_dgplvm_prior() -> None:
# return DGPLVM(...)
raise NotImplementedError("No idea how to initialize this prior")
def initialize_dgplvm_kfda_prior() -> None:
# return DGPLVM_KFDA(...)
raise NotImplementedError("No idea how to initialize this prior")
def initialize_dgplvm_lamda_prior() -> None:
# return DGPLVM_Lamda(...)
raise NotImplementedError("No idea how to initialize this prior")
def initialize_dgplvm_t_prior() -> None:
# return DGPLVM_T(1, 1, (1, 1))
raise NotImplementedError("No idea how to initialize this prior")
def initialize_half_t_prior() -> None:
return HalfT(1, 1)
def initialize_exponential_prior() -> None:
return Exponential(1)
def initialize_student_t_prior() -> None:
return StudentT(1, 1, 1)
PRIORS = {
"Gaussian": initialize_gaussian_prior,
"Uniform": initialize_uniform_prior,
"LogGaussian": initialize_log_gaussian_prior,
"MultivariateGaussian": initialize_multivariate_gaussian_prior,
"Gamma": initialize_gamma_prior,
"InverseGamma": initialize_inverse_gamma_prior,
# "DGPLVM": initialize_dgplvm_prior,
# "DGPLVM_KFDA": initialize_dgplvm_kfda_prior,
# "DGPLVM_Lamda": initialize_dgplvm_lamda_prior,
# "DGPLVM_T": initialize_dgplvm_t_prior,
"HalfT": initialize_half_t_prior,
"Exponential": initialize_exponential_prior,
"StudentT": initialize_student_t_prior,
}
def check_prior(prior_getter: str) -> None:
prior_getter()
def test_priors() -> None:
for prior_name, prior_getter in PRIORS.items():
try:
check_prior(prior_getter)
except Exception as e:
raise RuntimeError(
f"Failed to initialize {prior_name} prior"
) from e # noqa E501

View file

@ -47,7 +47,7 @@ class TestRVTransformation:
# ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
# ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
# ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
# ax.set_xlabel(r'transformed $\theta$', fontsize=16)
# ax.set_xlabel(r'transformed $\\theta$', fontsize=16)
# ax.set_ylabel('PDF', fontsize=16)
# plt.legend(loc='best')
# plt.show(block=True)

View file

@ -537,7 +537,7 @@ http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%2
# In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD.
header = """// Data table response\ngoogle.visualization.Query.setResponse("""
data = data[len(header):-2]
data = re.sub('new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
data = re.sub(r'new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
timeseries = json.loads(data)
columns = [k['label'] for k in timeseries['table']['cols']]
rows = map(lambda x: [k['v'] for k in x['c']], timeseries['table']['rows'])
@ -782,7 +782,7 @@ def hapmap3(data_set='hapmap3'):
/ 1, iff SNPij==(B1,B1)
Aij = | 0, iff SNPij==(B1,B2)
\ -1, iff SNPij==(B2,B2)
\\ -1, iff SNPij==(B2,B2)
The SNP data and the meta information (such as iid, sex and phenotype) are
stored in the dataframe datadf, index is the Individual ID,
@ -1011,7 +1011,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
sample_info.columns = c
# get the labels right:
rep = re.compile('\(.*\)')
rep = re.compile(r'\(.*\)')
def filter_dev_stage(row):
if isnull(row):
row = "2-cell stage embryo"
@ -1050,7 +1050,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
#gene_info[file_info.name[:-18]] = inner.Refseq_IDs
# Strip GSM number off data index
rep = re.compile('GSM\d+_')
rep = re.compile(r'GSM\d+_')
from pandas import MultiIndex
columns = MultiIndex.from_tuples([row.split('_', 1) for row in data.columns])

View file

@ -180,24 +180,24 @@ class NetpbmFile(object):
"""Read PAM header and initialize instance."""
regroups = re.search(
b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
rb"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
rb"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
self.header = regroups[0]
self.magicnum = b'P7'
for group in regroups[1:]:
key, value = group.split()
setattr(self, unicode(key).lower(), int(value))
matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
matches = re.findall(rb"(TUPLTYPE\s+\w+)", self.header)
self.tupltypes = [s.split(None, 1)[1] for s in matches]
def _read_pnm_header(self, data):
"""Read PNM header and initialize instance."""
bpm = data[1:2] in b"14"
regroups = re.search(b"".join((
b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
b"\s*(\d+)\s+(?:#.*[\r\n])*",
b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
rb"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
rb"\s*(\d+)\s+(?:#.*[\r\n])*",
rb"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
rb"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
self.header = regroups[0]
self.magicnum = regroups[1]
self.width = int(regroups[2])

View file

@ -150,7 +150,7 @@ with open('../../GPy/__version__.py', 'r') as f:
version = f.read()
release = version
print version
print(version)
# version = '0.8.8'
# The full version, including alpha/beta/rc tags.