Mirror of https://github.com/SheffieldML/GPy.git
synced 2026-04-24 20:36:23 +02:00
Compare commits
17 commits
* 38bffb154c
* 26a7f21655
* 156f5b738f
* 6ff1e68699
* bb4c5ab7e2
* 7d269c772d
* acdd03d3ed
* b6efa8e548
* ad9c507cde
* 9a31886085
* d7da7c617f
* 5a9e5e3cc4
* 200798eb48
* 1fcb40845d
* c04c9b328d
* 12d4c22ca7
* 4fa858ee6d
60 changed files with 563 additions and 426 deletions
.github/workflows/test-and-deploy.yml (vendored, 6 changes)
@@ -39,7 +39,7 @@ jobs:
 - name: Install test dependencies
 run: |
 pip install matplotlib
-pip install pytest
+pip install 'pytest<8'

 - name: pytest
 run: |

@@ -71,7 +71,7 @@ jobs:
 - name: Install test dependencies
 run: |
 pip install matplotlib
-pip install pytest
+pip install 'pytest<8'

 - name: pytest
 run: |

@@ -103,7 +103,7 @@ jobs:
 - name: Install test dependencies
 run: |
 pip install matplotlib
-pip install pytest
+pip install 'pytest<8'

 - name: pytest
 run: |
@@ -1,6 +1,11 @@
 # Changelog

+## Unreleased
+* fix pickle and deep copy for classification models inheriting from EP #1108 [olamarre]
+* update prior `__new__` methods #1098 [MartinBubel]
+* fix invalid escape sequence #1011 [janmayer]
+
 ## v1.13.2 (2024-07-21)
 * update string checks in initialization method for latent variable and put `empirical_samples` init-method on a deprecation path
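Most of the hunks below implement the escape-sequence fix by doubling every backslash inside LaTeX-bearing docstrings and comments. For context, a raw string is the other common way to keep such docstrings valid Python; a minimal sketch for comparison, with illustrative function names that are not from the codebase:

```python
def k_rbf_doubled():
    """RBF kernel, :math:`k(r) = \\sigma^2 \\exp(-\\frac{1}{2} r^2)`.

    Doubling the backslashes, as these commits do, keeps Python from
    treating \\s and friends as (invalid) escape sequences.
    """


def k_rbf_raw():
    r"""RBF kernel, :math:`k(r) = \sigma^2 \exp(-\frac{1}{2} r^2)`.

    An r-prefixed docstring reads the same to Sphinx but needs no
    doubled backslashes.
    """
```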
@@ -194,7 +194,7 @@ class GP(Model):
 # Make sure to name this variable and the predict functions will "just work"
 # In maths the predictive variable is:
 # K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
-# W_{pp} := \texttt{Woodbury inv}
+# W_{pp} := \\texttt{Woodbury inv}
 # p := _predictive_variable

 @property

@@ -283,7 +283,7 @@ class GP(Model):

 def log_likelihood(self):
 """
-The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
+The log marginal likelihood of the model, :math:`p(\\mathbf{y})`, this is the objective function of the model being optimised
 """
 return self._log_marginal_likelihood

@@ -296,9 +296,9 @@ class GP(Model):
 diagonal of the covariance is returned.

 .. math::
-p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
-= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
-\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
+= N(f*| K_{x*x}(K_{xx} + \\Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \\Sigma)^{-1}K_{xx*}
+\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
 """
 mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
 if self.mean_function is not None:

@@ -702,7 +702,7 @@ class GP(Model):
 Calculation of the log predictive density

 .. math:
-p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
+p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})

 :param x_test: test locations (x_{*})
 :type x_test: (Nx1) array

@@ -718,7 +718,7 @@ class GP(Model):
 Calculation of the log predictive density by sampling

 .. math:
-p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
+p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})

 :param x_test: test locations (x_{*})
 :type x_test: (Nx1) array

@@ -734,24 +734,24 @@ class GP(Model):

 def _raw_posterior_covariance_between_points(self, X1, X2):
 """
-Computes the posterior covariance between points. Does not account for
+Computes the posterior covariance between points. Does not account for
 normalization or likelihood

 :param X1: some input observations
 :param X2: other input observations

-:returns:
+:returns:
 cov: raw posterior covariance: k(X1,X2) - k(X1,X) G^{-1} K(X,X2)
 """
 return self.posterior.covariance_between_points(self.kern, self.X, X1, X2)


-def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
-likelihood=None,
+def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
+likelihood=None,
 include_likelihood=True):
 """
-Computes the posterior covariance between points. Includes likelihood
-variance as well as normalization so that evaluation at (x,x) is consistent
+Computes the posterior covariance between points. Includes likelihood
+variance as well as normalization so that evaluation at (x,x) is consistent
 with model.predict

 :param X1: some input observations

@@ -762,8 +762,8 @@ class GP(Model):
 the predicted underlying latent function f.
 :type include_likelihood: bool

-:returns:
-cov: posterior covariance, a Numpy array, Nnew x Nnew if
+:returns:
+cov: posterior covariance, a Numpy array, Nnew x Nnew if
 self.output_dim == 1, and Nnew x Nnew x self.output_dim otherwise.
 """

@@ -774,7 +774,7 @@ class GP(Model):
 mean, _ = self._raw_predict(X1, full_cov=True)
 if likelihood is None:
 likelihood = self.likelihood
-_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
+_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
 Y_metadata=Y_metadata)

 if self.normalizer is not None:
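For orientation, the method touched above computes cross-covariances of the posterior GP; a small usage sketch on synthetic data (shapes and values are illustrative only, not from the repository):

```python
import numpy as np
import GPy

rng = np.random.default_rng(0)
X = rng.uniform(0, 10, (50, 1))
Y = np.sin(X) + 0.1 * rng.standard_normal((50, 1))

m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
m.optimize()

X1 = np.linspace(0, 10, 5)[:, None]
X2 = np.linspace(0, 10, 3)[:, None]

# With include_likelihood=True the likelihood variance is folded in where
# X1 and X2 coincide, so the result stays consistent with model.predict.
C = m.posterior_covariance_between_points(X1, X2, include_likelihood=True)
print(C.shape)  # expected (5, 3) for a single-output model
```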
@@ -580,7 +580,11 @@ class DGPLVM(Prior):
 domain = _REAL

 def __new__(cls, sigma2, lbl, x_shape):
-return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
+newfunc = super(Prior, cls).__new__
+if newfunc is object.__new__:
+    return newfunc(cls)
+else:
+    return newfunc(cls, sigma2, lbl, x_shape)

 def __init__(self, sigma2, lbl, x_shape):
 self.sigma2 = sigma2

@@ -1275,7 +1279,12 @@ class HalfT(Prior):
 for instance in cls._instances:
 if instance().A == A and instance().nu == nu:
 return instance()
-o = super(Prior, cls).__new__(cls, A, nu)
+
+newfunc = super(Prior, cls).__new__
+if newfunc is object.__new__:
+    o = newfunc(cls)
+else:
+    o = newfunc(cls, A, nu)
 cls._instances.append(weakref.ref(o))
 return cls._instances[-1]()
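Both prior classes get the same guard: `object.__new__` in Python 3 rejects extra positional arguments, so the parameters are only forwarded when a custom `__new__` further up the MRO actually accepts them. A stripped-down sketch of the idiom outside GPy (class names are illustrative):

```python
class Base:
    pass


class HalfTLike(Base):
    """Illustrative stand-in, not the GPy class."""

    def __new__(cls, A, nu):
        newfunc = super(Base, cls).__new__
        if newfunc is object.__new__:
            # object.__new__ takes no extra arguments in Python 3,
            # so pass only the class.
            return newfunc(cls)
        # A cooperating __new__ further up the MRO may accept them.
        return newfunc(cls, A, nu)

    def __init__(self, A, nu):
        self.A, self.nu = A, nu


p = HalfTLike(A=1.0, nu=3.0)
print(p.A, p.nu)
```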
@@ -38,7 +38,7 @@ class SparseGP_MPI(SparseGP):
 mean_function=None, inference_method=None, name='sparse gp',
 Y_metadata=None, mpi_comm=None, normalizer=False):
 self._IN_OPTIMIZATION_ = False
-if mpi_comm != None:
+if mpi_comm is not None:
 if inference_method is None:
 inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
 else:

@@ -52,7 +52,7 @@ class SparseGP_MPI(SparseGP):

 self.mpi_comm = mpi_comm
 # Manage the data (Y) division
-if mpi_comm != None:
+if mpi_comm is not None:
 from ..util.parallel import divide_data
 N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm.rank, mpi_comm.size)
 self.N_range = (N_start, N_end)

@@ -65,7 +65,7 @@ class SparseGP_MPI(SparseGP):
 def __getstate__(self):
 dc = super(SparseGP_MPI, self).__getstate__()
 dc['mpi_comm'] = None
-if self.mpi_comm != None:
+if self.mpi_comm is not None:
 del dc['N_range']
 del dc['N_list']
 del dc['Y_local']

@@ -81,7 +81,7 @@ class SparseGP_MPI(SparseGP):

 @SparseGP.optimizer_array.setter
 def optimizer_array(self, p):
-if self.mpi_comm != None:
+if self.mpi_comm is not None:
 if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
 self.mpi_comm.Bcast(np.int32(1),root=0)
 self.mpi_comm.Bcast(p, root=0)
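The `!= None` to `is not None` change is more than style: `!=` dispatches to `__ne__`, which for array-like objects can return something other than a plain bool, while an identity check cannot. A small illustration with NumPy, purely to show the failure mode (unrelated to mpi4py):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0])

print(x is not None)   # True, a plain bool
print(x != None)       # elementwise comparison -> array([ True,  True,  True])

try:
    if x != None:      # truth value of a multi-element array is ambiguous
        pass
except ValueError as err:
    print(err)
```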
@@ -44,7 +44,7 @@ class Symbolic_core():
 self._set_derivatives(derivatives)
 self._set_parameters(parameters)
 # Convert the expressions to a list for common sub expression elimination
-# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
+# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
 self.update_expression_list()

 # Apply any global stabilisation operations to expressions.

@@ -86,7 +86,7 @@ class Symbolic_core():
 # object except as cached. For covariance functions this is X
 # and Z, for likelihoods F and for mapping functions X.
 self.cacheable_vars = [] # list of everything that's cacheable
-for var in cacheable:
+for var in cacheable:
 self.variables[var] = [e for e in vars if e.name.split('_')[0]==var.lower()]
 self.cacheable_vars += self.variables[var]
 for var in cacheable:

@@ -105,7 +105,7 @@ class Symbolic_core():
 for derivative in derivatives:
 derivative_arguments += self.variables[derivative]

-# Do symbolic work to compute derivatives.
+# Do symbolic work to compute derivatives.
 for key, func in self.expressions.items():
 # if func['function'].is_Matrix:
 # rows = func['function'].shape[0]

@@ -126,7 +126,7 @@ class Symbolic_core():
 if theta.name in parameters:
 val = parameters[theta.name]
-# Add parameter.
+
 self.link_parameters(Param(theta.name, val, None))
 #self._set_attribute(theta.name, )

@@ -174,7 +174,7 @@ class Symbolic_core():
 code = self.code[function]['derivative'][theta.name]
 gradient[theta.name] = (partial*eval(code, self.namespace)).sum()
 return gradient
-
+
 def eval_gradients_X(self, function, partial, **kwargs):
 if 'X' in kwargs:
 gradients_X = np.zeros_like(kwargs['X'])

@@ -194,7 +194,7 @@ class Symbolic_core():
 for variable, code in self.variable_sort(self.code['parameters_changed']):
 lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
 return lcode
-
+
 def code_update_cache(self):
 lcode = ''
 for var in self.cacheable:

@@ -208,7 +208,7 @@ class Symbolic_core():
 for i, theta in enumerate(self.variables[var]):
 lcode+= "\t" + var + '= np.atleast_2d(' + var + ')\n'
 lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n"
-
+
 for variable, code in self.variable_sort(self.code['update_cache']):
 lcode+= self._print_code(variable) + ' = ' + self._print_code(code) + "\n"

@@ -250,7 +250,7 @@ class Symbolic_core():
 """Make sure namespace gets updated when setting attributes."""
 setattr(self, name, value)
 self.namespace.update({name: getattr(self, name)})
-
+

 def update_expression_list(self):
 """Extract a list of expressions from the dictionary of expressions."""

@@ -260,9 +260,9 @@ class Symbolic_core():
 for fname, fexpressions in self.expressions.items():
 for type, texpressions in fexpressions.items():
 if type == 'function':
-self.expression_list.append(texpressions)
+self.expression_list.append(texpressions)
 self.expression_keys.append([fname, type])
-self.expression_order.append(1)
+self.expression_order.append(1)
 elif type[-10:] == 'derivative':
 for dtype, expression in texpressions.items():
 self.expression_list.append(expression)

@@ -274,9 +274,9 @@ class Symbolic_core():
 elif type[:-10] == 'third_':
 self.expression_order.append(5) #sym.count_ops(self.expressions[type][dtype]))
 else:
-self.expression_list.append(fexpressions[type])
+self.expression_list.append(fexpressions[type])
 self.expression_keys.append([fname, type])
-self.expression_order.append(2)
+self.expression_order.append(2)

 # This step may be unecessary.
 # Not 100% sure if the sub expression elimination is order sensitive. This step orders the list with the 'function' code first and derivatives after.

@@ -313,7 +313,7 @@ class Symbolic_core():
 sym_var = sym.var(cache_prefix + str(i))
 self.variables[cache_prefix].append(sym_var)
 replace_dict[expr.name] = sym_var
-
+
 for i, expr in enumerate(params_change_list):
 sym_var = sym.var(sub_prefix + str(i))
 self.variables[sub_prefix].append(sym_var)

@@ -329,7 +329,7 @@ class Symbolic_core():
 for keys in self.expression_keys:
 for replace, void in common_sub_expressions:
 setInDict(self.expressions, keys, getFromDict(self.expressions, keys).subs(replace, replace_dict[replace.name]))
-
+
 self.expressions['parameters_changed'] = {}
 self.expressions['update_cache'] = {}
 for var, expr in common_sub_expressions:

@@ -339,7 +339,7 @@ class Symbolic_core():
 self.expressions['update_cache'][replace_dict[var.name].name] = expr
 else:
 self.expressions['parameters_changed'][replace_dict[var.name].name] = expr
-
+

 def _gen_code(self):
 """Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""

@@ -357,8 +357,8 @@ class Symbolic_core():
 return code

 self.code = match_key(self.expressions)
-
-
+
+
 def _expr2code(self, arg_list, expr):
 """Convert the given symbolic expression into code."""
 code = lambdastr(arg_list, expr)

@@ -379,7 +379,7 @@ class Symbolic_core():
 def _display_expression(self, keys, user_substitutes={}):
 """Helper function for human friendly display of the symbolic components."""
 # Create some pretty maths symbols for the display.
-sigma, alpha, nu, omega, l, variance = sym.var('\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
+sigma, alpha, nu, omega, l, variance = sym.var(r'\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
 substitutes = {'scale': sigma, 'shape': alpha, 'lengthscale': l, 'variance': variance}
 substitutes.update(user_substitutes)

@@ -416,5 +416,5 @@ class Symbolic_core():
 return int(digits[0])
 else:
 return x[0]
-
+
 return sorted(var_dict.items(), key=sort_key, reverse=reverse)
@@ -134,10 +134,10 @@ class posteriorParams(posteriorParamsBase):
 B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
 L = jitchol(B)
 V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
-Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \Sigma^(-1))^(-1)
+Sigma = K - np.dot(V.T,V) #K - KS^(1/2)BS^(1/2)K = (K^(-1) + \\Sigma^(-1))^(-1)

 aux_alpha , _ = dpotrs(L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1)
-alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p)
+alpha = ga_approx.v - tau_tilde_root * aux_alpha #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p)
 mu = np.dot(K, alpha) + mean_prior

 return posteriorParams(mu=mu, Sigma=Sigma, L=L)

@@ -151,8 +151,8 @@ class posteriorParamsDTC(posteriorParamsBase):
 DSYR(LLT,Kmn[:,i].copy(),delta_tau)
 L = jitchol(LLT)
 V,info = dtrtrs(L,Kmn,lower=1)
-self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn
-si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
+self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\\top)^(-1)) K_mn
+si = np.sum(V.T*V[:,i],-1) #(V V^\\top)[:,i]
 self.mu += (delta_v-delta_tau*self.mu[i])*si
 #mu = np.dot(Sigma, v_tilde)

@@ -229,24 +229,17 @@ class EPBase(object):
 v_diff = np.mean(np.square(ga_approx.v-self.ga_approx_old.v))
 return ((tau_diff < self.epsilon) and (v_diff < self.epsilon))

-def __setstate__(self, state):
-    super(EPBase, self).__setstate__(state[0])
-    self.epsilon, self.eta, self.delta = state[1]
-    self.reset()
-
-def __getstate__(self):
-    return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]
-
 def _save_to_input_dict(self):
-    input_dict = super(EPBase, self)._save_to_input_dict()
-    input_dict["epsilon"]=self.epsilon
-    input_dict["eta"]=self.eta
-    input_dict["delta"]=self.delta
-    input_dict["always_reset"]=self.always_reset
-    input_dict["max_iters"]=self.max_iters
-    input_dict["ep_mode"]=self.ep_mode
-    input_dict["parallel_updates"]=self.parallel_updates
-    input_dict["loading"]=True
+    input_dict = {
+        "epsilon": self.epsilon,
+        "eta": self.eta,
+        "delta": self.delta,
+        "always_reset": self.always_reset,
+        "max_iters": self.max_iters,
+        "ep_mode": self.ep_mode,
+        "parallel_updates": self.parallel_updates,
+        "loading": True
+    }
     return input_dict

 class EP(EPBase, ExactGaussianInference):

@@ -391,11 +384,11 @@ class EP(EPBase, ExactGaussianInference):


 aux_alpha , _ = dpotrs(post_params.L, tau_tilde_root * (np.dot(K, ga_approx.v) + mean_prior), lower=1)
-alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) (/mu^(/tilde) - /mu_p)
+alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\\tilde))^(-1) (/mu^(/tilde) - /mu_p)

 LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
 Wi = np.dot(LWi.T,LWi)
-symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
+symmetrify(Wi) #(K + Sigma^(\\tilde))^(-1)

 dL_dK = 0.5 * (tdot(alpha) - Wi)
 dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')

@@ -530,7 +523,7 @@ class EPDTC(EPBase, VarDTC):
 #initial values - Gaussian factors
 #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
 LLT0 = Kmm.copy()
-Lm = jitchol(LLT0) #K_m = L_m L_m^\top
+Lm = jitchol(LLT0) #K_m = L_m L_m^\\top
 Vm,info = dtrtrs(Lm, Kmn,lower=1)
 # Lmi = dtrtri(Lm)
 # Kmmi = np.dot(Lmi.T,Lmi)
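The removal of the EPBase `__getstate__`/`__setstate__` pair is what the changelog's pickle and deepcopy entry (#1108) refers to; a quick way to exercise that path is to round-trip an EP-based classification model. A sketch on toy data, not a test from the repository:

```python
import copy
import pickle

import numpy as np
import GPy

rng = np.random.default_rng(1)
X = rng.standard_normal((40, 1))
Y = (X > 0).astype(float)

m = GPy.models.GPClassification(X, Y, kernel=GPy.kern.RBF(1))  # EP-based inference
m.optimize(max_iters=20)

m2 = pickle.loads(pickle.dumps(m))  # the round trip that #1108 is about
m3 = copy.deepcopy(m)

# The copies should predict identically to the original.
Xtest = np.linspace(-2, 2, 5)[:, None]
assert np.allclose(m.predict(Xtest)[0], m2.predict(Xtest)[0])
assert np.allclose(m.predict(Xtest)[0], m3.predict(Xtest)[0])
```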
@@ -27,7 +27,7 @@ class Laplace(LatentFunctionInference):
 """
 Laplace Approximation

-Find the moments \hat{f} and the hessian at this point
+Find the moments \\hat{f} and the hessian at this point
 (using Newton-Raphson) of the unnormalised posterior

 """
@@ -8,14 +8,14 @@ log_2_pi = np.log(2*np.pi)
 class PEP(LatentFunctionInference):
 '''
 Sparse Gaussian processes using Power-Expectation Propagation
-for regression: alpha \approx 0 gives VarDTC and alpha = 1 gives FITC
-
-Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
+for regression: alpha \\approx 0 gives VarDTC and alpha = 1 gives FITC
+
+Reference: A Unifying Framework for Sparse Gaussian Process Approximation using
 Power Expectation Propagation, https://arxiv.org/abs/1605.07066


 '''
 const_jitter = 1e-6


 def __init__(self, alpha):
 super(PEP, self).__init__()
 self.alpha = alpha

@@ -69,7 +69,7 @@ class PEP(LatentFunctionInference):
 #compute dL_dR
 Uv = np.dot(U, v)
 dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - (1.0+alpha_const_term)/beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) \
-+ np.sum(np.square(Uv), 1))*beta_star**2
++ np.sum(np.square(Uv), 1))*beta_star**2

 # Compute dL_dKmm
 vvT_P = tdot(v.reshape(-1,1)) + P
@@ -82,7 +82,7 @@ class Posterior(object):
 Posterior mean
 $$
 K_{xx}v
-v := \texttt{Woodbury vector}
+v := \\texttt{Woodbury vector}
 $$
 """
 if self._mean is None:

@@ -95,7 +95,7 @@ class Posterior(object):
 Posterior covariance
 $$
 K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
-W_{xx} := \texttt{Woodbury inv}
+W_{xx} := \\texttt{Woodbury inv}
 $$
 """
 if self._covariance is None:

@@ -146,8 +146,8 @@ class Posterior(object):
 """
 return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
 $$
-L_{W}L_{W}^{\top} = W^{-1}
-W^{-1} := \texttt{Woodbury inv}
+L_{W}L_{W}^{\\top} = W^{-1}
+W^{-1} := \\texttt{Woodbury inv}
 $$
 """
 if self._woodbury_chol is None:

@@ -178,8 +178,8 @@ class Posterior(object):
 """
 The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
 $$
-(K_{xx} + \Sigma_{xx})^{-1}
-\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
+(K_{xx} + \\Sigma_{xx})^{-1}
+\\Sigma_{xx} := \\texttt{Likelihood.variance / Approximate likelihood covariance}
 $$
 """
 if self._woodbury_inv is None:

@@ -200,8 +200,8 @@ class Posterior(object):
 """
 Woodbury vector in the gaussian likelihood case only is defined as
 $$
-(K_{xx} + \Sigma)^{-1}Y
-\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+(K_{xx} + \\Sigma)^{-1}Y
+\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
 $$
 """
 if self._woodbury_vector is None:
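Putting the docstrings above together for the plain Gaussian-likelihood case: the posterior is fully described by the Woodbury vector v = (K_xx + Sigma)^{-1} y and the Woodbury inverse W = (K_xx + Sigma)^{-1}. A small standalone NumPy sketch of those formulas (not the Posterior class itself):

```python
import numpy as np


def rbf(A, B, variance=1.0, lengthscale=1.0):
    """Exponentiated-quadratic kernel matrix between the rows of A and B."""
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * d2 / lengthscale**2)


rng = np.random.default_rng(0)
X = rng.uniform(0, 5, (20, 1))
y = np.sin(X).ravel() + 0.1 * rng.standard_normal(20)
noise = 0.1**2

K = rbf(X, X)
W = np.linalg.inv(K + noise * np.eye(20))   # Woodbury inv, (K_xx + Sigma)^{-1}
v = W @ y                                   # Woodbury vector, (K_xx + Sigma)^{-1} y

Xs = np.linspace(0, 5, 7)[:, None]
Ksx = rbf(Xs, X)

mean = Ksx @ v                              # K_{*x} v
cov = rbf(Xs, Xs) - Ksx @ W @ Ksx.T         # K_{**} - K_{*x} W K_{x*}
print(mean.shape, cov.shape)
```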
@@ -118,7 +118,7 @@ class VarDTC_minibatch(LatentFunctionInference):
 if not het_noise:
 YRY_full = trYYT*beta

-if self.mpi_comm != None:
+if self.mpi_comm is not None:
 from mpi4py import MPI
 psi0_all = np.array(psi0_full)
 psi1Y_all = psi1Y_full.copy()

@@ -142,7 +142,7 @@ class VarDTC_minibatch(LatentFunctionInference):

 num_data, output_dim = Y.shape
 input_dim = Z.shape[0]
-if self.mpi_comm != None:
+if self.mpi_comm is not None:
 from mpi4py import MPI
 num_data_all = np.array(num_data,dtype=np.int32)
 self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])

@@ -384,7 +384,7 @@ def update_gradients(model, mpi_comm=None):
 dL_dthetaL += grad_dict['dL_dthetaL']

 # Gather the gradients from multiple MPI nodes
-if mpi_comm != None:
+if mpi_comm is not None:
 from mpi4py import MPI
 if het_noise:
 raise "het_noise not implemented!"

@@ -407,7 +407,7 @@ def update_gradients(model, mpi_comm=None):
 # update for the KL divergence
 model.variational_prior.update_gradients_KL(X)

-if mpi_comm != None:
+if mpi_comm is not None:
 from mpi4py import MPI
 KL_div_all = np.array(KL_div)
 mpi_comm.Allreduce([np.float64(KL_div), MPI.DOUBLE], [KL_div_all, MPI.DOUBLE])

@@ -467,7 +467,7 @@ def update_gradients_sparsegp(model, mpi_comm=None):
 dL_dthetaL += grad_dict['dL_dthetaL']

 # Gather the gradients from multiple MPI nodes
-if mpi_comm != None:
+if mpi_comm is not None:
 from mpi4py import MPI
 if het_noise:
 raise "het_noise not implemented!"
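The MPI guards above all wrap the same pattern: each rank computes a partial statistic and Allreduce sums it across ranks. A minimal mpi4py sketch of that pattern, run under mpiexec and not taken from the module:

```python
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD

# Each rank holds a partial scalar, as update_gradients does with KL_div.
partial = np.array([float(comm.rank + 1)])
total = np.zeros_like(partial)

comm.Allreduce([partial, MPI.DOUBLE], [total, MPI.DOUBLE])  # default op is MPI.SUM

if comm.rank == 0:
    print("sum over ranks:", total[0])
```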
@@ -25,12 +25,12 @@ class Coregionalize(Kern):
 This covariance has the form:

 .. math::
-\mathbf{B} = \mathbf{W}\mathbf{W}^\intercal + \mathrm{diag}(kappa)
+\\mathbf{B} = \\mathbf{W}\\mathbf{W}^\\intercal + \\mathrm{diag}(kappa)

 An intrinsic/linear coregionalization covariance function of the form:

 .. math::
-k_2(x, y)=\mathbf{B} k(x, y)
+k_2(x, y)=\\mathbf{B} k(x, y)

 it is obtained as the tensor product between a covariance function
 k(x, y) and B.
@@ -15,7 +15,7 @@ class EQ_ODE1(Kern):

 This outputs of this kernel have the form
 .. math::
-\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
+\\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} u_i(t-\\delta_j) - d_jy_j(t)

 where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
@@ -15,7 +15,7 @@ class EQ_ODE2(Kern):

 This outputs of this kernel have the form
 .. math::
-\frac{\text{d}^2y_j(t)}{\text{d}^2t} + C_j\frac{\text{d}y_j(t)}{\text{d}t} + B_jy_j(t) = \sum_{i=1}^R w_{j,i} u_i(t)
+\\frac{\\text{d}^2y_j(t)}{\\text{d}^2t} + C_j\\frac{\\text{d}y_j(t)}{\\text{d}t} + B_jy_j(t) = \\sum_{i=1}^R w_{j,i} u_i(t)

 where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
@@ -45,7 +45,7 @@ class GridRBF(GridKern):

 .. math::

-k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
+k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg)

 """
 _support_GPU = True
@@ -146,25 +146,25 @@ class Kern(Parameterized):
 def psi0(self, Z, variational_posterior):
 """
 .. math::
-\psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
+\\psi_0 = \\sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
 """
 return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
 def psi1(self, Z, variational_posterior):
 """
 .. math::
-\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
+\\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
 """
 return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
 def psi2(self, Z, variational_posterior):
 """
 .. math::
-\psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
+\\psi_2^{m,m'} = \\sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
 """
 return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
 def psi2n(self, Z, variational_posterior):
 """
 .. math::
-\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
+\\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]

 Thus, we do not sum out n, compared to psi2
 """

@@ -173,7 +173,7 @@ class Kern(Parameterized):
 """
 .. math::

-\\frac{\partial L}{\partial X} = \\frac{\partial L}{\partial K}\\frac{\partial K}{\partial X}
+\\frac{\\partial L}{\\partial X} = \\frac{\\partial L}{\\partial K}\\frac{\\partial K}{\\partial X}
 """
 raise NotImplementedError
 def gradients_X_X2(self, dL_dK, X, X2):

@@ -182,7 +182,7 @@ class Kern(Parameterized):
 """
 .. math::

-\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
+\\frac{\\partial^2 L}{\\partial X\\partial X_2} = \\frac{\\partial L}{\\partial K}\\frac{\\partial^2 K}{\\partial X\\partial X_2}
 """
 raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
 def gradients_XX_diag(self, dL_dKdiag, X, cov=True):

@@ -203,7 +203,7 @@ class Kern(Parameterized):
 def update_gradients_full(self, dL_dK, X, X2):
 """Set the gradients of all parameters when doing full (N) inference."""
 raise NotImplementedError
-
+
 def reset_gradients(self):
 raise NotImplementedError

@@ -216,9 +216,9 @@ class Kern(Parameterized):

 .. math::

-\\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\
-& \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\
-& \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
+\\frac{\\partial L}{\\partial \\theta_i} & = \\frac{\\partial L}{\\partial \\psi_0}\\frac{\\partial \\psi_0}{\\partial \\theta_i}\\
+& \\quad + \\frac{\\partial L}{\\partial \\psi_1}\\frac{\\partial \\psi_1}{\\partial \\theta_i}\\
+& \\quad + \\frac{\\partial L}{\\partial \\psi_2}\\frac{\\partial \\psi_2}{\\partial \\theta_i}

 Thus, we push the different derivatives through the gradients of the psi
 statistics. Be sure to set the gradients for all kernel
@@ -16,15 +16,15 @@ class Linear(Kern):

 .. math::

-k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i
+k(x,y) = \\sum_{i=1}^{\\text{input_dim}} \\sigma^2_i x_iy_i

 :param input_dim: the number of input dimensions
 :type input_dim: int
-:param variances: the vector of variances :math:`\sigma^2_i`
+:param variances: the vector of variances :math:`\\sigma^2_i`
 :type variances: array or list of the appropriate size (or float if there
 is only one variance parameter)
 :param ARD: Auto Relevance Determination. If False, the kernel has only one
-variance parameter \sigma^2, otherwise there is one variance
+variance parameter \\sigma^2, otherwise there is one variance
 parameter per dimension.
 :type ARD: Boolean
 :rtype: kernel object

@@ -121,7 +121,7 @@ class Linear(Kern):
 the returned array is of shape [NxNxQxQ].

 ..math:
-\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
+\\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}

 ..returns:
 dL2_dXdX2: [NxMxQxQ] for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
@@ -20,12 +20,12 @@ class MLP(Kern):

 :param input_dim: the number of input dimensions
 :type input_dim: int
-:param variance: the variance :math:`\sigma^2`
+:param variance: the variance :math:`\\sigma^2`
 :type variance: float
-:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
+:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\\sigma^2_w`
 :type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
-:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
-:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
+:param bias_variance: the variance of the prior over bias parameters :math:`\\sigma^2_b`
+:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \\sigma^2_w), otherwise there is one weight variance parameter per dimension.
 :type ARD: Boolean
 :rtype: Kernpart object
@@ -16,7 +16,7 @@ class RBF(Stationary):

 .. math::

-k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
+k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg)

 """
 _support_GPU = True
|||
|
|
@ -12,48 +12,48 @@ import numpy as np
|
|||
|
||||
class sde_Brownian(Brownian):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE form.
|
||||
|
||||
|
||||
Linear kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sigma^2 min(x,y)
|
||||
k(x,y) = \\sigma^2 min(x,y)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
"""
|
||||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variance.gradient = gradients[0]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variance.values) # this is initial variancve in Bayesian linear regression
|
||||
|
||||
|
||||
F = np.array( ((0,1.0),(0,0) ))
|
||||
L = np.array( ((1.0,),(0,)) )
|
||||
Qc = np.array( ((variance,),) )
|
||||
H = np.array( ((1.0,0),) )
|
||||
|
||||
|
||||
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
|
||||
#P0 = Pinf.copy()
|
||||
P0 = np.zeros((2,2))
|
||||
#P0 = Pinf.copy()
|
||||
P0 = np.zeros((2,2))
|
||||
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
|
||||
dF = np.zeros((2,2,1))
|
||||
dQc = np.ones( (1,1,1) )
|
||||
|
||||
|
||||
dPinf = np.zeros((2,2,1))
|
||||
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
|
||||
#dP0 = dPinf.copy()
|
||||
#dP0 = dPinf.copy()
|
||||
dP0 = np.zeros((2,2,1))
|
||||
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
|
|
|||
|
|
@ -11,15 +11,15 @@ import numpy as np
|
|||
|
||||
class sde_Linear(Linear):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE form.
|
||||
|
||||
|
||||
Linear kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
|
||||
k(x,y) = \\sum_{i=1}^{input dim} \\sigma^2_i x_iy_i
|
||||
|
||||
"""
|
||||
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
|
||||
|
|
@ -27,40 +27,40 @@ class sde_Linear(Linear):
|
|||
Modify the init method, because one extra parameter is required. X - points
|
||||
on the X axis.
|
||||
"""
|
||||
|
||||
|
||||
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
|
||||
|
||||
|
||||
self.t0 = np.min(X)
|
||||
|
||||
|
||||
|
||||
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
"""
|
||||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variances.gradient = gradients[0]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression
|
||||
t0 = float(self.t0)
|
||||
|
||||
|
||||
F = np.array( ((0,1.0),(0,0) ))
|
||||
L = np.array( ((0,),(1.0,)) )
|
||||
Qc = np.zeros((1,1))
|
||||
H = np.array( ((1.0,0),) )
|
||||
|
||||
|
||||
Pinf = np.zeros((2,2))
|
||||
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
|
||||
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
|
||||
dF = np.zeros((2,2,1))
|
||||
dQc = np.zeros( (1,1,1) )
|
||||
|
||||
|
||||
dPinf = np.zeros((2,2,1))
|
||||
dP0 = np.zeros((2,2,1))
|
||||
dP0[:,:,0] = P0 / variance
|
||||
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
|
|
|||
|
|
@ -11,15 +11,15 @@ import numpy as np
|
|||
|
||||
class sde_Matern32(Matern32):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE forrm.
|
||||
|
||||
|
||||
Matern 3/2 kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
|
|
@ -27,59 +27,59 @@ class sde_Matern32(Matern32):
|
|||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variance.gradient = gradients[0]
|
||||
self.lengthscale.gradient = gradients[1]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variance.values)
|
||||
lengthscale = float(self.lengthscale.values)
|
||||
|
||||
foo = np.sqrt(3.)/lengthscale
|
||||
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
|
||||
|
||||
foo = np.sqrt(3.)/lengthscale
|
||||
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
|
||||
L = np.array(( (0,), (1.0,) ))
|
||||
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
|
||||
H = np.array(((1.0, 0),))
|
||||
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
|
||||
H = np.array(((1.0, 0),))
|
||||
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
|
||||
P0 = Pinf.copy()
|
||||
|
||||
# Allocate space for the derivatives
|
||||
|
||||
# Allocate space for the derivatives
|
||||
dF = np.empty([F.shape[0],F.shape[1],2])
|
||||
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
|
||||
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
|
||||
# The partial derivatives
|
||||
dFvariance = np.zeros((2,2))
|
||||
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
|
||||
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
|
||||
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
|
||||
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
|
||||
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
|
||||
# Combine the derivatives
|
||||
dF[:,:,0] = dFvariance
|
||||
dF[:,:,1] = dFlengthscale
|
||||
dQc[:,:,0] = dQcvariance
|
||||
dQc[:,:,1] = dQclengthscale
|
||||
dPinf[:,:,0] = dPinfvariance
|
||||
dPinf[:,:,1] = dPinflengthscale
|
||||
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
|
||||
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
|
||||
# The partial derivatives
|
||||
dFvariance = np.zeros((2,2))
|
||||
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
|
||||
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
|
||||
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
|
||||
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
|
||||
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
|
||||
# Combine the derivatives
|
||||
dF[:,:,0] = dFvariance
|
||||
dF[:,:,1] = dFlengthscale
|
||||
dQc[:,:,0] = dQcvariance
|
||||
dQc[:,:,1] = dQclengthscale
|
||||
dPinf[:,:,0] = dPinfvariance
|
||||
dPinf[:,:,1] = dPinflengthscale
|
||||
dP0 = dPinf.copy()
|
||||
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
||||
class sde_Matern52(Matern52):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE forrm.
|
||||
|
||||
|
||||
Matern 5/2 kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac{5}{3}r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
|
|
@ -87,51 +87,51 @@ class sde_Matern52(Matern52):
|
|||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variance.gradient = gradients[0]
|
||||
self.lengthscale.gradient = gradients[1]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variance.values)
|
||||
lengthscale = float(self.lengthscale.values)
|
||||
|
||||
lamda = np.sqrt(5.0)/lengthscale
|
||||
kappa = 5.0/3.0*variance/lengthscale**2
|
||||
|
||||
kappa = 5.0/3.0*variance/lengthscale**2
|
||||
|
||||
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
|
||||
L = np.array(((0,),(0,),(1,)))
|
||||
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
|
||||
H = np.array(((1,0,0),))
|
||||
|
||||
H = np.array(((1,0,0),))
|
||||
|
||||
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
|
||||
P0 = Pinf.copy()
|
||||
# Allocate space for the derivatives
|
||||
dF = np.empty((3,3,2))
|
||||
dQc = np.empty((1,1,2))
|
||||
# Allocate space for the derivatives
|
||||
dF = np.empty((3,3,2))
|
||||
dQc = np.empty((1,1,2))
|
||||
dPinf = np.empty((3,3,2))
|
||||
|
||||
# The partial derivatives
|
||||
|
||||
# The partial derivatives
|
||||
dFvariance = np.zeros((3,3))
|
||||
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
|
||||
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
|
||||
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
|
||||
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
|
||||
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
|
||||
|
||||
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
|
||||
|
||||
dPinf_variance = Pinf/variance
|
||||
kappa2 = -2.0*kappa/lengthscale
|
||||
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
|
||||
0,-100*variance/lengthscale**5)))
|
||||
# Combine the derivatives
|
||||
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
|
||||
0,-100*variance/lengthscale**5)))
|
||||
# Combine the derivatives
|
||||
dF[:,:,0] = dFvariance
|
||||
dF[:,:,1] = dFlengthscale
|
||||
dQc[:,:,0] = dQcvariance
|
||||
dQc[:,:,1] = dQclengthscale
|
||||
dF[:,:,1] = dFlengthscale
|
||||
dQc[:,:,0] = dQcvariance
|
||||
dQc[:,:,1] = dQclengthscale
|
||||
dPinf[:,:,0] = dPinf_variance
|
||||
dPinf[:,:,1] = dPinf_lengthscale
|
||||
dP0 = dPinf.copy()
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
|
@ -24,8 +24,8 @@ class sde_StdPeriodic(StdPeriodic):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
|
||||
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
|
||||
k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} {}\\sum_{i=1}^{input\\_dim}
|
||||
\\left( \\frac{\\sin(\\frac{\\pi}{\\lambda_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ def seriescoeff(m=6, lengthScale=1.0, magnSigma2=1.0, true_covariance=False):
|
|||
Calculate the coefficients q_j^2 for the covariance function
|
||||
approximation:
|
||||
|
||||
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
|
||||
k(\\tau) = \\sum_{j=0}^{+\\infty} q_j^2 \\cos(j\\omega_0 \\tau)
|
||||
|
||||
Reference is:
|
||||
|
||||
|
|
|
|||
|
|
@ -12,63 +12,63 @@ import numpy as np
|
|||
|
||||
class sde_White(White):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE forrm.
|
||||
|
||||
|
||||
White kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \alpha*\delta(x-y)
|
||||
k(x,y) = \\alpha*\\delta(x-y)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
"""
|
||||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variance.gradient = gradients[0]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variance.values)
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variance.values)
|
||||
|
||||
F = np.array( ((-np.inf,),) )
|
||||
L = np.array( ((1.0,),) )
|
||||
Qc = np.array( ((variance,),) )
|
||||
H = np.array( ((1.0,),) )
|
||||
|
||||
|
||||
Pinf = np.array( ((variance,),) )
|
||||
P0 = Pinf.copy()
|
||||
|
||||
P0 = Pinf.copy()
|
||||
|
||||
dF = np.zeros((1,1,1))
|
||||
dQc = np.zeros((1,1,1))
|
||||
dQc[:,:,0] = np.array( ((1.0,),) )
|
||||
|
||||
|
||||
dPinf = np.zeros((1,1,1))
|
||||
dPinf[:,:,0] = np.array( ((1.0,),) )
|
||||
dP0 = dPinf.copy()
|
||||
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
||||
|
||||
class sde_Bias(Bias):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE forrm.
|
||||
|
||||
|
||||
Bias kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \alpha
|
||||
k(x,y) = \\alpha
|
||||
|
||||
"""
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
|
|
@ -76,28 +76,28 @@ class sde_Bias(Bias):
|
|||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variance.gradient = gradients[0]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
variance = float(self.variance.values)
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
variance = float(self.variance.values)
|
||||
|
||||
F = np.array( ((0.0,),))
|
||||
L = np.array( ((1.0,),))
|
||||
Qc = np.zeros((1,1))
|
||||
H = np.array( ((1.0,),))
|
||||
|
||||
|
||||
Pinf = np.zeros((1,1))
|
||||
P0 = np.array( ((variance,),) )
|
||||
|
||||
P0 = np.array( ((variance,),) )
|
||||
|
||||
dF = np.zeros((1,1,1))
|
||||
dQc = np.zeros((1,1,1))
|
||||
|
||||
|
||||
dPinf = np.zeros((1,1,1))
|
||||
dP0 = np.zeros((1,1,1))
|
||||
dP0[:,:,0] = np.array( ((1.0,),) )
|
||||
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
|
@ -29,7 +29,7 @@ class sde_RBF(RBF):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -102,7 +102,7 @@ class sde_RBF(RBF):
|
|||
eps = 1e-12
|
||||
if (float(Qc) > 1.0 / eps) or (float(Qc) < eps):
|
||||
warnings.warn(
|
||||
"""sde_RBF kernel: the noise variance Qc is either very large or very small.
|
||||
"""sde_RBF kernel: the noise variance Qc is either very large or very small.
|
||||
It influece conditioning of P_inf: {0:e}""".format(
|
||||
float(Qc)
|
||||
)
|
||||
|
|
@ -204,7 +204,7 @@ class sde_Exponential(Exponential):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -259,7 +259,7 @@ class sde_RatQuad(RatQuad):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha} \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -24,19 +24,19 @@ class StdPeriodic(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
|
||||
\left( \frac{\sin(\frac{\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
|
||||
k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} \\sum_{i=1}^{input\\_dim}
|
||||
\\left( \\frac{\\sin(\\frac{\\pi}{T_i} (x_i - y_i) )}{l_i} \\right)^2 \\right] }
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\theta_1` in the formula above
|
||||
:param variance: the variance :math:`\\theta_1` in the formula above
|
||||
:type variance: float
|
||||
:param period: the vector of periods :math:`\T_i`. If None then 1.0 is assumed.
|
||||
:param period: the vector of periods :math:`\\T_i`. If None then 1.0 is assumed.
|
||||
:type period: array or list of the appropriate size (or float if there is only one period parameter)
|
||||
:param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
|
||||
:param lengthscale: the vector of lengthscale :math:`\\l_i`. If None then 1.0 is assumed.
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:param ARD1: Auto Relevance Determination with respect to period.
|
||||
If equal to "False" one single period parameter :math:`\T_i` for
|
||||
If equal to "False" one single period parameter :math:`\\T_i` for
|
||||
each dimension is assumed, otherwise there is one lengthscale
|
||||
parameter per dimension.
|
||||
:type ARD1: Boolean
|
||||
|
|
@ -177,7 +177,7 @@ class StdPeriodic(Kern):
|
|||
Returns only diagonal elements.
|
||||
"""
|
||||
return np.zeros(X.shape[0])
|
||||
|
||||
|
||||
def dK2_dXdX2(self, X, X2, dimX, dimX2):
|
||||
"""
|
||||
Compute the second derivative of K with respect to:
|
||||
|
|
@ -578,9 +578,9 @@ class StdPeriodic(Kern):
|
|||
X2 = X
|
||||
dX = -np.pi*((dL_dK*K)[:,:,None]*np.sin(2*np.pi/self.period*(X[:,None,:] - X2[None,:,:]))/(2.*np.square(self.lengthscale)*self.period)).sum(1)
|
||||
return dX
|
||||
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return np.zeros(X.shape)
|
||||
|
||||
|
||||
def input_sensitivity(self, summarize=True):
|
||||
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ class Stationary(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
|
||||
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\\ell_q^2} }.
|
||||
|
||||
By default, there's only one lengthscale: seaprate lengthscales for each
|
||||
dimension can be enables by setting ARD=True.
|
||||
|
|
@ -153,7 +153,7 @@ class Stationary(Kern):
|
|||
Efficiently compute the scaled distance, r.
|
||||
|
||||
..math::
|
||||
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
|
||||
r = \\sqrt( \\sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
|
||||
|
||||
Note that if thre is only one lengthscale, l comes outside the sum. In
|
||||
this case we compute the unscaled distance first (in a separate
|
||||
|
|
@ -259,7 +259,7 @@ class Stationary(Kern):
|
|||
the returned array is of shape [NxNxQxQ].
|
||||
|
||||
..math:
|
||||
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
|
||||
\\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}
|
||||
|
||||
..returns:
|
||||
dL2_dXdX2: [NxMxQxQ] in the cov=True case, or [NxMxQ] in the cov=False case,
|
||||
|
|
@ -295,7 +295,7 @@ class Stationary(Kern):
|
|||
Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:
|
||||
|
||||
..math:
|
||||
\frac{\partial^2 K}{\partial X\partial X}
|
||||
\\frac{\\partial^2 K}{\\partial X\\partial X}
|
||||
|
||||
..returns:
|
||||
dL2_dXdX: [NxQxQ]
|
||||
|
|
@ -423,7 +423,7 @@ class OU(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -460,7 +460,7 @@ class Matern32(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -559,7 +559,7 @@ class Matern52(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac53 r^2) \\exp(- \\sqrt{5} r)
|
||||
"""
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
|
||||
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
|
|
@ -626,7 +626,7 @@ class ExpQuad(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp(- 0.5 r^2)
|
||||
k(r) = \\sigma^2 \\exp(- 0.5 r^2)
|
||||
|
||||
notes::
|
||||
- This is exactly the same as the RBF covariance function, but the
|
||||
|
|
@ -664,10 +664,10 @@ class ExpQuad(Stationary):
|
|||
class Cosine(Stationary):
|
||||
"""
|
||||
Cosine Covariance function
|
||||
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \cos(r)
|
||||
k(r) = \\sigma^2 \\cos(r)
|
||||
|
||||
"""
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'):
|
||||
|
|
@ -682,18 +682,18 @@ class Cosine(Stationary):
|
|||
class ExpQuadCosine(Stationary):
|
||||
"""
|
||||
Exponentiated quadratic multiplied by cosine covariance function (spectral mixture kernel).
|
||||
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp(-2\pi^2r^2)\cos(2\pi r/T)
|
||||
k(r) = \\sigma^2 \\exp(-2\\pi^2r^2)\\cos(2\\pi r/T)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, period=1., ARD=False, active_dims=None, name='ExpQuadCosine'):
|
||||
super(ExpQuadCosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
self.period = Param('period', period, Logexp())
|
||||
self.link_parameters(self.period)
|
||||
|
||||
|
||||
def K_of_r(self, r):
|
||||
return self.variance * np.exp(-2*np.pi**2*r**2)*np.cos(2*np.pi*r/self.period)
|
||||
|
||||
|
|
@ -712,18 +712,18 @@ class ExpQuadCosine(Stationary):
|
|||
super(ExpQuadCosine, self).update_gradients_diag(dL_dKdiag, X)
|
||||
self.period.gradient = 0.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class Sinc(Stationary):
|
||||
"""
|
||||
Sinc Covariance function
|
||||
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \sinc(\pi r)
|
||||
k(r) = \\sigma^2 \\sinc(\\pi r)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Sinc'):
|
||||
super(Sinc, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
|
||||
|
|
@ -734,7 +734,7 @@ class Sinc(Stationary):
|
|||
# small angle approximation to avoid divide by zero errors.
|
||||
return np.where(r<1e-5, -self.variance*4/3*np.pi*np.pi*r, self.variance/r * (np.cos(2*np.pi*r)-np.sinc(2*r)))
|
||||
|
||||
|
||||
|
||||
|
||||
class RatQuad(Stationary):
|
||||
"""
|
||||
|
|
@ -742,7 +742,7 @@ class RatQuad(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
|
||||
k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -14,23 +14,23 @@ class Eq_ode1(Kernpart):
|
|||
|
||||
This outputs of this kernel have the form
|
||||
.. math::
|
||||
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
|
||||
\\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} f_i(t-\\delta_j) +\\sqrt{\\kappa_j}g_j(t) - d_jy_j(t)
|
||||
|
||||
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
|
||||
|
||||
|
||||
:param output_dim: number of outputs driven by latent function.
|
||||
:type output_dim: int
|
||||
:param W: sensitivities of each output to the latent driving function.
|
||||
:param W: sensitivities of each output to the latent driving function.
|
||||
:type W: ndarray (output_dim x rank).
|
||||
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
|
||||
:type rank: int
|
||||
:param decay: decay rates for the first order system.
|
||||
:param decay: decay rates for the first order system.
|
||||
:type decay: array of length output_dim.
|
||||
:param delay: delay between latent force and output response.
|
||||
:type delay: array of length output_dim.
|
||||
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
|
||||
:type kappa: array of length output_dim.
|
||||
|
||||
|
||||
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
|
||||
"""
|
||||
def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
|
||||
|
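For orientation, the first-order ODE in the docstring above has, assuming a zero initial state (consistent with the gaussian_initial = False default shown below), the standard integrating-factor solution; it is this convolution form that keeps the outputs jointly Gaussian with the latent forces:

y_j(t) = \int_0^t e^{-d_j (t-u)} \Big( \sum_{i=1}^R w_{j,i} f_i(u - \delta_j) + \sqrt{\kappa_j}\, g_j(u) \Big)\, du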
|
@ -62,7 +62,7 @@ class Eq_ode1(Kernpart):
|
|||
self.is_stationary = False
|
||||
self.gaussian_initial = False
|
||||
self._set_params(self._get_params())
|
||||
|
||||
|
||||
def _get_params(self):
|
||||
param_list = [self.W.flatten()]
|
||||
if self.kappa is not None:
|
||||
|
|
@ -103,11 +103,11 @@ class Eq_ode1(Kernpart):
|
|||
param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
|
||||
if self.delay is not None:
|
||||
param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
|
||||
param_names+= ['lengthscale']
|
||||
param_names+= ['lengthscale']
|
||||
return param_names
|
||||
|
||||
def K(self,X,X2,target):
|
||||
|
||||
|
||||
if X.shape[1] > 2:
|
||||
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
|
||||
|
||||
|
|
@ -123,13 +123,13 @@ class Eq_ode1(Kernpart):
|
|||
def Kdiag(self,index,target):
|
||||
#target += np.diag(self.B)[np.asarray(index,dtype=int).flatten()]
|
||||
pass
|
||||
|
||||
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
|
||||
|
||||
# First extract times and indices.
|
||||
self._extract_t_indices(X, X2, dL_dK=dL_dK)
|
||||
self._dK_ode_dtheta(target)
|
||||
|
||||
|
||||
|
||||
def _dK_ode_dtheta(self, target):
|
||||
"""Do all the computations for the ode parts of the covariance function."""
|
||||
|
|
@ -138,7 +138,7 @@ class Eq_ode1(Kernpart):
|
|||
index_ode = self._index[self._index>0]-1
|
||||
if self._t2 is None:
|
||||
if t_ode.size==0:
|
||||
return
|
||||
return
|
||||
t2_ode = t_ode
|
||||
dL_dK_ode = dL_dK_ode[:, self._index>0]
|
||||
index2_ode = index_ode
|
||||
|
|
@ -210,7 +210,7 @@ class Eq_ode1(Kernpart):
|
|||
self._index = self._index[self._order]
|
||||
self._t = self._t[self._order]
|
||||
self._rorder = self._order.argsort() # rorder is for reversing the order
|
||||
|
||||
|
||||
if X2 is None:
|
||||
self._t2 = None
|
||||
self._index2 = None
|
||||
|
|
@ -229,7 +229,7 @@ class Eq_ode1(Kernpart):
|
|||
if dL_dK is not None:
|
||||
self._dL_dK = dL_dK[self._order, :]
|
||||
self._dL_dK = self._dL_dK[:, self._order2]
|
||||
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
"""Perform main body of computations for the ode1 covariance function."""
|
||||
# First extract times and indices.
|
||||
|
|
@ -253,8 +253,8 @@ class Eq_ode1(Kernpart):
|
|||
np.hstack((self._K_ode_eq, self._K_ode))))
|
||||
self._K_dvar = self._K_dvar[self._rorder, :]
|
||||
self._K_dvar = self._K_dvar[:, self._rorder2]
|
||||
|
||||
|
||||
|
||||
|
||||
if X2 is None:
|
||||
# Matrix giving scales of each output
|
||||
self._scale = np.zeros((self._t.size, self._t.size))
|
||||
|
|
@ -303,7 +303,7 @@ class Eq_ode1(Kernpart):
|
|||
self._K_eq = np.zeros((t_eq.size, t2_eq.size))
|
||||
return
|
||||
self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
|
||||
|
||||
|
||||
self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
|
||||
if self.is_normalized:
|
||||
self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
|
||||
|
|
@ -361,7 +361,7 @@ class Eq_ode1(Kernpart):
|
|||
self._K_eq_ode = sK.T
|
||||
else:
|
||||
self._K_ode_eq = sK
|
||||
|
||||
|
||||
def _K_compute_ode(self):
|
||||
# Compute covariances between outputs of the ODE models.
|
||||
|
||||
|
|
@ -370,7 +370,7 @@ class Eq_ode1(Kernpart):
|
|||
if self._t2 is None:
|
||||
if t_ode.size==0:
|
||||
self._K_ode = np.zeros((0, 0))
|
||||
return
|
||||
return
|
||||
t2_ode = t_ode
|
||||
index2_ode = index_ode
|
||||
else:
|
||||
|
|
@ -379,14 +379,14 @@ class Eq_ode1(Kernpart):
|
|||
self._K_ode = np.zeros((t_ode.size, t2_ode.size))
|
||||
return
|
||||
index2_ode = self._index2[self._index2>0]-1
|
||||
|
||||
|
||||
# When index is identical
|
||||
h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
|
||||
|
||||
if self._t2 is None:
|
||||
self._K_ode = 0.5 * (h + h.T)
|
||||
else:
|
||||
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
|
||||
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
|
||||
self._K_ode = 0.5 * (h + h2.T)
|
||||
|
||||
if not self.is_normalized:
|
||||
|
|
@ -410,28 +410,28 @@ class Eq_ode1(Kernpart):
|
|||
Decay = self.decay[index]
|
||||
if self.delay is not None:
|
||||
t = t - self.delay[index]
|
||||
|
||||
|
||||
t_squared = t*t
|
||||
half_sigma_decay = 0.5*self.sigma*Decay
|
||||
[ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
|
||||
half_sigma_decay)
|
||||
|
||||
|
||||
[ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
|
||||
half_sigma_decay - t/self.sigma)
|
||||
|
||||
|
||||
h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
|
||||
+ ln_part_1
|
||||
- log(Decay + D_j))
|
||||
- log(Decay + D_j))
|
||||
- sign2*np.exp(half_sigma_decay*half_sigma_decay
|
||||
- (Decay + D_j)*t
|
||||
+ ln_part_2
|
||||
+ ln_part_2
|
||||
- log(Decay + D_j)))
|
||||
|
||||
|
||||
sigma2 = self.sigma*self.sigma
|
||||
|
||||
if update_derivatives:
|
||||
|
||||
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
|
||||
|
||||
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
|
||||
+ t*sign2*np.exp(
|
||||
half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
|
||||
)
|
||||
|
|
@ -439,11 +439,11 @@ class Eq_ode1(Kernpart):
|
|||
(-1 + np.exp(-t_squared/sigma2-Decay*t)
|
||||
+ np.exp(-t_squared/sigma2-D_j*t)
|
||||
- np.exp(-(Decay + D_j)*t)))
|
||||
|
||||
|
||||
dh_dD_i = (dh_dD_i/(Decay+D_j)).real
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
dh_dD_j = (t*sign2*np.exp(
|
||||
half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
|
||||
)
|
||||
|
|
@ -457,7 +457,7 @@ class Eq_ode1(Kernpart):
|
|||
- (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
|
||||
- Decay/2*np.exp(-(Decay+D_j)*t))"""
|
||||
pass
|
||||
|
||||
|
||||
def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
|
||||
"""Helper function for computing part of the ode1 covariance function.
|
||||
|
||||
|
|
@ -491,7 +491,7 @@ class Eq_ode1(Kernpart):
|
|||
inv_sigma_diff_t = 1./self.sigma*diff_t
|
||||
half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
|
||||
|
||||
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
|
||||
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
|
||||
half_sigma_decay_i - inv_sigma_diff_t,
|
||||
return_sign=True)
|
||||
ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
|
||||
|
|
@ -529,7 +529,7 @@ class Eq_ode1(Kernpart):
|
|||
)
|
||||
))
|
||||
self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
|
||||
|
||||
|
||||
# Update jth decay gradient
|
||||
dh_ddecay2 = (t2_mat*sign2
|
||||
*np.exp(
|
||||
|
|
@ -539,7 +539,7 @@ class Eq_ode1(Kernpart):
|
|||
)
|
||||
-h)
|
||||
self._dh_ddecay2 = (dh_ddecay/(Decay[:, None] + Decay2[None, :])).real
|
||||
|
||||
|
||||
# Update sigma gradient
|
||||
self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
|
||||
+ 2/(np.sqrt(np.pi)
|
||||
|
|
@ -547,10 +547,10 @@ class Eq_ode1(Kernpart):
|
|||
*((-diff_t/sigma2-Decay[:, None]/2)
|
||||
*np.exp(-diff_t*diff_t/sigma2)
|
||||
+ (-t2_mat/sigma2+Decay[:, None]/2)
|
||||
*np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
|
||||
- (-t_mat/sigma2-Decay[:, None]/2)
|
||||
*np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
|
||||
*np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
|
||||
- (-t_mat/sigma2-Decay[:, None]/2)
|
||||
*np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
|
||||
- Decay[:, None]/2
|
||||
*np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
|
||||
|
||||
|
||||
return h
|
||||
|
|
|
|||
|
|
@ -9,13 +9,13 @@ import GPy
|
|||
|
||||
class Gibbs(Kernpart):
|
||||
"""
|
||||
Gibbs non-stationary covariance function.
|
||||
Gibbs non-stationary covariance function.
|
||||
|
||||
.. math::
|
||||
|
||||
|
||||
r = sqrt((x_i - x_j)'*(x_i - x_j))
|
||||
|
||||
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
|
||||
|
||||
k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
|
||||
|
||||
Z = (2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')^{q/2}
|
||||
|
||||
|
|
@ -25,18 +25,18 @@ class Gibbs(Kernpart):
|
|||
with input location. This leads to an additional term in front of
|
||||
the kernel.
|
||||
|
||||
The parameters are :math:`\sigma^2`, the process variance, and
|
||||
The parameters are :math:`\\sigma^2`, the process variance, and
|
||||
the parameters of l(x) which is a function that can be
|
||||
specified by the user, by default an multi-layer peceptron is
|
||||
used.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\sigma^2`
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\\sigma^2`
|
||||
:type variance: float
|
||||
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
|
||||
:type mapping: GPy.core.Mapping
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \\sigma^2_w), otherwise there is one weight variance parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
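A NumPy-only sketch of the Gibbs covariance documented above for two scalar inputs; l1 and l2 stand in for the lengthscale mapping l(x) at the two points, and the exponent q is read here as the input dimensionality (an assumption, since the hunk does not define q):

import numpy as np

def gibbs_cov(x1, x2, l1, l2, variance=1.0, q=1):
    r2 = (x1 - x2) ** 2
    denom = l1 * l1 + l2 * l2
    Z = (2.0 * l1 * l2 / denom) ** (q / 2.0)
    return variance * Z * np.exp(-r2 / denom)

print(gibbs_cov(0.0, 1.0, l1=0.5, l2=2.0))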
|
|
@ -113,7 +113,7 @@ class Gibbs(Kernpart):
|
|||
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
|
||||
else:
|
||||
target += self.mapping.df_dX(self._dL_dl[:, None], X)
|
||||
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
"""Gradient of diagonal of covariance with respect to X."""
|
||||
pass
|
||||
|
|
@ -123,7 +123,7 @@ class Gibbs(Kernpart):
|
|||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
|
||||
|
||||
|
||||
def _K_computations(self, X, X2=None):
|
||||
"""Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
|
||||
self._lengthscales=self.mapping.f(X)
|
||||
|
|
@ -146,7 +146,7 @@ class Gibbs(Kernpart):
|
|||
"""Pre-computations for the gradients of the covaraince function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
|
||||
:param dL_dK: the gradient of the objective with respect to the covariance function.
|
||||
:type dL_dK: ndarray"""
|
||||
|
||||
|
||||
self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
|
||||
if self._lengthscales_two is self._lengthscales:
|
||||
self._dL_dl_two = None
|
||||
|
|
|
|||
|
|
@ -19,11 +19,11 @@ class Hetero(Kernpart):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)
|
||||
k(x_i, x_j) = \\delta_{i,j} \\sigma^2(x_i)
|
||||
|
||||
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
|
||||
where :math:`\\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\\delta_{i,j}` is the Kronecker delta function.
|
||||
|
||||
The parameters are the parameters of \sigma^2(x) which is a
|
||||
The parameters are the parameters of \\sigma^2(x) which is a
|
||||
function that can be specified by the user, by default an
|
||||
multi-layer peceptron is used.
|
||||
|
||||
|
|
|
|||
|
|
@ -11,28 +11,28 @@ class POLY(Kernpart):
|
|||
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
|
||||
|
||||
.. math::
|
||||
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
|
||||
k(x, y) = \\sigma^2\\*(\\sigma_w^2 x'y+\\sigma_b^b)^d
|
||||
|
||||
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
|
||||
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
|
||||
The kernel parameters are :math:`\\sigma^2` (variance), :math:`\\sigma^2_w`
|
||||
(weight_variance), :math:`\\sigma^2_b` (bias_variance) and d
|
||||
(degree). Only gradients of the first three are provided for
|
||||
kernel optimisation, it is assumed that polynomial degree would
|
||||
be set by hand.
|
||||
|
||||
The kernel is not recommended as it is badly behaved when the
|
||||
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
|
||||
:math:`\\sigma^2_w\\*x'\\*y + \\sigma^2_b` has a magnitude greater than one. For completeness
|
||||
there is an automatic relevance determination version of this
|
||||
kernel provided (NOTE YET IMPLEMENTED!).
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\sigma^2`
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\\sigma^2`
|
||||
:type variance: float
|
||||
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
|
||||
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\\sigma^2_w`
|
||||
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
|
||||
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
|
||||
:param bias_variance: the variance of the prior over bias parameters :math:`\\sigma^2_b`
|
||||
:param degree: the degree of the polynomial.
|
||||
:type degree: int
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
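A NumPy-only sketch of the polynomial covariance documented above; the docstring's "\sigma_b^b" is read here as the bias variance sigma_b^2 (an assumption about a docstring typo), and the function name is ours:

import numpy as np

def poly_cov(x, y, variance=1.0, weight_variance=1.0, bias_variance=1.0, degree=2):
    # k(x, y) = sigma^2 (sigma_w^2 x'y + sigma_b^2)^d
    return variance * (weight_variance * np.dot(x, y) + bias_variance) ** degree

x = np.array([1.0, 2.0]); y = np.array([0.5, -1.0])
print(poly_cov(x, y, degree=3))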
|
|
@ -93,7 +93,7 @@ class POLY(Kernpart):
|
|||
base_cov_grad = base*dL_dK
|
||||
|
||||
|
||||
|
||||
|
||||
target[0] += np.sum(self._K_dvar*dL_dK)
|
||||
target[1] += (self._K_inner_prod*base_cov_grad).sum()
|
||||
target[2] += base_cov_grad.sum()
|
||||
|
|
@ -107,14 +107,14 @@ class POLY(Kernpart):
|
|||
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
|
||||
else:
|
||||
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
|
||||
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
"""Gradient of diagonal of covariance with respect to X"""
|
||||
self._K_diag_computations(X)
|
||||
arg = self._K_diag_poly_arg
|
||||
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
|
||||
|
||||
|
||||
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
if self.ARD:
|
||||
pass
|
||||
|
|
@ -133,6 +133,6 @@ class POLY(Kernpart):
|
|||
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
|
||||
self._K_diag_dvar = self._K_diag_poly_arg**self.degree
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -15,9 +15,9 @@ class RBFInv(RBF):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \\ \\text{ where } r^2 = \\sum_{i=1}^d \\frac{ (x_i-x^\\prime_i)^2}{\\ell_i^2}
|
||||
|
||||
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
|
||||
where \\ell_i is the lengthscale, \\sigma^2 the variance and d the dimensionality of the input.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
|
|
@ -25,7 +25,7 @@ class RBFInv(RBF):
|
|||
:type variance: float
|
||||
:param lengthscale: the vector of lengthscale of the kernel
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \\ell), otherwise there is one lengthscale parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
||||
|
|
|
|||
|
|
@ -14,15 +14,15 @@ class TruncLinear(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
|
||||
k(x,y) = \\sum_{i=1}^input_dim \\sigma^2_i \\max(0, x_iy_i - \\sigma_q)
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variances: the vector of variances :math:`\sigma^2_i`
|
||||
:param variances: the vector of variances :math:`\\sigma^2_i`
|
||||
:type variances: array or list of the appropriate size (or float if there
|
||||
is only one variance parameter)
|
||||
:param ARD: Auto Relevance Determination. If False, the kernel has only one
|
||||
variance parameter \sigma^2, otherwise there is one variance
|
||||
variance parameter \\sigma^2, otherwise there is one variance
|
||||
parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
|
@ -113,15 +113,15 @@ class TruncLinear_inf(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
|
||||
k(x,y) = \\sum_{i=1}^input_dim \\sigma^2_i \\max(0, x_iy_i - \\sigma_q)
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variances: the vector of variances :math:`\sigma^2_i`
|
||||
:param variances: the vector of variances :math:`\\sigma^2_i`
|
||||
:type variances: array or list of the appropriate size (or float if there
|
||||
is only one variance parameter)
|
||||
:param ARD: Auto Relevance Determination. If False, the kernel has only one
|
||||
variance parameter \sigma^2, otherwise there is one variance
|
||||
variance parameter \\sigma^2, otherwise there is one variance
|
||||
parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
|
|
|||
|
|
@ -243,7 +243,7 @@ class Bernoulli(Likelihood):
|
|||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
|
||||
state = np.seterr(divide='ignore')
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
# TODO check y \\in {0, 1} or {-1, 1}
|
||||
d3logpdf_dlink3 = np.where(y==1, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
|
||||
np.seterr(**state)
|
||||
return d3logpdf_dlink3
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ class Exponential(Likelihood):
|
|||
Y is expected to take values in {0,1,2,...}
|
||||
-----
|
||||
$$
|
||||
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
||||
L(x) = \\exp(\\lambda) * \\lambda**Y_i / Y_i!
|
||||
$$
|
||||
"""
|
||||
def __init__(self,gp_link=None):
|
||||
|
|
@ -46,7 +46,7 @@ class Exponential(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -65,7 +65,7 @@ class Exponential(Likelihood):
|
|||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
|
||||
|
||||
:param link_f: latent variables (f)
|
||||
:type link_f: Nx1 array
|
||||
|
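A quick finite-difference check of the gradient above (pure NumPy, arbitrary values): the derivative of ln(lambda) - y*lambda with respect to lambda should match 1/lambda - y.

import numpy as np

lam, y, eps = 0.7, 2.0, 1e-6
logpdf = lambda l: np.log(l) - y * l
fd = (logpdf(lam + eps) - logpdf(lam - eps)) / (2 * eps)
print(fd, 1.0 / lam - y)  # both approximately -0.5714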
|
@ -87,7 +87,7 @@ class Exponential(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -110,7 +110,7 @@ class Exponential(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Gamma(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
|
|
@ -101,7 +101,7 @@ class Gamma(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
@ -126,7 +126,7 @@ class Gamma(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ class Likelihood(Parameterized):
|
|||
Calculation of the log predictive density
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
|
|
@ -199,7 +199,7 @@ class Likelihood(Parameterized):
|
|||
|
||||
.. math:
|
||||
log p(y_{*}|D) = log 1/num_samples prod^{S}_{s=1} p(y_{*}|f_{*s})
|
||||
f_{*s} ~ p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
f_{*s} ~ p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ class ScaledProbit(Probit):
|
|||
"""
|
||||
def __init__(self, nu=1.):
|
||||
self.nu = float(nu)
|
||||
|
||||
|
||||
def transf(self,f):
|
||||
return std_norm_cdf(f*self.nu)
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ class ScaledProbit(Probit):
|
|||
|
||||
def d3transf_df3(self,f):
|
||||
return (safe_square(f*self.nu)-1.)*std_norm_pdf(f*self.nu)*(self.nu**3)
|
||||
|
||||
|
||||
def to_dict(self):
|
||||
"""
|
||||
Convert the object into a json serializable dictionary.
|
||||
|
|
@ -180,7 +180,7 @@ class Cloglog(GPTransformation):
|
|||
|
||||
or
|
||||
|
||||
f = \log (-\log(1-p))
|
||||
f = \\log (-\\log(1-p))
|
||||
|
||||
"""
|
||||
def transf(self,f):
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Poisson(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -72,7 +72,7 @@ class Poisson(Likelihood):
|
|||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
|
||||
|
||||
:param link_f: latent variables (f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -92,7 +92,7 @@ class Poisson(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -113,7 +113,7 @@ class Poisson(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ class StudentT(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||
|
||||
:param inv_link_f: latent variables (link(f))
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -107,7 +107,7 @@ class StudentT(Likelihood):
|
|||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||
|
||||
:param inv_link_f: latent variables (f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -129,7 +129,7 @@ class StudentT(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||
|
||||
:param inv_link_f: latent variables inv_link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -154,7 +154,7 @@ class StudentT(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \\lambda(f_{i}))^3 - 3(y_{i} - \\lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \\lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -175,7 +175,7 @@ class StudentT(Likelihood):
|
|||
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \\lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -199,7 +199,7 @@ class StudentT(Likelihood):
|
|||
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||
|
||||
:param inv_link_f: latent variables inv_link_f
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -220,7 +220,7 @@ class StudentT(Likelihood):
|
|||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \\lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})^{3}}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
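A minimal sketch of a model that exercises these Student-t derivatives, assuming the usual GPy pattern of pairing a non-Gaussian likelihood with Laplace inference (parameter values are arbitrary):

import numpy as np
import GPy

X = np.linspace(0, 10, 50)[:, None]
Y = np.sin(X) + 0.3 * np.random.standard_t(df=4, size=(50, 1))
lik = GPy.likelihoods.StudentT(deg_free=4.0, sigma2=0.1)
laplace = GPy.inference.latent_function_inference.Laplace()
m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1), likelihood=lik, inference_method=laplace)
m.optimize()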
|
|||
|
|
@ -54,7 +54,7 @@ class Weibull(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
|
|
@ -117,7 +117,7 @@ class Weibull(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
@ -150,7 +150,7 @@ class Weibull(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ class Additive(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}*) = f_1(\mathbf{x}*) + f_2(\mathbf(x)*)
|
||||
f(\\mathbf{x}*) = f_1(\\mathbf{x}*) + f_2(\\mathbf(x)*)
|
||||
|
||||
:param mapping1: first mapping to add together.
|
||||
:type mapping1: GPy.mappings.Mapping
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ class Compound(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}) = f_2(f_1(\mathbf{x}))
|
||||
f(\\mathbf{x}) = f_2(f_1(\\mathbf{x}))
|
||||
|
||||
:param mapping1: first mapping
|
||||
:type mapping1: GPy.mappings.Mapping
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ class Constant(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
F(\mathbf{x}) = c
|
||||
F(\\mathbf{x}) = c
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
|
|
|
|||
|
|
@ -12,20 +12,20 @@ class Kernel(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
|
||||
f(\\mathbf{x}) = \\sum_i \\alpha_i k(\\mathbf{z}_i, \\mathbf{x})
|
||||
|
||||
or for multple outputs
|
||||
|
||||
.. math::
|
||||
|
||||
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x})
|
||||
f_i(\\mathbf{x}) = \\sum_j \\alpha_{i,j} k(\\mathbf{z}_i, \\mathbf{x})
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
:type input_dim: int
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param Z: input observations containing :math:`\mathbf{Z}`
|
||||
:param Z: input observations containing :math:`\\mathbf{Z}`
|
||||
:type Z: ndarray
|
||||
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
|
||||
:type kernel: GPy.kern.kern
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ class Linear(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
F(\mathbf{x}) = \mathbf{A} \mathbf{x})
|
||||
F(\\mathbf{x}) = \\mathbf{A} \\mathbf{x})
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class GPKroneckerGaussianRegression(Model):
|
|||
|
||||
.. rubric:: References
|
||||
|
||||
.. [stegle_et_al_2011] Stegle, O.; Lippert, C.; Mooij, J.M.; Lawrence, N.D.; Borgwardt, K.:Efficient inference in matrix-variate Gaussian models with \iid observation noise. In: Advances in Neural Information Processing Systems, 2011, Pages 630-638
|
||||
.. [stegle_et_al_2011] Stegle, O.; Lippert, C.; Mooij, J.M.; Lawrence, N.D.; Borgwardt, K.:Efficient inference in matrix-variate Gaussian models with \\iid observation noise. In: Advances in Neural Information Processing Systems, 2011, Pages 630-638
|
||||
|
||||
"""
|
||||
def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ class SSMRD(Model):
|
|||
|
||||
@Model.optimizer_array.setter
|
||||
def optimizer_array(self, p):
|
||||
if self.mpi_comm != None:
|
||||
if self.mpi_comm is not None:
|
||||
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank == 0:
|
||||
self.mpi_comm.Bcast(np.int32(1), root=0)
|
||||
self.mpi_comm.Bcast(p, root=0)
|
||||
|
|
|
|||
|
|
@ -4002,10 +4002,10 @@ class ContDescrStateSpace(DescreteStateSpace):
|
|||
"""
|
||||
Linear Time-Invariant Stochastic Differential Equation (LTI SDE):
|
||||
|
||||
dx(t) = F x(t) dt + L d \beta ,where
|
||||
dx(t) = F x(t) dt + L d \\beta ,where
|
||||
|
||||
x(t): (vector) stochastic process
|
||||
\beta: (vector) Brownian motion process
|
||||
\\beta: (vector) Brownian motion process
|
||||
F, L: (time invariant) matrices of corresponding dimensions
|
||||
Qc: covariance of noise.
|
||||
|
||||
|
|
@ -4022,7 +4022,7 @@ class ContDescrStateSpace(DescreteStateSpace):
|
|||
F,L: LTI SDE matrices of corresponding dimensions
|
||||
|
||||
Qc: matrix (n,n)
|
||||
Covarince between different dimensions of noise \beta.
|
||||
Covarince between different dimensions of noise \\beta.
|
||||
n is the dimensionality of the noise.
|
||||
|
||||
dt: double or iterable
|
||||
|
|
|
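For readers following the SDE notation above, the discrete-time recursion the state-space machinery ultimately uses is the standard one for an LTI SDE over a step \Delta t (stated here for orientation, not taken from the diff):

x(t_{k+1}) = A_k x(t_k) + q_k, \qquad q_k \sim N(0, Q_k)

A_k = e^{F \Delta t}, \qquad Q_k = \int_0^{\Delta t} e^{F \tau} L\, Q_c\, L^{T} e^{F^{T} \tau}\, d\tau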
|||
|
|
@ -171,7 +171,7 @@ class TPRegression(Model):
|
|||
|
||||
def log_likelihood(self):
|
||||
"""
|
||||
The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
|
||||
The log marginal likelihood of the model, :math:`p(\\mathbf{y})`, this is the objective function of the model being optimised
|
||||
"""
|
||||
return self._log_marginal_likelihood or self.inference()[1]
|
||||
|
||||
|
|
@ -184,10 +184,10 @@ class TPRegression(Model):
|
|||
diagonal of the covariance is returned.
|
||||
|
||||
.. math::
|
||||
p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
|
||||
= MVN\left(\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
|
||||
\frac{\nu + \beta - 2}{\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\right)
|
||||
\nu := \texttt{Degrees of freedom}
|
||||
p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
|
||||
= MVN\\left(\\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
|
||||
\\frac{\\nu + \\beta - 2}{\\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\\right)
|
||||
\\nu := \\texttt{Degrees of freedom}
|
||||
"""
|
||||
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew,
|
||||
pred_var=self._predictive_variable, full_cov=full_cov)
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class WarpedGP(GP):
|
|||
the jacobian of the warping function here.
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param x_test: test locations (x_{*})
|
||||
:type x_test: (Nx1) array
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ class lvm(matplotlib_show):
|
|||
|
||||
def show_sensitivities(self):
|
||||
# A click in the bar chart axis for selection a dimension.
|
||||
if self.sense_axes != None:
|
||||
if self.sense_axes is not None:
|
||||
self.sense_axes.cla()
|
||||
self.sense_axes.bar(np.arange(self.model.input_dim), self.model.input_sensitivity(), color='b')
|
||||
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ def generate_brownian_data(
|
|||
):
|
||||
"""
|
||||
Generate brownian data - data from Brownian motion.
|
||||
First point is always 0, and \Beta(0) = 0 - standard conditions for Brownian motion.
|
||||
First point is always 0, and \\Beta(0) = 0 - standard conditions for Brownian motion.
|
||||
|
||||
Input:
|
||||
--------------------------------
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
The test cases for various inference algorithms
|
||||
"""
|
||||
|
||||
import copy
|
||||
import pickle
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
|
|
@ -146,6 +148,32 @@ class TestInferenceGPEP:
|
|||
< 1e6
|
||||
)
|
||||
|
||||
def test_pickle_copy_EP(self):
|
||||
"""Pickling and deep-copying a classification model employing EP"""
|
||||
|
||||
# Dummy binary classification dataset
|
||||
X = np.array([0, 1, 2, 3]).reshape(-1, 1)
|
||||
Y = np.array([0, 0, 1, 1]).reshape(-1, 1)
|
||||
|
||||
# Some classification model
|
||||
inf = GPy.inference.latent_function_inference.expectation_propagation.EP(
|
||||
max_iters=30, delta=0.5
|
||||
)
|
||||
m = GPy.core.GP(
|
||||
X=X,
|
||||
Y=Y,
|
||||
kernel=GPy.kern.RBF(input_dim=1, variance=1.0, lengthscale=1.0),
|
||||
inference_method = inf,
|
||||
likelihood=GPy.likelihoods.Bernoulli(),
|
||||
mean_function=None
|
||||
)
|
||||
m.optimize()
|
||||
|
||||
m_pickled = pickle.dumps(m)
|
||||
assert pickle.loads(m_pickled) is not None
|
||||
|
||||
assert copy.deepcopy(m) is not None
|
||||
|
||||
# NOTE: adding a test like above for parameterized likelihood- the above test is
|
||||
# only for probit likelihood which does not have any tunable hyperparameter which is why
|
||||
# the term in dictionary of gradients: dL_dthetaL will always be zero. So here we repeat tests for
|
||||
|
|
|
|||
|
|
@ -269,8 +269,8 @@ class TestMisc:
|
|||
from GPy.core.parameterization.variational import NormalPosterior
|
||||
|
||||
X_pred = NormalPosterior(X_pred_mu, X_pred_var)
|
||||
# mu = \int f(x)q(x|mu,S) dx = \int 2x.q(x|mu,S) dx = 2.mu
|
||||
# S = \int (f(x) - m)^2q(x|mu,S) dx = \int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S
|
||||
# mu = \\int f(x)q(x|mu,S) dx = \\int 2x.q(x|mu,S) dx = 2.mu
|
||||
# S = \\int (f(x) - m)^2q(x|mu,S) dx = \\int f(x)^2 q(x) dx - mu**2 = 4(mu^2 + S) - (2.mu)^2 = 4S
|
||||
Y_mu_true = 2 * X_pred_mu
|
||||
Y_var_true = 4 * X_pred_var
|
||||
Y_mu_pred, Y_var_pred = m.predict_noiseless(X_pred)
|
||||
|
|
@ -684,7 +684,7 @@ class TestMisc:
|
|||
warp_m = GPy.models.WarpedGP(
|
||||
X, Y
|
||||
) # , kernel=warp_k)#, warping_function=warp_f)
|
||||
warp_m[".*\.d"].constrain_fixed(1.0)
|
||||
warp_m[r".*\.d"].constrain_fixed(1.0)
|
||||
warp_m.optimize_restarts(
|
||||
parallel=False, robust=False, num_restarts=5, max_iters=max_iters
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,21 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
import GPy
|
||||
from GPy.core.parameterization.priors import (
|
||||
Gaussian,
|
||||
Uniform,
|
||||
LogGaussian,
|
||||
MultivariateGaussian,
|
||||
Gamma,
|
||||
InverseGamma,
|
||||
DGPLVM,
|
||||
DGPLVM_KFDA,
|
||||
DGPLVM_Lamda,
|
||||
DGPLVM_T,
|
||||
HalfT,
|
||||
Exponential,
|
||||
StudentT,
|
||||
)
|
||||
|
||||
|
||||
class TestPrior:
|
||||
|
|
@ -178,3 +193,90 @@ class TestPrior:
|
|||
# should raise an assertionerror.
|
||||
with pytest.raises(AssertionError):
|
||||
m.rbf.set_prior(gaussian)
|
||||
|
||||
|
||||
def initialize_gaussian_prior() -> None:
|
||||
return Gaussian(0, 1)
|
||||
|
||||
|
||||
def initialize_uniform_prior() -> None:
|
||||
return Uniform(0, 1)
|
||||
|
||||
|
||||
def initialize_log_gaussian_prior() -> None:
|
||||
return LogGaussian(0, 1)
|
||||
|
||||
|
||||
def initialize_multivariate_gaussian_prior() -> None:
|
||||
return MultivariateGaussian(np.zeros(2), np.eye(2))
|
||||
|
||||
|
||||
def initialize_gamma_prior() -> None:
|
||||
return Gamma(1, 1)
|
||||
|
||||
|
||||
def initialize_inverse_gamma_prior() -> None:
|
||||
return InverseGamma(1, 1)
|
||||
|
||||
|
||||
def initialize_dgplvm_prior() -> None:
|
||||
# return DGPLVM(...)
|
||||
raise NotImplementedError("No idea how to initialize this prior")
|
||||
|
||||
|
||||
def initialize_dgplvm_kfda_prior() -> None:
|
||||
# return DGPLVM_KFDA(...)
|
||||
raise NotImplementedError("No idea how to initialize this prior")
|
||||
|
||||
|
||||
def initialize_dgplvm_lamda_prior() -> None:
|
||||
# return DGPLVM_Lamda(...)
|
||||
raise NotImplementedError("No idea how to initialize this prior")
|
||||
|
||||
|
||||
def initialize_dgplvm_t_prior() -> None:
|
||||
# return DGPLVM_T(1, 1, (1, 1))
|
||||
raise NotImplementedError("No idea how to initialize this prior")
|
||||
|
||||
|
||||
def initialize_half_t_prior() -> None:
|
||||
return HalfT(1, 1)
|
||||
|
||||
|
||||
def initialize_exponential_prior() -> None:
|
||||
return Exponential(1)
|
||||
|
||||
|
||||
def initialize_student_t_prior() -> None:
|
||||
return StudentT(1, 1, 1)
|
||||
|
||||
|
||||
PRIORS = {
|
||||
"Gaussian": initialize_gaussian_prior,
|
||||
"Uniform": initialize_uniform_prior,
|
||||
"LogGaussian": initialize_log_gaussian_prior,
|
||||
"MultivariateGaussian": initialize_multivariate_gaussian_prior,
|
||||
"Gamma": initialize_gamma_prior,
|
||||
"InverseGamma": initialize_inverse_gamma_prior,
|
||||
# "DGPLVM": initialize_dgplvm_prior,
|
||||
# "DGPLVM_KFDA": initialize_dgplvm_kfda_prior,
|
||||
# "DGPLVM_Lamda": initialize_dgplvm_lamda_prior,
|
||||
# "DGPLVM_T": initialize_dgplvm_t_prior,
|
||||
"HalfT": initialize_half_t_prior,
|
||||
"Exponential": initialize_exponential_prior,
|
||||
"StudentT": initialize_student_t_prior,
|
||||
}
|
||||
|
||||
|
||||
def check_prior(prior_getter: str) -> None:
|
||||
prior_getter()
|
||||
|
||||
|
||||
def test_priors() -> None:
|
||||
for prior_name, prior_getter in PRIORS.items():
|
||||
try:
|
||||
check_prior(prior_getter)
|
||||
except Exception as e:
|
||||
raise RuntimeError(
|
||||
f"Failed to initialize {prior_name} prior"
|
||||
) from e # noqa E501
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ class TestRVTransformation:
|
|||
# ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
|
||||
# ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
|
||||
# ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
|
||||
# ax.set_xlabel(r'transformed $\theta$', fontsize=16)
|
||||
# ax.set_xlabel(r'transformed $\\theta$', fontsize=16)
|
||||
# ax.set_ylabel('PDF', fontsize=16)
|
||||
# plt.legend(loc='best')
|
||||
# plt.show(block=True)
|
||||
|
|
|
|||
|
|
@ -537,7 +537,7 @@ http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%2
|
|||
# In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD.
|
||||
header = """// Data table response\ngoogle.visualization.Query.setResponse("""
|
||||
data = data[len(header):-2]
|
||||
data = re.sub('new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
|
||||
data = re.sub(r'new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
|
||||
timeseries = json.loads(data)
|
||||
columns = [k['label'] for k in timeseries['table']['cols']]
|
||||
rows = map(lambda x: [k['v'] for k in x['c']], timeseries['table']['rows'])
|
||||
|
|
@ -782,7 +782,7 @@ def hapmap3(data_set='hapmap3'):
|
|||
|
||||
/ 1, iff SNPij==(B1,B1)
|
||||
Aij = | 0, iff SNPij==(B1,B2)
|
||||
\ -1, iff SNPij==(B2,B2)
|
||||
\\ -1, iff SNPij==(B2,B2)
|
||||
|
||||
The SNP data and the meta information (such as iid, sex and phenotype) are
|
||||
stored in the dataframe datadf, index is the Individual ID,
|
||||
|
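A small sketch of the +1/0/-1 encoding described above, assuming genotypes arrive as counts of the B2 allele (0, 1 or 2) per SNP; the variable names are ours:

import numpy as np

b2_counts = np.array([0, 1, 2, 1, 0])   # (B1,B1), (B1,B2), (B2,B2), ...
A = 1 - b2_counts                        # maps to 1, 0, -1 as in the docstring
print(A)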
|
@ -1011,7 +1011,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
|
|||
sample_info.columns = c
|
||||
|
||||
# get the labels right:
|
||||
rep = re.compile('\(.*\)')
|
||||
rep = re.compile(r'\(.*\)')
|
||||
def filter_dev_stage(row):
|
||||
if isnull(row):
|
||||
row = "2-cell stage embryo"
|
||||
|
|
@ -1050,7 +1050,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
|
|||
#gene_info[file_info.name[:-18]] = inner.Refseq_IDs
|
||||
|
||||
# Strip GSM number off data index
|
||||
rep = re.compile('GSM\d+_')
|
||||
rep = re.compile(r'GSM\d+_')
|
||||
|
||||
from pandas import MultiIndex
|
||||
columns = MultiIndex.from_tuples([row.split('_', 1) for row in data.columns])
|
||||
|
|
|
|||
|
|
@ -180,24 +180,24 @@ class NetpbmFile(object):
|
|||
"""Read PAM header and initialize instance."""
|
||||
regroups = re.search(
|
||||
b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
|
||||
b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
|
||||
b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
|
||||
rb"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
|
||||
rb"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
|
||||
self.header = regroups[0]
|
||||
self.magicnum = b'P7'
|
||||
for group in regroups[1:]:
|
||||
key, value = group.split()
|
||||
setattr(self, unicode(key).lower(), int(value))
|
||||
matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
|
||||
matches = re.findall(rb"(TUPLTYPE\s+\w+)", self.header)
|
||||
self.tupltypes = [s.split(None, 1)[1] for s in matches]
|
||||
|
||||
def _read_pnm_header(self, data):
|
||||
"""Read PNM header and initialize instance."""
|
||||
bpm = data[1:2] in b"14"
|
||||
regroups = re.search(b"".join((
|
||||
b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
|
||||
b"\s*(\d+)\s+(?:#.*[\r\n])*",
|
||||
b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
|
||||
b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
|
||||
rb"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
|
||||
rb"\s*(\d+)\s+(?:#.*[\r\n])*",
|
||||
rb"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
|
||||
rb"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
|
||||
self.header = regroups[0]
|
||||
self.magicnum = regroups[1]
|
||||
self.width = int(regroups[2])
|
||||
|
|
|
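The rb"..." changes above matter because sequences like \s and \d are invalid escapes in ordinary byte-string literals, which newer Python versions flag; raw byte strings hand them to the regex engine untouched. A minimal, self-contained illustration (the sample header is made up):

import re

header = b"P7\nWIDTH 640\nHEIGHT 480\nDEPTH 3\nMAXVAL 255\nENDHDR\n"
match = re.search(rb"WIDTH\s+(\d+)", header)
print(int(match.group(1).decode()))  # 640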
|||
|
|
@ -150,7 +150,7 @@ with open('../../GPy/__version__.py', 'r') as f:
|
|||
version = f.read()
|
||||
release = version
|
||||
|
||||
print version
|
||||
print(version)
|
||||
|
||||
# version = '0.8.8'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
|
|
|
|||