mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-08 15:05:15 +02:00
Fix invalid escape sequence
This commit is contained in:
parent
aa49220fd1
commit
4fa858ee6d
45 changed files with 224 additions and 224 deletions
|
|
@ -283,7 +283,7 @@ class GP(Model):
|
|||
|
||||
def log_likelihood(self):
|
||||
"""
|
||||
The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
|
||||
The log marginal likelihood of the model, :math:`p(\\mathbf{y})`, this is the objective function of the model being optimised
|
||||
"""
|
||||
return self._log_marginal_likelihood
|
||||
|
||||
|
|
@ -296,9 +296,9 @@ class GP(Model):
|
|||
diagonal of the covariance is returned.
|
||||
|
||||
.. math::
|
||||
p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
|
||||
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
|
||||
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||
p(f*|X*, X, Y) = \\int^{\\infty}_{-\\infty} p(f*|f,X*)p(f|X,Y) df
|
||||
= N(f*| K_{x*x}(K_{xx} + \\Sigma)^{-1}Y, K_{x*x*} - K_{x*x}(K_{xx} + \\Sigma)^{-1}K_{xx*})
|
||||
\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||
"""
|
||||
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
|
||||
if self.mean_function is not None:
|
||||
|
|
@ -702,7 +702,7 @@ class GP(Model):
|
|||
Calculation of the log predictive density
|
||||
|
||||
.. math::
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param x_test: test locations (x_{*})
|
||||
:type x_test: (Nx1) array
|
||||
|
|
@ -718,7 +718,7 @@ class GP(Model):
|
|||
Calculation of the log predictive density by sampling
|
||||
|
||||
.. math::
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param x_test: test locations (x_{*})
|
||||
:type x_test: (Nx1) array
|
||||
|
|
@ -734,24 +734,24 @@ class GP(Model):
|
|||
|
||||
def _raw_posterior_covariance_between_points(self, X1, X2):
|
||||
"""
|
||||
Computes the posterior covariance between points. Does not account for
|
||||
Computes the posterior covariance between points. Does not account for
|
||||
normalization or likelihood
|
||||
|
||||
:param X1: some input observations
|
||||
:param X2: other input observations
|
||||
|
||||
:returns:
|
||||
:returns:
|
||||
cov: raw posterior covariance: k(X1,X2) - k(X1,X) G^{-1} K(X,X2)
|
||||
"""
|
||||
return self.posterior.covariance_between_points(self.kern, self.X, X1, X2)
|
||||
|
||||
|
||||
def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
|
||||
likelihood=None,
|
||||
def posterior_covariance_between_points(self, X1, X2, Y_metadata=None,
|
||||
likelihood=None,
|
||||
include_likelihood=True):
|
||||
"""
|
||||
Computes the posterior covariance between points. Includes likelihood
|
||||
variance as well as normalization so that evaluation at (x,x) is consistent
|
||||
Computes the posterior covariance between points. Includes likelihood
|
||||
variance as well as normalization so that evaluation at (x,x) is consistent
|
||||
with model.predict
|
||||
|
||||
:param X1: some input observations
|
||||
|
|
@ -762,8 +762,8 @@ class GP(Model):
|
|||
the predicted underlying latent function f.
|
||||
:type include_likelihood: bool
|
||||
|
||||
:returns:
|
||||
cov: posterior covariance, a Numpy array, Nnew x Nnew if
|
||||
:returns:
|
||||
cov: posterior covariance, a Numpy array, Nnew x Nnew if
|
||||
self.output_dim == 1, and Nnew x Nnew x self.output_dim otherwise.
|
||||
"""
|
||||
|
||||
|
|
@ -774,7 +774,7 @@ class GP(Model):
|
|||
mean, _ = self._raw_predict(X1, full_cov=True)
|
||||
if likelihood is None:
|
||||
likelihood = self.likelihood
|
||||
_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
|
||||
_, cov = likelihood.predictive_values(mean, cov, full_cov=True,
|
||||
Y_metadata=Y_metadata)
|
||||
|
||||
if self.normalizer is not None:
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ class Symbolic_core():
|
|||
self._set_derivatives(derivatives)
|
||||
self._set_parameters(parameters)
|
||||
# Convert the expressions to a list for common sub expression elimination
|
||||
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
|
||||
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
|
||||
self.update_expression_list()
|
||||
|
||||
# Apply any global stabilisation operations to expressions.
|
||||
|
|
@ -86,7 +86,7 @@ class Symbolic_core():
|
|||
# object except as cached. For covariance functions this is X
|
||||
# and Z, for likelihoods F and for mapping functions X.
|
||||
self.cacheable_vars = [] # list of everything that's cacheable
|
||||
for var in cacheable:
|
||||
for var in cacheable:
|
||||
self.variables[var] = [e for e in vars if e.name.split('_')[0]==var.lower()]
|
||||
self.cacheable_vars += self.variables[var]
|
||||
for var in cacheable:
|
||||
|
|
@ -105,7 +105,7 @@ class Symbolic_core():
|
|||
for derivative in derivatives:
|
||||
derivative_arguments += self.variables[derivative]
|
||||
|
||||
# Do symbolic work to compute derivatives.
|
||||
# Do symbolic work to compute derivatives.
|
||||
for key, func in self.expressions.items():
|
||||
# if func['function'].is_Matrix:
|
||||
# rows = func['function'].shape[0]
|
||||
|
|
@ -126,7 +126,7 @@ class Symbolic_core():
|
|||
if theta.name in parameters:
|
||||
val = parameters[theta.name]
|
||||
# Add parameter.
|
||||
|
||||
|
||||
self.link_parameters(Param(theta.name, val, None))
|
||||
#self._set_attribute(theta.name, )
|
||||
|
||||
|
|
@ -174,7 +174,7 @@ class Symbolic_core():
|
|||
code = self.code[function]['derivative'][theta.name]
|
||||
gradient[theta.name] = (partial*eval(code, self.namespace)).sum()
|
||||
return gradient
|
||||
|
||||
|
||||
def eval_gradients_X(self, function, partial, **kwargs):
|
||||
if 'X' in kwargs:
|
||||
gradients_X = np.zeros_like(kwargs['X'])
|
||||
|
|
@ -194,7 +194,7 @@ class Symbolic_core():
|
|||
for variable, code in self.variable_sort(self.code['parameters_changed']):
|
||||
lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
|
||||
return lcode
|
||||
|
||||
|
||||
def code_update_cache(self):
|
||||
lcode = ''
|
||||
for var in self.cacheable:
|
||||
|
|
@ -208,7 +208,7 @@ class Symbolic_core():
|
|||
for i, theta in enumerate(self.variables[var]):
|
||||
lcode+= "\t" + var + '= np.atleast_2d(' + var + ')\n'
|
||||
lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n"
|
||||
|
||||
|
||||
for variable, code in self.variable_sort(self.code['update_cache']):
|
||||
lcode+= self._print_code(variable) + ' = ' + self._print_code(code) + "\n"
|
||||
|
||||
|
|
@ -250,7 +250,7 @@ class Symbolic_core():
|
|||
"""Make sure namespace gets updated when setting attributes."""
|
||||
setattr(self, name, value)
|
||||
self.namespace.update({name: getattr(self, name)})
|
||||
|
||||
|
||||
|
||||
def update_expression_list(self):
|
||||
"""Extract a list of expressions from the dictionary of expressions."""
|
||||
|
|
@ -260,9 +260,9 @@ class Symbolic_core():
|
|||
for fname, fexpressions in self.expressions.items():
|
||||
for type, texpressions in fexpressions.items():
|
||||
if type == 'function':
|
||||
self.expression_list.append(texpressions)
|
||||
self.expression_list.append(texpressions)
|
||||
self.expression_keys.append([fname, type])
|
||||
self.expression_order.append(1)
|
||||
self.expression_order.append(1)
|
||||
elif type[-10:] == 'derivative':
|
||||
for dtype, expression in texpressions.items():
|
||||
self.expression_list.append(expression)
|
||||
|
|
@ -274,9 +274,9 @@ class Symbolic_core():
|
|||
elif type[:-10] == 'third_':
|
||||
self.expression_order.append(5) #sym.count_ops(self.expressions[type][dtype]))
|
||||
else:
|
||||
self.expression_list.append(fexpressions[type])
|
||||
self.expression_list.append(fexpressions[type])
|
||||
self.expression_keys.append([fname, type])
|
||||
self.expression_order.append(2)
|
||||
self.expression_order.append(2)
|
||||
|
||||
# This step may be unnecessary.
|
||||
# Not 100% sure if the sub expression elimination is order sensitive. This step orders the list with the 'function' code first and derivatives after.
|
||||
|
|
@ -313,7 +313,7 @@ class Symbolic_core():
|
|||
sym_var = sym.var(cache_prefix + str(i))
|
||||
self.variables[cache_prefix].append(sym_var)
|
||||
replace_dict[expr.name] = sym_var
|
||||
|
||||
|
||||
for i, expr in enumerate(params_change_list):
|
||||
sym_var = sym.var(sub_prefix + str(i))
|
||||
self.variables[sub_prefix].append(sym_var)
|
||||
|
|
@ -329,7 +329,7 @@ class Symbolic_core():
|
|||
for keys in self.expression_keys:
|
||||
for replace, void in common_sub_expressions:
|
||||
setInDict(self.expressions, keys, getFromDict(self.expressions, keys).subs(replace, replace_dict[replace.name]))
|
||||
|
||||
|
||||
self.expressions['parameters_changed'] = {}
|
||||
self.expressions['update_cache'] = {}
|
||||
for var, expr in common_sub_expressions:
|
||||
|
|
@ -339,7 +339,7 @@ class Symbolic_core():
|
|||
self.expressions['update_cache'][replace_dict[var.name].name] = expr
|
||||
else:
|
||||
self.expressions['parameters_changed'][replace_dict[var.name].name] = expr
|
||||
|
||||
|
||||
|
||||
def _gen_code(self):
|
||||
"""Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""
|
||||
|
|
@ -357,8 +357,8 @@ class Symbolic_core():
|
|||
return code
|
||||
|
||||
self.code = match_key(self.expressions)
|
||||
|
||||
|
||||
|
||||
|
||||
def _expr2code(self, arg_list, expr):
|
||||
"""Convert the given symbolic expression into code."""
|
||||
code = lambdastr(arg_list, expr)
|
||||
|
|
@ -379,7 +379,7 @@ class Symbolic_core():
|
|||
def _display_expression(self, keys, user_substitutes={}):
|
||||
"""Helper function for human friendly display of the symbolic components."""
|
||||
# Create some pretty maths symbols for the display.
|
||||
sigma, alpha, nu, omega, l, variance = sym.var('\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
|
||||
sigma, alpha, nu, omega, l, variance = sym.var(r'\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
|
||||
substitutes = {'scale': sigma, 'shape': alpha, 'lengthscale': l, 'variance': variance}
|
||||
substitutes.update(user_substitutes)
|
||||
|
||||
|
|
@ -416,5 +416,5 @@ class Symbolic_core():
|
|||
return int(digits[0])
|
||||
else:
|
||||
return x[0]
|
||||
|
||||
|
||||
return sorted(var_dict.items(), key=sort_key, reverse=reverse)
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class Laplace(LatentFunctionInference):
|
|||
"""
|
||||
Laplace Approximation
|
||||
|
||||
Find the moments \hat{f} and the hessian at this point
|
||||
Find the moments \\hat{f} and the hessian at this point
|
||||
(using Newton-Raphson) of the unnormalised posterior
|
||||
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -178,8 +178,8 @@ class Posterior(object):
|
|||
"""
|
||||
The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
|
||||
$$
|
||||
(K_{xx} + \Sigma_{xx})^{-1}
|
||||
\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||
(K_{xx} + \\Sigma_{xx})^{-1}
|
||||
\\Sigma_{xx} := \\texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||
$$
|
||||
"""
|
||||
if self._woodbury_inv is None:
|
||||
|
|
@ -200,8 +200,8 @@ class Posterior(object):
|
|||
"""
|
||||
Woodbury vector in the gaussian likelihood case only is defined as
|
||||
$$
|
||||
(K_{xx} + \Sigma)^{-1}Y
|
||||
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||
(K_{xx} + \\Sigma)^{-1}Y
|
||||
\\Sigma := \\texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||
$$
|
||||
"""
|
||||
if self._woodbury_vector is None:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ except ImportError:
|
|||
|
||||
|
||||
class Coregionalize(Kern):
|
||||
"""
|
||||
r"""
|
||||
Covariance function for intrinsic/linear coregionalization models
|
||||
|
||||
This covariance has the form:
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ class EQ_ODE1(Kern):
|
|||
|
||||
The outputs of this kernel have the form
|
||||
.. math::
|
||||
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
|
||||
\\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} u_i(t-\\delta_j) - d_jy_j(t)
|
||||
|
||||
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ class EQ_ODE2(Kern):
|
|||
|
||||
The outputs of this kernel have the form
|
||||
.. math::
|
||||
\frac{\text{d}^2y_j(t)}{\text{d}^2t} + C_j\frac{\text{d}y_j(t)}{\text{d}t} + B_jy_j(t) = \sum_{i=1}^R w_{j,i} u_i(t)
|
||||
\\frac{\\text{d}^2y_j(t)}{\\text{d}^2t} + C_j\\frac{\\text{d}y_j(t)}{\\text{d}t} + B_jy_j(t) = \\sum_{i=1}^R w_{j,i} u_i(t)
|
||||
|
||||
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
|
||||
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class GridRBF(GridKern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
|
||||
|
||||
"""
|
||||
_support_GPU = True
|
||||
|
|
|
|||
|
|
@ -146,25 +146,25 @@ class Kern(Parameterized):
|
|||
def psi0(self, Z, variational_posterior):
|
||||
"""
|
||||
.. math::
|
||||
\psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
|
||||
\\psi_0 = \\sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
|
||||
"""
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
|
||||
def psi1(self, Z, variational_posterior):
|
||||
"""
|
||||
.. math::
|
||||
\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
|
||||
\\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
|
||||
"""
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
|
||||
def psi2(self, Z, variational_posterior):
|
||||
"""
|
||||
.. math::
|
||||
\psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
|
||||
\\psi_2^{m,m'} = \\sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
|
||||
"""
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
"""
|
||||
.. math::
|
||||
\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
|
||||
\\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
|
||||
|
||||
Thus, we do not sum out n, compared to psi2
|
||||
"""
|
||||
|
|
@ -173,7 +173,7 @@ class Kern(Parameterized):
|
|||
"""
|
||||
.. math::
|
||||
|
||||
\\frac{\partial L}{\partial X} = \\frac{\partial L}{\partial K}\\frac{\partial K}{\partial X}
|
||||
\\frac{\\partial L}{\\partial X} = \\frac{\\partial L}{\\partial K}\\frac{\\partial K}{\\partial X}
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def gradients_X_X2(self, dL_dK, X, X2):
|
||||
|
|
@ -182,7 +182,7 @@ class Kern(Parameterized):
|
|||
"""
|
||||
.. math::
|
||||
|
||||
\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
|
||||
\\frac{\\partial^2 L}{\\partial X\\partial X_2} = \\frac{\\partial L}{\\partial K}\\frac{\\partial^2 K}{\\partial X\\partial X_2}
|
||||
"""
|
||||
raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
|
||||
def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
|
||||
|
|
@ -203,7 +203,7 @@ class Kern(Parameterized):
|
|||
def update_gradients_full(self, dL_dK, X, X2):
|
||||
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def reset_gradients(self):
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
@ -216,9 +216,9 @@ class Kern(Parameterized):
|
|||
|
||||
.. math::
|
||||
|
||||
\\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\
|
||||
& \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\
|
||||
& \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
|
||||
\\frac{\\partial L}{\\partial \\theta_i} & = \\frac{\\partial L}{\\partial \\psi_0}\\frac{\\partial \\psi_0}{\\partial \\theta_i}\\
|
||||
& \\quad + \\frac{\\partial L}{\\partial \\psi_1}\\frac{\\partial \\psi_1}{\\partial \\theta_i}\\
|
||||
& \\quad + \\frac{\\partial L}{\\partial \\psi_2}\\frac{\\partial \\psi_2}{\\partial \\theta_i}
|
||||
|
||||
Thus, we push the different derivatives through the gradients of the psi
|
||||
statistics. Be sure to set the gradients for all kernel
|
||||
|
|
|
|||
|
|
@ -16,15 +16,15 @@ class Linear(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i
|
||||
k(x,y) = \\sum_{i=1}^{\\text{input_dim}} \\sigma^2_i x_iy_i
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variances: the vector of variances :math:`\sigma^2_i`
|
||||
:param variances: the vector of variances :math:`\\sigma^2_i`
|
||||
:type variances: array or list of the appropriate size (or float if there
|
||||
is only one variance parameter)
|
||||
:param ARD: Auto Relevance Determination. If False, the kernel has only one
|
||||
variance parameter \sigma^2, otherwise there is one variance
|
||||
variance parameter \\sigma^2, otherwise there is one variance
|
||||
parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
|
@ -121,7 +121,7 @@ class Linear(Kern):
|
|||
the returned array is of shape [NxNxQxQ].
|
||||
|
||||
.. math::
|
||||
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
|
||||
\\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}
|
||||
|
||||
..returns:
|
||||
dL2_dXdX2: [NxMxQxQ] for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
|
||||
|
|
|
|||
|
|
@ -20,12 +20,12 @@ class MLP(Kern):
|
|||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\sigma^2`
|
||||
:param variance: the variance :math:`\\sigma^2`
|
||||
:type variance: float
|
||||
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
|
||||
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\\sigma^2_w`
|
||||
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
|
||||
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
|
||||
:param bias_variance: the variance of the prior over bias parameters :math:`\\sigma^2_b`
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \\sigma^2_w), otherwise there is one weight variance parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ class RBF(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
|
||||
|
||||
"""
|
||||
_support_GPU = True
|
||||
|
|
|
|||
|
|
@ -12,48 +12,48 @@ import numpy as np
|
|||
|
||||
class sde_Brownian(Brownian):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE form.
|
||||
|
||||
|
||||
Linear kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sigma^2 min(x,y)
|
||||
k(x,y) = \\sigma^2 min(x,y)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
"""
|
||||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variance.gradient = gradients[0]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variance.values) # this is the initial variance in Bayesian linear regression
|
||||
|
||||
|
||||
F = np.array( ((0,1.0),(0,0) ))
|
||||
L = np.array( ((1.0,),(0,)) )
|
||||
Qc = np.array( ((variance,),) )
|
||||
H = np.array( ((1.0,0),) )
|
||||
|
||||
|
||||
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
|
||||
#P0 = Pinf.copy()
|
||||
P0 = np.zeros((2,2))
|
||||
#P0 = Pinf.copy()
|
||||
P0 = np.zeros((2,2))
|
||||
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
|
||||
dF = np.zeros((2,2,1))
|
||||
dQc = np.ones( (1,1,1) )
|
||||
|
||||
|
||||
dPinf = np.zeros((2,2,1))
|
||||
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
|
||||
#dP0 = dPinf.copy()
|
||||
#dP0 = dPinf.copy()
|
||||
dP0 = np.zeros((2,2,1))
|
||||
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
|
|
|||
|
|
@ -11,15 +11,15 @@ import numpy as np
|
|||
|
||||
class sde_Linear(Linear):
|
||||
"""
|
||||
|
||||
|
||||
Class provide extra functionality to transfer this covariance function into
|
||||
SDE form.
|
||||
|
||||
|
||||
Linear kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
|
||||
k(x,y) = \\sum_{i=1}^{input dim} \\sigma^2_i x_iy_i
|
||||
|
||||
"""
|
||||
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
|
||||
|
|
@ -27,40 +27,40 @@ class sde_Linear(Linear):
|
|||
Modify the init method, because one extra parameter is required. X - points
|
||||
on the X axis.
|
||||
"""
|
||||
|
||||
|
||||
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
|
||||
|
||||
|
||||
self.t0 = np.min(X)
|
||||
|
||||
|
||||
|
||||
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
"""
|
||||
Update gradient in the order in which parameters are represented in the
|
||||
kernel
|
||||
"""
|
||||
|
||||
|
||||
self.variances.gradient = gradients[0]
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Return the state space representation of the covariance.
|
||||
"""
|
||||
|
||||
variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
|
||||
t0 = float(self.t0)
|
||||
|
||||
|
||||
F = np.array( ((0,1.0),(0,0) ))
|
||||
L = np.array( ((0,),(1.0,)) )
|
||||
Qc = np.zeros((1,1))
|
||||
H = np.array( ((1.0,0),) )
|
||||
|
||||
|
||||
Pinf = np.zeros((2,2))
|
||||
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
|
||||
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
|
||||
dF = np.zeros((2,2,1))
|
||||
dQc = np.zeros( (1,1,1) )
|
||||
|
||||
|
||||
dPinf = np.zeros((2,2,1))
|
||||
dP0 = np.zeros((2,2,1))
|
||||
dP0[:,:,0] = P0 / variance
|
||||
|
||||
|
||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ class sde_Matern32(Matern32):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
|
|
@ -79,7 +79,7 @@ class sde_Matern52(Matern52):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac{5}{3}r^2) \\exp(- \\sqrt{5} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
|
|
|
|||
|
|
@ -24,8 +24,8 @@ class sde_StdPeriodic(StdPeriodic):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
|
||||
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
|
||||
k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} {}\\sum_{i=1}^{input\\_dim}
|
||||
\\left( \\frac{\\sin(\\frac{\\pi}{\\lambda_i} (x_i - y_i) )}{l_i} \\right)^2 \\right]
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ def seriescoeff(m=6, lengthScale=1.0, magnSigma2=1.0, true_covariance=False):
|
|||
Calculate the coefficients q_j^2 for the covariance function
|
||||
approximation:
|
||||
|
||||
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
|
||||
k(\\tau) = \\sum_{j=0}^{+\\infty} q_j^2 \\cos(j\\omega_0 \\tau)
|
||||
|
||||
Reference is:
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ class sde_White(White):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \alpha*\delta(x-y)
|
||||
k(x,y) = \\alpha*\\delta(x-y)
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class sde_RBF(RBF):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -204,7 +204,7 @@ class sde_Exponential(Exponential):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -259,7 +259,7 @@ class sde_RatQuad(RatQuad):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha} \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{input dim} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -24,19 +24,19 @@ class StdPeriodic(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
|
||||
\left( \frac{\sin(\frac{\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
|
||||
k(x,y) = \\theta_1 \\exp \\left[ - \\frac{1}{2} \\sum_{i=1}^{input\\_dim}
|
||||
\\left( \\frac{\\sin(\\frac{\\pi}{T_i} (x_i - y_i) )}{l_i} \\right)^2 \\right]
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\\theta_1` in the formula above
|
||||
:type variance: float
|
||||
:param period: the vector of periods :math:`\T_i`. If None then 1.0 is assumed.
|
||||
:param period: the vector of periods :math:`T_i`. If None then 1.0 is assumed.
|
||||
:type period: array or list of the appropriate size (or float if there is only one period parameter)
|
||||
:param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
|
||||
:param lengthscale: the vector of lengthscales :math:`l_i`. If None then 1.0 is assumed.
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:param ARD1: Auto Relevance Determination with respect to period.
|
||||
If equal to "False" one single period parameter :math:`\T_i` for
|
||||
If equal to "False" one single period parameter :math:`T_i` for
|
||||
each dimension is assumed, otherwise there is one lengthscale
|
||||
parameter per dimension.
|
||||
:type ARD1: Boolean
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ class Stationary(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
|
||||
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\\ell_q^2} }.
|
||||
|
||||
By default, there's only one lengthscale: separate lengthscales for each
|
||||
dimension can be enabled by setting ARD=True.
|
||||
|
|
@ -153,7 +153,7 @@ class Stationary(Kern):
|
|||
Efficiently compute the scaled distance, r.
|
||||
|
||||
.. math::
|
||||
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
|
||||
r = \\sqrt( \\sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
|
||||
|
||||
Note that if there is only one lengthscale, l comes outside the sum. In
|
||||
this case we compute the unscaled distance first (in a separate
|
||||
|
|
@ -259,7 +259,7 @@ class Stationary(Kern):
|
|||
the returned array is of shape [NxNxQxQ].
|
||||
|
||||
.. math::
|
||||
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
|
||||
\\frac{\\partial^2 K}{\\partial X2 ^2} = - \\frac{\\partial^2 K}{\\partial X\\partial X2}
|
||||
|
||||
..returns:
|
||||
dL2_dXdX2: [NxMxQxQ] in the cov=True case, or [NxMxQ] in the cov=False case,
|
||||
|
|
@ -295,7 +295,7 @@ class Stationary(Kern):
|
|||
Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:
|
||||
|
||||
.. math::
|
||||
\frac{\partial^2 K}{\partial X\partial X}
|
||||
\\frac{\\partial^2 K}{\\partial X\\partial X}
|
||||
|
||||
..returns:
|
||||
dL2_dXdX: [NxQxQ]
|
||||
|
|
@ -423,7 +423,7 @@ class OU(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \\exp(- r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -460,7 +460,7 @@ class Matern32(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -559,7 +559,7 @@ class Matern52(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{5} r + \\frac53 r^2) \\exp(- \\sqrt{5} r)
|
||||
"""
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
|
||||
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
|
|
@ -626,7 +626,7 @@ class ExpQuad(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp(- 0.5 r^2)
|
||||
k(r) = \\sigma^2 \\exp(- 0.5 r^2)
|
||||
|
||||
notes::
|
||||
- This is exactly the same as the RBF covariance function, but the
|
||||
|
|
@ -664,10 +664,10 @@ class ExpQuad(Stationary):
|
|||
class Cosine(Stationary):
|
||||
"""
|
||||
Cosine Covariance function
|
||||
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \cos(r)
|
||||
k(r) = \\sigma^2 \\cos(r)
|
||||
|
||||
"""
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'):
|
||||
|
|
@ -682,18 +682,18 @@ class Cosine(Stationary):
|
|||
class ExpQuadCosine(Stationary):
|
||||
"""
|
||||
Exponentiated quadratic multiplied by cosine covariance function (spectral mixture kernel).
|
||||
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp(-2\pi^2r^2)\cos(2\pi r/T)
|
||||
k(r) = \\sigma^2 \\exp(-2\\pi^2r^2)\\cos(2\\pi r/T)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, period=1., ARD=False, active_dims=None, name='ExpQuadCosine'):
|
||||
super(ExpQuadCosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
self.period = Param('period', period, Logexp())
|
||||
self.link_parameters(self.period)
|
||||
|
||||
|
||||
def K_of_r(self, r):
|
||||
return self.variance * np.exp(-2*np.pi**2*r**2)*np.cos(2*np.pi*r/self.period)
|
||||
|
||||
|
|
@ -712,18 +712,18 @@ class ExpQuadCosine(Stationary):
|
|||
super(ExpQuadCosine, self).update_gradients_diag(dL_dKdiag, X)
|
||||
self.period.gradient = 0.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class Sinc(Stationary):
|
||||
"""
|
||||
Sinc Covariance function
|
||||
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \sinc(\pi r)
|
||||
k(r) = \\sigma^2 \\sinc(\\pi r)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Sinc'):
|
||||
super(Sinc, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
|
||||
|
|
@ -734,7 +734,7 @@ class Sinc(Stationary):
|
|||
# small angle approximation to avoid divide by zero errors.
|
||||
return np.where(r<1e-5, -self.variance*4/3*np.pi*np.pi*r, self.variance/r * (np.cos(2*np.pi*r)-np.sinc(2*r)))
|
||||
|
||||
|
||||
|
||||
|
||||
class RatQuad(Stationary):
|
||||
"""
|
||||
|
|
@ -742,7 +742,7 @@ class RatQuad(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
|
||||
k(r) = \\sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -9,13 +9,13 @@ import GPy
|
|||
|
||||
class Gibbs(Kernpart):
|
||||
"""
|
||||
Gibbs non-stationary covariance function.
|
||||
Gibbs non-stationary covariance function.
|
||||
|
||||
.. math::
|
||||
|
||||
|
||||
r = sqrt((x_i - x_j)'*(x_i - x_j))
|
||||
|
||||
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
|
||||
|
||||
k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
|
||||
|
||||
Z = (2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')^{q/2}
|
||||
|
||||
|
|
@ -25,18 +25,18 @@ class Gibbs(Kernpart):
|
|||
with input location. This leads to an additional term in front of
|
||||
the kernel.
|
||||
|
||||
The parameters are :math:`\sigma^2`, the process variance, and
|
||||
The parameters are :math:`\\sigma^2`, the process variance, and
|
||||
the parameters of l(x) which is a function that can be
|
||||
specified by the user, by default a multi-layer perceptron is
|
||||
used.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\sigma^2`
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\\sigma^2`
|
||||
:type variance: float
|
||||
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
|
||||
:type mapping: GPy.core.Mapping
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \\sigma^2_w), otherwise there is one weight variance parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ class Gibbs(Kernpart):
|
|||
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
|
||||
else:
|
||||
target += self.mapping.df_dX(self._dL_dl[:, None], X)
|
||||
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
"""Gradient of diagonal of covariance with respect to X."""
|
||||
pass
|
||||
|
|
@ -123,7 +123,7 @@ class Gibbs(Kernpart):
|
|||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
|
||||
|
||||
|
||||
def _K_computations(self, X, X2=None):
|
||||
"""Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
|
||||
self._lengthscales=self.mapping.f(X)
|
||||
|
|
@ -146,7 +146,7 @@ class Gibbs(Kernpart):
|
|||
"""Pre-computations for the gradients of the covaraince function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
|
||||
:param dL_dK: the gradient of the objective with respect to the covariance function.
|
||||
:type dL_dK: ndarray"""
|
||||
|
||||
|
||||
self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
|
||||
if self._lengthscales_two is self._lengthscales:
|
||||
self._dL_dl_two = None
|
||||
|
|
|
|||
|
|
@ -19,11 +19,11 @@ class Hetero(Kernpart):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)
|
||||
k(x_i, x_j) = \\delta_{i,j} \\sigma^2(x_i)
|
||||
|
||||
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
|
||||
where :math:`\\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\\delta_{i,j}` is the Kronecker delta function.
|
||||
|
||||
The parameters are the parameters of \sigma^2(x) which is a
|
||||
The parameters are the parameters of \\sigma^2(x) which is a
|
||||
function that can be specified by the user, by default an
|
||||
multi-layer perceptron is used.
|
||||
|
||||
|
|
|
|||
|
|
@ -11,28 +11,28 @@ class POLY(Kernpart):
|
|||
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
|
||||
|
||||
.. math::
|
||||
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
|
||||
k(x, y) = \\sigma^2\\*(\\sigma_w^2 x'y+\\sigma_b^2)^d
|
||||
|
||||
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
|
||||
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
|
||||
The kernel parameters are :math:`\\sigma^2` (variance), :math:`\\sigma^2_w`
|
||||
(weight_variance), :math:`\\sigma^2_b` (bias_variance) and d
|
||||
(degree). Only gradients of the first three are provided for
|
||||
kernel optimisation, it is assumed that polynomial degree would
|
||||
be set by hand.
|
||||
|
||||
The kernel is not recommended as it is badly behaved when the
|
||||
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
|
||||
:math:`\\sigma^2_w\\*x'\\*y + \\sigma^2_b` has a magnitude greater than one. For completeness
|
||||
there is an automatic relevance determination version of this
|
||||
kernel provided (NOT YET IMPLEMENTED!).
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\sigma^2`
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\\sigma^2`
|
||||
:type variance: float
|
||||
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
|
||||
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\\sigma^2_w`
|
||||
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
|
||||
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
|
||||
:param bias_variance: the variance of the prior over bias parameters :math:`\\sigma^2_b`
|
||||
:param degree: the degree of the polynomial.
|
||||
:type degree: int
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
|
|
@ -93,7 +93,7 @@ class POLY(Kernpart):
|
|||
base_cov_grad = base*dL_dK
|
||||
|
||||
|
||||
|
||||
|
||||
target[0] += np.sum(self._K_dvar*dL_dK)
|
||||
target[1] += (self._K_inner_prod*base_cov_grad).sum()
|
||||
target[2] += base_cov_grad.sum()
|
||||
|
|
@ -107,14 +107,14 @@ class POLY(Kernpart):
|
|||
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
|
||||
else:
|
||||
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
|
||||
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
"""Gradient of diagonal of covariance with respect to X"""
|
||||
self._K_diag_computations(X)
|
||||
arg = self._K_diag_poly_arg
|
||||
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
|
||||
|
||||
|
||||
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
if self.ARD:
|
||||
pass
|
||||
|
|
@ -133,6 +133,6 @@ class POLY(Kernpart):
|
|||
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
|
||||
self._K_diag_dvar = self._K_diag_poly_arg**self.degree
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -15,9 +15,9 @@ class RBFInv(RBF):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
|
||||
k(r) = \\sigma^2 \\exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \\ \\text{ where } r^2 = \\sum_{i=1}^d \\frac{ (x_i-x^\\prime_i)^2}{\\ell_i^2}
|
||||
|
||||
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
|
||||
where \\ell_i is the lengthscale, \\sigma^2 the variance and d the dimensionality of the input.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
|
|
@ -25,7 +25,7 @@ class RBFInv(RBF):
|
|||
:type variance: float
|
||||
:param lengthscale: the vector of lengthscale of the kernel
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \\ell), otherwise there is one lengthscale parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
||||
|
|
|
|||
|
|
@ -14,15 +14,15 @@ class TruncLinear(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
|
||||
k(x,y) = \\sum_{i=1}^input_dim \\sigma^2_i \\max(0, x_iy_i - \\sigma_q)
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variances: the vector of variances :math:`\sigma^2_i`
|
||||
:param variances: the vector of variances :math:`\\sigma^2_i`
|
||||
:type variances: array or list of the appropriate size (or float if there
|
||||
is only one variance parameter)
|
||||
:param ARD: Auto Relevance Determination. If False, the kernel has only one
|
||||
variance parameter \sigma^2, otherwise there is one variance
|
||||
variance parameter \\sigma^2, otherwise there is one variance
|
||||
parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
|
@ -113,15 +113,15 @@ class TruncLinear_inf(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
|
||||
k(x,y) = \\sum_{i=1}^input_dim \\sigma^2_i \\max(0, x_iy_i - \\sigma_q)
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variances: the vector of variances :math:`\sigma^2_i`
|
||||
:param variances: the vector of variances :math:`\\sigma^2_i`
|
||||
:type variances: array or list of the appropriate size (or float if there
|
||||
is only one variance parameter)
|
||||
:param ARD: Auto Relevance Determination. If False, the kernel has only one
|
||||
variance parameter \sigma^2, otherwise there is one variance
|
||||
variance parameter \\sigma^2, otherwise there is one variance
|
||||
parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ class Exponential(Likelihood):
|
|||
Y is expected to take values in {0,1,2,...}
|
||||
-----
|
||||
$$
|
||||
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
||||
L(x) = \\exp(\\lambda) * \\lambda**Y_i / Y_i!
|
||||
$$
|
||||
"""
|
||||
def __init__(self,gp_link=None):
|
||||
|
|
@ -46,7 +46,7 @@ class Exponential(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\lambda(f_{i}) - y_{i}\\lambda(f_{i})
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -65,7 +65,7 @@ class Exponential(Likelihood):
|
|||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\lambda(f)} - y_{i}
|
||||
|
||||
:param link_f: latent variables (f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -87,7 +87,7 @@ class Exponential(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\frac{1}{\\lambda(f_{i})^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -110,7 +110,7 @@ class Exponential(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2}{\\lambda(f_{i})^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Gamma(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
|
|
@ -101,7 +101,7 @@ class Gamma(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
@ -126,7 +126,7 @@ class Gamma(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ class Likelihood(Parameterized):
|
|||
Calculation of the log predictive density
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
|
|
@ -199,7 +199,7 @@ class Likelihood(Parameterized):
|
|||
|
||||
.. math:
|
||||
log p(y_{*}|D) = log 1/num_samples prod^{S}_{s=1} p(y_{*}|f_{*s})
|
||||
f_{*s} ~ p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
f_{*s} ~ p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ class ScaledProbit(Probit):
|
|||
"""
|
||||
def __init__(self, nu=1.):
|
||||
self.nu = float(nu)
|
||||
|
||||
|
||||
def transf(self,f):
|
||||
return std_norm_cdf(f*self.nu)
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ class ScaledProbit(Probit):
|
|||
|
||||
def d3transf_df3(self,f):
|
||||
return (safe_square(f*self.nu)-1.)*std_norm_pdf(f*self.nu)*(self.nu**3)
|
||||
|
||||
|
||||
def to_dict(self):
|
||||
"""
|
||||
Convert the object into a json serializable dictionary.
|
||||
|
|
@ -180,7 +180,7 @@ class Cloglog(GPTransformation):
|
|||
|
||||
or
|
||||
|
||||
f = \log (-\log(1-p))
|
||||
f = \\log (-\\log(1-p))
|
||||
|
||||
"""
|
||||
def transf(self,f):
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Poisson(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\lambda(f_{i}) + y_{i}\\log \\lambda(f_{i}) - \\log y_{i}!
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -72,7 +72,7 @@ class Poisson(Likelihood):
|
|||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - 1
|
||||
|
||||
:param link_f: latent variables (f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -92,7 +92,7 @@ class Poisson(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{-y_{i}}{\\lambda(f_{i})^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
@ -113,7 +113,7 @@ class Poisson(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f_{i})^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ class StudentT(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||
|
||||
:param inv_link_f: latent variables (link(f))
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -107,7 +107,7 @@ class StudentT(Likelihood):
|
|||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||
|
||||
:param inv_link_f: latent variables (f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -129,7 +129,7 @@ class StudentT(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||
|
||||
:param inv_link_f: latent variables inv_link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -154,7 +154,7 @@ class StudentT(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \\lambda(f_{i}))^3 - 3(y_{i} - \\lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \\lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -175,7 +175,7 @@ class StudentT(Likelihood):
|
|||
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \\lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -199,7 +199,7 @@ class StudentT(Likelihood):
|
|||
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\\lambda(f_{i}))}{(y_{i}-\\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||
|
||||
:param inv_link_f: latent variables inv_link_f
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
@ -220,7 +220,7 @@ class StudentT(Likelihood):
|
|||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \\lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \\lambda(f_{i}))^{2})^{3}}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Weibull(Likelihood):
|
|||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = \\alpha_{i}\\log \\beta - \\log \\Gamma(\\alpha_{i}) + (\\alpha_{i} - 1)\\log y_{i} - \\beta y_{i}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
|
|
@ -117,7 +117,7 @@ class Weibull(Likelihood):
|
|||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)} = -\\beta^{2}\\frac{d\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
@ -150,7 +150,7 @@ class Weibull(Likelihood):
|
|||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = -\\beta^{3}\\frac{d^{2}\\Psi(\\alpha_{i})}{d\\alpha_{i}}\\\\
|
||||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ class Additive(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}*) = f_1(\mathbf{x}*) + f_2(\mathbf(x)*)
|
||||
f(\\mathbf{x}*) = f_1(\\mathbf{x}*) + f_2(\\mathbf{x}*)
|
||||
|
||||
:param mapping1: first mapping to add together.
|
||||
:type mapping1: GPy.mappings.Mapping
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ class Compound(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}) = f_2(f_1(\mathbf{x}))
|
||||
f(\\mathbf{x}) = f_2(f_1(\\mathbf{x}))
|
||||
|
||||
:param mapping1: first mapping
|
||||
:type mapping1: GPy.mappings.Mapping
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ class Constant(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
F(\mathbf{x}) = c
|
||||
F(\\mathbf{x}) = c
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
|
|
|
|||
|
|
@ -12,20 +12,20 @@ class Kernel(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
|
||||
f(\\mathbf{x}) = \\sum_i \\alpha_i k(\\mathbf{z}_i, \\mathbf{x})
|
||||
|
||||
or for multiple outputs
|
||||
|
||||
.. math::
|
||||
|
||||
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x})
|
||||
f_i(\\mathbf{x}) = \\sum_j \\alpha_{i,j} k(\\mathbf{z}_i, \\mathbf{x})
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
:type input_dim: int
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param Z: input observations containing :math:`\mathbf{Z}`
|
||||
:param Z: input observations containing :math:`\\mathbf{Z}`
|
||||
:type Z: ndarray
|
||||
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
|
||||
:type kernel: GPy.kern.kern
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ class Linear(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
F(\mathbf{x}) = \mathbf{A} \mathbf{x})
|
||||
F(\\mathbf{x}) = \\mathbf{A} \\mathbf{x}
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class GPKroneckerGaussianRegression(Model):
|
|||
|
||||
.. rubric:: References
|
||||
|
||||
.. [stegle_et_al_2011] Stegle, O.; Lippert, C.; Mooij, J.M.; Lawrence, N.D.; Borgwardt, K.:Efficient inference in matrix-variate Gaussian models with \iid observation noise. In: Advances in Neural Information Processing Systems, 2011, Pages 630-638
|
||||
.. [stegle_et_al_2011] Stegle, O.; Lippert, C.; Mooij, J.M.; Lawrence, N.D.; Borgwardt, K.:Efficient inference in matrix-variate Gaussian models with \\iid observation noise. In: Advances in Neural Information Processing Systems, 2011, Pages 630-638
|
||||
|
||||
"""
|
||||
def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ class TPRegression(Model):
|
|||
|
||||
def log_likelihood(self):
|
||||
"""
|
||||
The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
|
||||
The log marginal likelihood of the model, :math:`p(\\mathbf{y})`, this is the objective function of the model being optimised
|
||||
"""
|
||||
return self._log_marginal_likelihood or self.inference()[1]
|
||||
|
||||
|
|
@ -184,8 +184,8 @@ class TPRegression(Model):
|
|||
diagonal of the covariance is returned.
|
||||
|
||||
.. math::
|
||||
p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
|
||||
= MVN\left(\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
|
||||
p(f*|X*, X, Y) = \\int^{\\inf}_{\\inf} p(f*|f,X*)p(f|X,Y) df
|
||||
= MVN\\left(\\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
|
||||
\\frac{\\nu + \\beta - 2}{\\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\\right)
|
||||
\\nu := \\texttt{Degrees of freedom}
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class WarpedGP(GP):
|
|||
the jacobian of the warping function here.
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param x_test: test locations (x_{*})
|
||||
:type x_test: (Nx1) array
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ def generate_brownian_data(
|
|||
):
|
||||
"""
|
||||
Generate brownian data - data from Brownian motion.
|
||||
First point is always 0, and \Beta(0) = 0 - standard conditions for Brownian motion.
|
||||
First point is always 0, and \\Beta(0) = 0 - standard conditions for Brownian motion.
|
||||
|
||||
Input:
|
||||
--------------------------------
|
||||
|
|
|
|||
|
|
@ -684,7 +684,7 @@ class TestMisc:
|
|||
warp_m = GPy.models.WarpedGP(
|
||||
X, Y
|
||||
) # , kernel=warp_k)#, warping_function=warp_f)
|
||||
warp_m[".*\.d"].constrain_fixed(1.0)
|
||||
warp_m[r".*\.d"].constrain_fixed(1.0)
|
||||
warp_m.optimize_restarts(
|
||||
parallel=False, robust=False, num_restarts=5, max_iters=max_iters
|
||||
)
|
||||
|
|
|
|||
|
|
@ -537,7 +537,7 @@ http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%2
|
|||
# In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD.
|
||||
header = """// Data table response\ngoogle.visualization.Query.setResponse("""
|
||||
data = data[len(header):-2]
|
||||
data = re.sub('new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
|
||||
data = re.sub(r'new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
|
||||
timeseries = json.loads(data)
|
||||
columns = [k['label'] for k in timeseries['table']['cols']]
|
||||
rows = map(lambda x: [k['v'] for k in x['c']], timeseries['table']['rows'])
|
||||
|
|
@ -782,7 +782,7 @@ def hapmap3(data_set='hapmap3'):
|
|||
|
||||
/ 1, iff SNPij==(B1,B1)
|
||||
Aij = | 0, iff SNPij==(B1,B2)
|
||||
\ -1, iff SNPij==(B2,B2)
|
||||
\\ -1, iff SNPij==(B2,B2)
|
||||
|
||||
The SNP data and the meta information (such as iid, sex and phenotype) are
|
||||
stored in the dataframe datadf, index is the Individual ID,
|
||||
|
|
@ -1011,7 +1011,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
|
|||
sample_info.columns = c
|
||||
|
||||
# get the labels right:
|
||||
rep = re.compile('\(.*\)')
|
||||
rep = re.compile(r'\(.*\)')
|
||||
def filter_dev_stage(row):
|
||||
if isnull(row):
|
||||
row = "2-cell stage embryo"
|
||||
|
|
@ -1050,7 +1050,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
|
|||
#gene_info[file_info.name[:-18]] = inner.Refseq_IDs
|
||||
|
||||
# Strip GSM number off data index
|
||||
rep = re.compile('GSM\d+_')
|
||||
rep = re.compile(r'GSM\d+_')
|
||||
|
||||
from pandas import MultiIndex
|
||||
columns = MultiIndex.from_tuples([row.split('_', 1) for row in data.columns])
|
||||
|
|
|
|||
|
|
@ -180,24 +180,24 @@ class NetpbmFile(object):
|
|||
"""Read PAM header and initialize instance."""
|
||||
regroups = re.search(
|
||||
b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|"
|
||||
b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
|
||||
b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
|
||||
rb"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|"
|
||||
rb"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups()
|
||||
self.header = regroups[0]
|
||||
self.magicnum = b'P7'
|
||||
for group in regroups[1:]:
|
||||
key, value = group.split()
|
||||
setattr(self, unicode(key).lower(), int(value))
|
||||
matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header)
|
||||
matches = re.findall(rb"(TUPLTYPE\s+\w+)", self.header)
|
||||
self.tupltypes = [s.split(None, 1)[1] for s in matches]
|
||||
|
||||
def _read_pnm_header(self, data):
|
||||
"""Read PNM header and initialize instance."""
|
||||
bpm = data[1:2] in b"14"
|
||||
regroups = re.search(b"".join((
|
||||
b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
|
||||
b"\s*(\d+)\s+(?:#.*[\r\n])*",
|
||||
b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
|
||||
b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
|
||||
rb"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*",
|
||||
rb"\s*(\d+)\s+(?:#.*[\r\n])*",
|
||||
rb"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm),
|
||||
rb"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm
|
||||
self.header = regroups[0]
|
||||
self.magicnum = regroups[1]
|
||||
self.width = int(regroups[2])
|
||||
|
|
|
|||
|
|
@ -150,7 +150,7 @@ with open('../../GPy/__version__.py', 'r') as f:
|
|||
version = f.read()
|
||||
release = version
|
||||
|
||||
print version
|
||||
print(version)
|
||||
|
||||
# version = '0.8.8'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue