mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-18 13:55:14 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
Conflicts: GPy/examples/classification.py
This commit is contained in:
commit
c129b98b3b
26 changed files with 316 additions and 177 deletions
|
|
@ -11,18 +11,20 @@ from sparse_gp import SparseGP
|
|||
|
||||
class FITC(SparseGP):
|
||||
"""
|
||||
sparse FITC approximation
|
||||
|
||||
Sparse FITC approximation
|
||||
|
||||
:param X: inputs
|
||||
:type X: np.ndarray (num_data x Q)
|
||||
:param likelihood: a likelihood instance, containing the observed data
|
||||
:type likelihood: GPy.likelihood.(Gaussian | EP)
|
||||
:param kernel : the kernel (covariance function). See link kernels
|
||||
:param kernel: the kernel (covariance function). See link kernels
|
||||
:type kernel: a GPy.kern.kern instance
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (M x Q) | None
|
||||
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_(X|Y): bool
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ class Mapping(Parameterized):
|
|||
|
||||
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
|
||||
"""
|
||||
|
||||
Plot the mapping.
|
||||
|
||||
Plots the mapping associated with the model.
|
||||
|
|
@ -79,8 +80,7 @@ class Mapping(Parameterized):
|
|||
:type fixed_inputs: a list of tuples
|
||||
:param linecol: color of line to plot.
|
||||
:type linecol:
|
||||
:param levels: for 2D plotting, the number of contour levels to use
|
||||
is ax is None, create a new figure
|
||||
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
|
||||
|
||||
"""
|
||||
# TODO include samples
|
||||
|
|
|
|||
|
|
@ -56,10 +56,11 @@ class Model(Parameterized):
|
|||
|
||||
def set_prior(self, regexp, what):
|
||||
"""
|
||||
|
||||
Sets priors on the model parameters.
|
||||
|
||||
Notes
|
||||
-----
|
||||
**Notes**
|
||||
|
||||
Asserts that the prior is suitable for the constraint. If the
|
||||
wrong constraint is in place, an error is raised. If no
|
||||
constraint is in place, one is added (warning printed).
|
||||
|
|
@ -185,8 +186,8 @@ class Model(Parameterized):
|
|||
be handled silently. If _all_ runs fail, the model is reset to the
|
||||
existing parameter values.
|
||||
|
||||
Notes
|
||||
-----
|
||||
**Notes**
|
||||
|
||||
:param num_restarts: number of restarts to use (default 10)
|
||||
:type num_restarts: int
|
||||
:param robust: whether to handle exceptions silently or not (default False)
|
||||
|
|
@ -195,7 +196,9 @@ class Model(Parameterized):
|
|||
:type parallel: bool
|
||||
:param num_processes: number of workers in the multiprocessing pool
|
||||
:type numprocesses: int
|
||||
**kwargs are passed to the optimizer. They can be:
|
||||
|
||||
\*\*kwargs are passed to the optimizer. They can be:
|
||||
|
||||
:param max_f_eval: maximum number of function evaluations
|
||||
:type max_f_eval: int
|
||||
:param max_iters: maximum number of iterations
|
||||
|
|
@ -203,9 +206,7 @@ class Model(Parameterized):
|
|||
:param messages: whether to display during optimisation
|
||||
:type messages: bool
|
||||
|
||||
..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically
|
||||
set to the number of processors on the current machine.
|
||||
|
||||
.. note:: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine.
|
||||
|
||||
"""
|
||||
initial_parameters = self._get_params_transformed()
|
||||
|
|
|
|||
|
|
@ -231,17 +231,19 @@ class Parameterized(object):
|
|||
|
||||
def constrain_fixed(self, regexp, value=None):
|
||||
"""
|
||||
Arguments
|
||||
---------
|
||||
|
||||
:param regexp: which parameters need to be fixed.
|
||||
:type regexp: ndarray(dtype=int) or regular expression object or string
|
||||
:param value: the vlaue to fix the parameters to. If the value is not specified,
|
||||
the parameter is fixed to the current value
|
||||
:type value: float
|
||||
Notes
|
||||
-----
|
||||
|
||||
**Notes**
|
||||
|
||||
Fixing a parameter which is tied to another, or constrained in some way will result in an error.
|
||||
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
|
||||
|
||||
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes.
|
||||
|
||||
"""
|
||||
matches = self.grep_param_names(regexp)
|
||||
overlap = set(matches).intersection(set(self.all_constrained_indices()))
|
||||
|
|
|
|||
|
|
@ -16,16 +16,17 @@ class SparseGP(GPBase):
|
|||
:type X: np.ndarray (num_data x input_dim)
|
||||
:param likelihood: a likelihood instance, containing the observed data
|
||||
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
|
||||
:param kernel : the kernel (covariance function). See link kernels
|
||||
:param kernel: the kernel (covariance function). See link kernels
|
||||
:type kernel: a GPy.kern.kern instance
|
||||
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
|
||||
:type X_variance: np.ndarray (num_data x input_dim) | None
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (num_inducing x input_dim) | None
|
||||
:param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:type num_inducing: int
|
||||
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_(X|Y): bool
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
|
||||
|
|
@ -306,10 +307,11 @@ class SparseGP(GPBase):
|
|||
|
||||
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
|
||||
"""
|
||||
|
||||
Predict the function(s) at the new point(s) Xnew.
|
||||
|
||||
Arguments
|
||||
---------
|
||||
**Arguments**
|
||||
|
||||
:param Xnew: The points at which to make a prediction
|
||||
:type Xnew: np.ndarray, Nnew x self.input_dim
|
||||
:param X_variance_new: The uncertainty in the prediction points
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import sys
|
|||
|
||||
class SVIGP(GPBase):
|
||||
"""
|
||||
|
||||
Stochastic Variational inference in a Gaussian Process
|
||||
|
||||
:param X: inputs
|
||||
|
|
@ -22,25 +23,26 @@ class SVIGP(GPBase):
|
|||
:type Y: np.ndarray of observations (N x D)
|
||||
:param batchsize: the size of a h
|
||||
|
||||
Additional kwargs are used as for a sparse GP. They include
|
||||
Additional kwargs are used as for a sparse GP. They include:
|
||||
|
||||
:param q_u: canonical parameters of the distribution squasehd into a 1D array
|
||||
:type q_u: np.ndarray
|
||||
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:type M: int
|
||||
:param kernel : the kernel/covariance function. See link kernels
|
||||
:param kernel: the kernel/covariance function. See link kernels
|
||||
:type kernel: a GPy kernel
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (M x Q) | None
|
||||
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
|
||||
:type X_uncertainty: np.ndarray (N x Q) | None
|
||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
||||
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:type M: int
|
||||
:param beta: noise precision. TODO> ignore beta if doing EP
|
||||
:param beta: noise precision. TODO: ignore beta if doing EP
|
||||
:type beta: float
|
||||
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_(X|Y): bool
|
||||
|
||||
"""
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ default_seed = 10000
|
|||
def oil(num_inducing=50, max_iters=100, kernel=None):
|
||||
"""
|
||||
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
|
||||
|
||||
"""
|
||||
data = GPy.util.datasets.oil()
|
||||
X = data['X']
|
||||
|
|
@ -43,8 +44,10 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
|
|||
def toy_linear_1d_classification(seed=default_seed):
|
||||
"""
|
||||
Simple 1D classification example
|
||||
:param seed : seed value for data generation (default is 4).
|
||||
|
||||
:param seed: seed value for data generation (default is 4).
|
||||
:type seed: int
|
||||
|
||||
"""
|
||||
|
||||
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
|
||||
|
|
@ -71,8 +74,10 @@ def toy_linear_1d_classification(seed=default_seed):
|
|||
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
|
||||
"""
|
||||
Sparse 1D classification example
|
||||
:param seed : seed value for data generation (default is 4).
|
||||
|
||||
:param seed: seed value for data generation (default is 4).
|
||||
:type seed: int
|
||||
|
||||
"""
|
||||
|
||||
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
|
||||
|
|
@ -100,8 +105,10 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
|
|||
def toy_heaviside(seed=default_seed):
|
||||
"""
|
||||
Simple 1D classification example using a heavy side gp transformation
|
||||
:param seed : seed value for data generation (default is 4).
|
||||
|
||||
:param seed: seed value for data generation (default is 4).
|
||||
:type seed: int
|
||||
|
||||
"""
|
||||
|
||||
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
|
||||
|
|
|
|||
|
|
@ -233,7 +233,7 @@ class CGD(Async_Optimize):
|
|||
"""
|
||||
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
|
||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
||||
report_every=10, *args, **kwargs)
|
||||
report_every=10, \*args, \*\*kwargs)
|
||||
|
||||
callback gets called every `report_every` iterations
|
||||
|
||||
|
|
@ -244,16 +244,14 @@ class CGD(Async_Optimize):
|
|||
|
||||
f, and df will be called with
|
||||
|
||||
f(xi, *args, **kwargs)
|
||||
df(xi, *args, **kwargs)
|
||||
f(xi, \*args, \*\*kwargs)
|
||||
df(xi, \*args, \*\*kwargs)
|
||||
|
||||
**returns**
|
||||
-----------
|
||||
**Returns:**
|
||||
|
||||
Started `Process` object, optimizing asynchronously
|
||||
|
||||
**calls**
|
||||
---------
|
||||
**Calls:**
|
||||
|
||||
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
|
||||
|
||||
|
|
@ -265,7 +263,7 @@ class CGD(Async_Optimize):
|
|||
"""
|
||||
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
|
||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
||||
report_every=10, *args, **kwargs)
|
||||
report_every=10, \*args, \*\*kwargs)
|
||||
|
||||
Minimize f, calling callback every `report_every` iterations with following syntax:
|
||||
|
||||
|
|
@ -276,11 +274,10 @@ class CGD(Async_Optimize):
|
|||
|
||||
f, and df will be called with
|
||||
|
||||
f(xi, *args, **kwargs)
|
||||
df(xi, *args, **kwargs)
|
||||
f(xi, \*args, \*\*kwargs)
|
||||
df(xi, \*args, \*\*kwargs)
|
||||
|
||||
**returns**
|
||||
---------
|
||||
|
||||
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
|
|||
:type lengthscale: float
|
||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -33,6 +34,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
|
|||
:type lengthscale: float
|
||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -41,11 +43,13 @@ def linear(input_dim,variances=None,ARD=False):
|
|||
"""
|
||||
Construct a linear kernel.
|
||||
|
||||
Arguments
|
||||
---------
|
||||
input_dimD (int), obligatory
|
||||
variances (np.ndarray)
|
||||
ARD (boolean)
|
||||
:param input_dim: dimensionality of the kernel, obligatory
|
||||
:type input_dim: int
|
||||
:param variances:
|
||||
:type variances: np.ndarray
|
||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.linear.Linear(input_dim,variances,ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -64,12 +68,14 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
|
|||
:type bias_variance: float
|
||||
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
||||
def gibbs(input_dim,variance=1., mapping=None):
|
||||
"""
|
||||
|
||||
Gibbs and MacKay non-stationary covariance function.
|
||||
|
||||
.. math::
|
||||
|
|
@ -124,6 +130,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2,
|
|||
:type degree: int
|
||||
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -132,10 +139,11 @@ def white(input_dim,variance=1.):
|
|||
"""
|
||||
Construct a white kernel.
|
||||
|
||||
Arguments
|
||||
---------
|
||||
input_dimD (int), obligatory
|
||||
variance (float)
|
||||
:param input_dim: dimensionality of the kernel, obligatory
|
||||
:type input_dim: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
|
||||
"""
|
||||
part = parts.white.White(input_dim,variance)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -153,6 +161,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
|
|||
:type lengthscale: float
|
||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -169,6 +178,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
|
|||
:type lengthscale: float
|
||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -185,6 +195,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
|
|||
:type lengthscale: float
|
||||
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
|
||||
:type ARD: Boolean
|
||||
|
||||
"""
|
||||
part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -193,10 +204,11 @@ def bias(input_dim, variance=1.):
|
|||
"""
|
||||
Construct a bias kernel.
|
||||
|
||||
Arguments
|
||||
---------
|
||||
input_dim (int), obligatory
|
||||
variance (float)
|
||||
:param input_dim: dimensionality of the kernel, obligatory
|
||||
:type input_dim: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
|
||||
"""
|
||||
part = parts.bias.Bias(input_dim, variance)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -204,10 +216,15 @@ def bias(input_dim, variance=1.):
|
|||
def finite_dimensional(input_dim, F, G, variances=1., weights=None):
|
||||
"""
|
||||
Construct a finite dimensional kernel.
|
||||
input_dim: int - the number of input dimensions
|
||||
F: np.array of functions with shape (n,) - the n basis functions
|
||||
G: np.array with shape (n,n) - the Gram matrix associated to F
|
||||
variances : np.ndarray with shape (n,)
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param F: np.array of functions with shape (n,) - the n basis functions
|
||||
:type F: np.array
|
||||
:param G: np.array with shape (n,n) - the Gram matrix associated to F
|
||||
:type G: np.array
|
||||
:param variances: np.ndarray with shape (n,)
|
||||
:type: np.ndarray
|
||||
"""
|
||||
part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -220,6 +237,7 @@ def spline(input_dim, variance=1.):
|
|||
:type input_dim: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
|
||||
"""
|
||||
part = parts.spline.Spline(input_dim, variance)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -232,6 +250,7 @@ def Brownian(input_dim, variance=1.):
|
|||
:type input_dim: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
|
||||
"""
|
||||
part = parts.Brownian.Brownian(input_dim, variance)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -285,6 +304,7 @@ def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 *
|
|||
:type period: float
|
||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||
:type n_freq: int
|
||||
|
||||
"""
|
||||
part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -303,6 +323,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
|
|||
:type period: float
|
||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||
:type n_freq: int
|
||||
|
||||
"""
|
||||
part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -321,6 +342,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
|
|||
:type period: float
|
||||
:param n_freq: the number of frequencies considered for the periodic subspace
|
||||
:type n_freq: int
|
||||
|
||||
"""
|
||||
part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
|
||||
return kern(input_dim, [part])
|
||||
|
|
@ -334,6 +356,7 @@ def prod(k1,k2,tensor=False):
|
|||
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
|
||||
:type tensor: Boolean
|
||||
:rtype: kernel object
|
||||
|
||||
"""
|
||||
part = parts.prod.Prod(k1, k2, tensor)
|
||||
return kern(part.input_dim, [part])
|
||||
|
|
@ -349,10 +372,12 @@ def symmetric(k):
|
|||
def coregionalize(num_outputs,W_columns=1, W=None, kappa=None):
|
||||
"""
|
||||
Coregionlization matrix B, of the form:
|
||||
|
||||
.. math::
|
||||
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
|
||||
|
||||
An intrinsic/linear coregionalization kernel of the form
|
||||
An intrinsic/linear coregionalization kernel of the form:
|
||||
|
||||
.. math::
|
||||
k_2(x, y)=\mathbf{B} k(x, y)
|
||||
|
||||
|
|
@ -422,7 +447,7 @@ def independent_outputs(k):
|
|||
|
||||
def hierarchical(k):
|
||||
"""
|
||||
TODO THis can't be right! Construct a kernel with independent outputs from an existing kernel
|
||||
TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
|
||||
"""
|
||||
# for sl in k.input_slices:
|
||||
# assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
|
||||
|
|
@ -440,7 +465,8 @@ def build_lcm(input_dim, num_outputs, kernel_list = [], W_columns=1,W=None,kappa
|
|||
:param W_columns: number tuples of the corregionalization parameters 'coregion_W'
|
||||
:type W_columns: integer
|
||||
|
||||
..Note the kernels dimensionality is overwritten to fit input_dim
|
||||
..note the kernels dimensionality is overwritten to fit input_dim
|
||||
|
||||
"""
|
||||
|
||||
for k in kernel_list:
|
||||
|
|
|
|||
|
|
@ -78,13 +78,15 @@ class kern(Parameterized):
|
|||
|
||||
|
||||
def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
|
||||
"""If an ARD kernel is present, it bar-plots the ARD parameters,
|
||||
"""If an ARD kernel is present, it bar-plots the ARD parameters.
|
||||
|
||||
:param fignum: figure number of the plot
|
||||
:param ax: matplotlib axis to plot on
|
||||
:param title:
|
||||
title of the plot,
|
||||
pass '' to not print a title
|
||||
pass None for a generic title
|
||||
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pb.figure(fignum)
|
||||
|
|
@ -175,8 +177,10 @@ class kern(Parameterized):
|
|||
def add(self, other, tensor=False):
|
||||
"""
|
||||
Add another kernel to this one. Both kernels are defined on the same _space_
|
||||
|
||||
:param other: the other kernel to be added
|
||||
:type other: GPy.kern
|
||||
|
||||
"""
|
||||
if tensor:
|
||||
D = self.input_dim + other.input_dim
|
||||
|
|
@ -223,6 +227,7 @@ class kern(Parameterized):
|
|||
:type other: GPy.kern
|
||||
:param tensor: whether or not to use the tensor space (default is false).
|
||||
:type tensor: bool
|
||||
|
||||
"""
|
||||
K1 = self.copy()
|
||||
K2 = other.copy()
|
||||
|
|
@ -321,6 +326,7 @@ class kern(Parameterized):
|
|||
:type X: np.ndarray (num_samples x input_dim)
|
||||
:param X2: Observed data inputs (optional, defaults to X)
|
||||
:type X2: np.ndarray (num_inducing x input_dim)
|
||||
|
||||
"""
|
||||
assert X.shape[1] == self.input_dim
|
||||
target = np.zeros(self.num_params)
|
||||
|
|
@ -340,6 +346,7 @@ class kern(Parameterized):
|
|||
:type X: np.ndarray (num_samples x input_dim)
|
||||
:param X2: Observed data inputs (optional, defaults to X)
|
||||
:type X2: np.ndarray (num_inducing x input_dim)"""
|
||||
|
||||
target = np.zeros_like(X)
|
||||
if X2 is None:
|
||||
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
|
|
@ -413,6 +420,7 @@ class kern(Parameterized):
|
|||
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
|
||||
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
|
||||
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
|
||||
|
||||
"""
|
||||
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
|
||||
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||
|
|
@ -568,7 +576,7 @@ class Kern_check_model(Model):
|
|||
|
||||
def is_positive_definite(self):
|
||||
v = np.linalg.eig(self.kernel.K(self.X))[0]
|
||||
if any(v<0):
|
||||
if any(v<-1e-6):
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
|
@ -657,6 +665,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
|||
:type X: ndarray
|
||||
:param X2: X2 input values to test the covariance function.
|
||||
:type X2: ndarray
|
||||
|
||||
"""
|
||||
pass_checks = True
|
||||
if X==None:
|
||||
|
|
@ -683,7 +692,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
|||
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
return False
|
||||
|
||||
|
||||
if verbose:
|
||||
print("Checking gradients of K(X, X2) wrt theta.")
|
||||
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
|
||||
|
|
@ -694,7 +703,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
|||
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
return False
|
||||
|
||||
|
||||
if verbose:
|
||||
print("Checking gradients of Kdiag(X) wrt theta.")
|
||||
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
|
||||
|
|
@ -705,10 +714,15 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
|||
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
return False
|
||||
|
||||
|
||||
if verbose:
|
||||
print("Checking gradients of K(X, X) wrt X.")
|
||||
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
|
||||
try:
|
||||
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
|
||||
except NotImplementedError:
|
||||
result=True
|
||||
if verbose:
|
||||
print("dK_dX not implemented for " + kern.name)
|
||||
if result and verbose:
|
||||
print("Check passed.")
|
||||
if not result:
|
||||
|
|
@ -719,7 +733,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
|||
|
||||
if verbose:
|
||||
print("Checking gradients of K(X, X2) wrt X.")
|
||||
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
|
||||
try:
|
||||
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
|
||||
except NotImplementedError:
|
||||
result=True
|
||||
if verbose:
|
||||
print("dK_dX not implemented for " + kern.name)
|
||||
if result and verbose:
|
||||
print("Check passed.")
|
||||
if not result:
|
||||
|
|
@ -730,7 +749,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
|||
|
||||
if verbose:
|
||||
print("Checking gradients of Kdiag(X) wrt X.")
|
||||
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
|
||||
try:
|
||||
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
|
||||
except NotImplementedError:
|
||||
result=True
|
||||
if verbose:
|
||||
print("dK_dX not implemented for " + kern.name)
|
||||
if result and verbose:
|
||||
print("Check passed.")
|
||||
if not result:
|
||||
|
|
@ -738,5 +762,5 @@ def kern_test(kern, X=None, X2=None, verbose=False):
|
|||
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
return False
|
||||
|
||||
|
||||
return pass_checks
|
||||
|
|
|
|||
|
|
@ -11,12 +11,14 @@ class Coregionalize(Kernpart):
|
|||
"""
|
||||
Covariance function for intrinsic/linear coregionalization models
|
||||
|
||||
This covariance has the form
|
||||
This covariance has the form:
|
||||
.. math::
|
||||
|
||||
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
|
||||
|
||||
An intrinsic/linear coregionalization covariance function of the form
|
||||
An intrinsic/linear coregionalization covariance function of the form:
|
||||
.. math::
|
||||
|
||||
k_2(x, y)=\mathbf{B} k(x, y)
|
||||
|
||||
it is obtained as the tensor product between a covariance function
|
||||
|
|
@ -31,7 +33,7 @@ class Coregionalize(Kernpart):
|
|||
:param kappa: a vector which allows the outputs to behave independently
|
||||
:type kappa: numpy array of dimensionality (num_outputs,)
|
||||
|
||||
.. Note: see coregionalization examples in GPy.examples.regression for some usage.
|
||||
.. note: see coregionalization examples in GPy.examples.regression for some usage.
|
||||
"""
|
||||
def __init__(self,num_outputs,W_columns=1, W=None, kappa=None):
|
||||
self.input_dim = 1
|
||||
|
|
|
|||
|
|
@ -10,9 +10,12 @@ import GPy
|
|||
|
||||
class Hetero(Kernpart):
|
||||
"""
|
||||
TODO: Need to constrain the function outputs positive (still thinking of best way of doing this!!! Yes, intend to use transformations, but what's the *best* way). Currently just squaring output.
|
||||
TODO: Need to constrain the function outputs
|
||||
positive (still thinking of best way of doing this!!! Yes, intend to use
|
||||
transformations, but what's the *best* way). Currently just squaring output.
|
||||
|
||||
Heteroschedastic noise which depends on input location. See, for example, this paper by Goldberg et al.
|
||||
Heteroschedastic noise which depends on input location. See, for example,
|
||||
this paper by Goldberg et al.
|
||||
|
||||
.. math::
|
||||
|
||||
|
|
@ -20,15 +23,15 @@ class Hetero(Kernpart):
|
|||
|
||||
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
|
||||
|
||||
The parameters are the parameters of \sigma^2(x) which is a
|
||||
function that can be specified by the user, by default an
|
||||
multi-layer peceptron is used.
|
||||
The parameters are the parameters of \sigma^2(x) which is a
|
||||
function that can be specified by the user, by default an
|
||||
multi-layer peceptron is used.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
|
||||
:type mapping: GPy.core.Mapping
|
||||
:rtype: Kernpart object
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
|
||||
:type mapping: GPy.core.Mapping
|
||||
:rtype: Kernpart object
|
||||
|
||||
See this paper:
|
||||
|
||||
|
|
@ -36,7 +39,7 @@ class Hetero(Kernpart):
|
|||
C. M. (1998) Regression with Input-dependent Noise: a Gaussian
|
||||
Process Treatment In Advances in Neural Information Processing
|
||||
Systems, Volume 10, pp. 493-499. MIT Press
|
||||
|
||||
|
||||
for a Gaussian process treatment of this problem.
|
||||
|
||||
"""
|
||||
|
|
@ -47,7 +50,7 @@ class Hetero(Kernpart):
|
|||
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
|
||||
if not transform:
|
||||
transform = GPy.core.transformations.logexp()
|
||||
|
||||
|
||||
self.transform = transform
|
||||
self.mapping = mapping
|
||||
self.name='hetero'
|
||||
|
|
@ -66,7 +69,7 @@ class Hetero(Kernpart):
|
|||
|
||||
def K(self, X, X2, target):
|
||||
"""Return covariance between X and X2."""
|
||||
if X2==None or X2 is X:
|
||||
if (X2 is None) or (X2 is X):
|
||||
target[np.diag_indices_from(target)] += self._Kdiag(X)
|
||||
|
||||
def Kdiag(self, X, target):
|
||||
|
|
@ -76,26 +79,26 @@ class Hetero(Kernpart):
|
|||
def _Kdiag(self, X):
|
||||
"""Helper function for computing the diagonal elements of the covariance."""
|
||||
return self.mapping.f(X).flatten()**2
|
||||
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
if X2==None or X2 is X:
|
||||
if (X2 is None) or (X2 is X):
|
||||
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
|
||||
self.dKdiag_dtheta(dL_dKdiag, X, target)
|
||||
|
||||
def dKdiag_dtheta(self, dL_dKdiag, X, target):
|
||||
"""Gradient of diagonal of covariance with respect to parameters."""
|
||||
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None], X)*self.mapping.f(X)
|
||||
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance matrix with respect to X."""
|
||||
if X2==None or X2 is X:
|
||||
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
|
||||
self.dKdiag_dX(dL_dKdiag, X, target)
|
||||
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
"""Gradient of diagonal of covariance with respect to X."""
|
||||
target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,8 @@ class Kernpart(object):
|
|||
raise NotImplementedError
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
raise NotImplementedError
|
||||
def dKdiag_dX(self, dL_dK, X, target):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
|
||||
|
|
@ -97,6 +99,9 @@ class Kernpart_stationary(Kernpart):
|
|||
# wrt lengthscale is 0.
|
||||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
def dKdiag_dX(self, dL_dK, X, target):
|
||||
pass # true for all stationary kernels
|
||||
|
||||
|
||||
class Kernpart_inner(Kernpart):
|
||||
def __init__(self,input_dim):
|
||||
|
|
|
|||
|
|
@ -7,11 +7,13 @@ four_over_tau = 2./np.pi
|
|||
|
||||
class MLP(Kernpart):
|
||||
"""
|
||||
multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
|
||||
|
||||
Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sigma^2 \frac{2}{\pi} \text{asin} \left(\frac{\sigma_w^2 x^\top y+\sigma_b^2}{\sqrt{\sigma_w^2x^\top x + \sigma_b^2 + 1}\sqrt{\sigma_w^2 y^\top y \sigma_b^2 +1}} \right)
|
||||
k(x,y) = \\sigma^{2}\\frac{2}{\\pi } \\text{asin} \\left ( \\frac{ \\sigma_w^2 x^\\top y+\\sigma_b^2}{\\sqrt{\\sigma_w^2x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y \\sigma_b^2 +1}} \\right )
|
||||
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
|
|
@ -24,6 +26,7 @@ class MLP(Kernpart):
|
|||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):
|
||||
|
|
|
|||
|
|
@ -7,19 +7,20 @@ four_over_tau = 2./np.pi
|
|||
|
||||
class POLY(Kernpart):
|
||||
"""
|
||||
polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel,
|
||||
.. math::
|
||||
|
||||
k(x, y) = \sigma^2*(\sigma_w^2 x'y+\sigma_b^b)^d
|
||||
|
||||
The kernel parameters are \sigma^2 (variance), \sigma^2_w
|
||||
(weight_variance), \sigma^2_b (bias_variance) and d
|
||||
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
|
||||
|
||||
.. math::
|
||||
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
|
||||
|
||||
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
|
||||
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
|
||||
(degree). Only gradients of the first three are provided for
|
||||
kernel optimisation, it is assumed that polynomial degree would
|
||||
be set by hand.
|
||||
|
||||
The kernel is not recommended as it is badly behaved when the
|
||||
\sigma^2_w*x'*y + \sigma^2_b has a magnitude greater than one. For completeness
|
||||
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
|
||||
there is an automatic relevance determination version of this
|
||||
kernel provided.
|
||||
|
||||
|
|
@ -32,7 +33,7 @@ class POLY(Kernpart):
|
|||
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
|
||||
:param degree: the degree of the polynomial.
|
||||
:type degree: int
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
|
|
|
|||
|
|
@ -10,19 +10,23 @@ from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_
|
|||
|
||||
class GPTransformation(object):
|
||||
"""
|
||||
|
||||
Link function class for doing non-Gaussian likelihoods approximation
|
||||
|
||||
:param Y: observed output (Nx1 numpy.darray)
|
||||
..Note:: Y values allowed depend on the likelihood_function used
|
||||
|
||||
.. note:: Y values allowed depend on the likelihood_function used
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
class Identity(GPTransformation):
|
||||
"""
|
||||
$$
|
||||
g(f) = f
|
||||
$$
|
||||
.. math::
|
||||
|
||||
g(f) = f
|
||||
|
||||
"""
|
||||
#def transf(self,mu):
|
||||
# return mu
|
||||
|
|
@ -39,9 +43,10 @@ class Identity(GPTransformation):
|
|||
|
||||
class Probit(GPTransformation):
|
||||
"""
|
||||
$$
|
||||
g(f) = \\Phi^{-1} (mu)
|
||||
$$
|
||||
.. math::
|
||||
|
||||
g(f) = \\Phi^{-1} (mu)
|
||||
|
||||
"""
|
||||
#def transf(self,mu):
|
||||
# return inv_std_norm_cdf(mu)
|
||||
|
|
@ -57,9 +62,9 @@ class Probit(GPTransformation):
|
|||
|
||||
class Log(GPTransformation):
|
||||
"""
|
||||
$$
|
||||
g(f) = \log(\mu)
|
||||
$$
|
||||
.. math::
|
||||
g(f) = \\log(\\mu)
|
||||
|
||||
"""
|
||||
#def transf(self,mu):
|
||||
# return np.log(mu)
|
||||
|
|
@ -75,9 +80,9 @@ class Log(GPTransformation):
|
|||
|
||||
class Log_ex_1(GPTransformation):
|
||||
"""
|
||||
$$
|
||||
g(f) = \log(\exp(\mu) - 1)
|
||||
$$
|
||||
.. math::
|
||||
g(f) = \\log(\\exp(\\mu) - 1)
|
||||
|
||||
"""
|
||||
#def transf(self,mu):
|
||||
# """
|
||||
|
|
@ -110,9 +115,11 @@ class Reciprocal(GPTransformation):
|
|||
|
||||
class Heaviside(GPTransformation):
|
||||
"""
|
||||
$$
|
||||
g(f) = I_{x \in A}
|
||||
$$
|
||||
|
||||
.. math::
|
||||
|
||||
g(f) = I_{x \\in A}
|
||||
|
||||
"""
|
||||
def transf(self,f):
|
||||
#transformation goes here
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ class NoiseDistribution(object):
|
|||
Likelihood class for doing Expectation propagation
|
||||
|
||||
:param Y: observed output (Nx1 numpy.darray)
|
||||
..Note:: Y values allowed depend on the LikelihoodFunction used
|
||||
|
||||
.. note:: Y values allowed depend on the LikelihoodFunction used
|
||||
"""
|
||||
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
|
||||
#assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."#FIXME
|
||||
|
|
@ -51,6 +52,7 @@ class NoiseDistribution(object):
|
|||
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
|
||||
|
||||
:param Y: observed output (Nx1 numpy.darray)
|
||||
|
||||
"""
|
||||
return Y
|
||||
|
||||
|
|
@ -62,18 +64,21 @@ class NoiseDistribution(object):
|
|||
:param obs: observed output
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
|
||||
|
||||
def _nlog_product_scaled(self,gp,obs,mu,sigma):
|
||||
"""
|
||||
Negative log-product between the cavity distribution and a likelihood factor.
|
||||
..Note:: The constant term in the Gaussian distribution is ignored.
|
||||
|
||||
.. note:: The constant term in the Gaussian distribution is ignored.
|
||||
|
||||
:param gp: latent variable
|
||||
:param obs: observed output
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)
|
||||
|
||||
|
|
@ -85,6 +90,7 @@ class NoiseDistribution(object):
|
|||
:param obs: observed output
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)
|
||||
|
||||
|
|
@ -96,6 +102,7 @@ class NoiseDistribution(object):
|
|||
:param obs: observed output
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)
|
||||
|
||||
|
|
@ -106,6 +113,7 @@ class NoiseDistribution(object):
|
|||
:param obs: observed output
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False)
|
||||
|
||||
|
|
@ -122,6 +130,7 @@ class NoiseDistribution(object):
|
|||
:param obs: observed output
|
||||
:param tau: cavity distribution 1st natural parameter (precision)
|
||||
:param v: cavity distribution 2nd natural paramenter (mu*precision)
|
||||
|
||||
"""
|
||||
mu = v/tau
|
||||
mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
|
||||
|
|
@ -137,7 +146,8 @@ class NoiseDistribution(object):
|
|||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
..Note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
|
||||
.. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
|
||||
|
||||
"""
|
||||
return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
|
||||
|
||||
|
|
@ -148,6 +158,7 @@ class NoiseDistribution(object):
|
|||
:param gp: latent variable
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
|
||||
|
||||
|
|
@ -158,6 +169,7 @@ class NoiseDistribution(object):
|
|||
:param gp: latent variable
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2
|
||||
|
||||
|
|
@ -169,7 +181,8 @@ class NoiseDistribution(object):
|
|||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
..Note:: This function helps computing E(V(Y_star|f_star))
|
||||
.. note:: This function helps computing E(V(Y_star|f_star))
|
||||
|
||||
"""
|
||||
return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))
|
||||
|
||||
|
|
@ -180,6 +193,7 @@ class NoiseDistribution(object):
|
|||
:param gp: latent variable
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)
|
||||
|
||||
|
|
@ -190,6 +204,7 @@ class NoiseDistribution(object):
|
|||
:param gp: latent variable
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2
|
||||
|
||||
|
|
@ -201,7 +216,8 @@ class NoiseDistribution(object):
|
|||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
..Note:: This function helps computing E( E(Y_star|f_star)**2 )
|
||||
.. note:: This function helps computing E( E(Y_star|f_star)**2 )
|
||||
|
||||
"""
|
||||
return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))
|
||||
|
||||
|
|
@ -212,6 +228,7 @@ class NoiseDistribution(object):
|
|||
:param gp: latent variable
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)
|
||||
|
||||
|
|
@ -222,6 +239,7 @@ class NoiseDistribution(object):
|
|||
:param gp: latent variable
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )
|
||||
|
||||
|
|
@ -243,6 +261,7 @@ class NoiseDistribution(object):
|
|||
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False)
|
||||
mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
|
||||
|
|
@ -266,6 +285,7 @@ class NoiseDistribution(object):
|
|||
|
||||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
|
||||
"""
|
||||
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False)
|
||||
mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
|
||||
|
|
@ -278,6 +298,7 @@ class NoiseDistribution(object):
|
|||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
|
||||
|
||||
"""
|
||||
# E( V(Y_star|f_star) )
|
||||
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False)
|
||||
|
|
@ -310,6 +331,7 @@ class NoiseDistribution(object):
|
|||
:param mu: cavity distribution mean
|
||||
:param sigma: cavity distribution standard deviation
|
||||
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
|
||||
|
||||
"""
|
||||
qf = stats.norm.ppf(p,mu,sigma)
|
||||
return self.gp_link.transf(qf)
|
||||
|
|
@ -321,6 +343,7 @@ class NoiseDistribution(object):
|
|||
:param x: tuple (latent variable,output)
|
||||
:param mu: latent variable's predictive mean
|
||||
:param sigma: latent variable's predictive standard deviation
|
||||
|
||||
"""
|
||||
return self._nlog_product_scaled(x[0],x[1],mu,sigma)
|
||||
|
||||
|
|
@ -331,7 +354,9 @@ class NoiseDistribution(object):
|
|||
:param x: tuple (latent variable,output)
|
||||
:param mu: latent variable's predictive mean
|
||||
:param sigma: latent variable's predictive standard deviation
|
||||
..Note: Only avilable when the output is continuous
|
||||
|
||||
.. note: Only available when the output is continuous
|
||||
|
||||
"""
|
||||
assert not self.discrete, "Gradient not available for discrete outputs."
|
||||
return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))
|
||||
|
|
@ -343,7 +368,9 @@ class NoiseDistribution(object):
|
|||
:param x: tuple (latent variable,output)
|
||||
:param mu: latent variable's predictive mean
|
||||
:param sigma: latent variable's predictive standard deviation
|
||||
..Note: Only avilable when the output is continuous
|
||||
|
||||
.. note: Only available when the output is continuous
|
||||
|
||||
"""
|
||||
assert not self.discrete, "Hessian not available for discrete outputs."
|
||||
cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
|
||||
|
|
@ -356,14 +383,17 @@ class NoiseDistribution(object):
|
|||
:param x: tuple (latent variable,output)
|
||||
:param mu: latent variable's predictive mean
|
||||
:param sigma: latent variable's predictive standard deviation
|
||||
|
||||
"""
|
||||
return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False)
|
||||
|
||||
def predictive_values(self,mu,var):
|
||||
"""
|
||||
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction
|
||||
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
|
||||
|
||||
:param mu: mean of the latent variable
|
||||
:param var: variance of the latent variable
|
||||
|
||||
"""
|
||||
if isinstance(mu,float) or isinstance(mu,int):
|
||||
mu = [mu]
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@ class Poisson(NoiseDistribution):
|
|||
"""
|
||||
Poisson likelihood
|
||||
Y is expected to take values in {0,1,2,...}
|
||||
-----
|
||||
$$
|
||||
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
||||
$$
|
||||
|
||||
.. math::
|
||||
L(x) = \\exp(\\lambda) * \\frac{\\lambda^Y_i}{Y_i!}
|
||||
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
||||
#self.discrete = True
|
||||
|
|
|
|||
|
|
@ -10,11 +10,13 @@ class MLP(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}*) = \mathbf{W}^0\boldsymbol{\phi}(\mathbf{W}^1\mathbf{x}+\mathb{b}^1)^* + \mathbf{b}^0
|
||||
f(\\mathbf{x}*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}+\\mathbf{b}^1)^* + \\mathbf{b}^0
|
||||
|
||||
where
|
||||
..math::
|
||||
\phi(\cdot) = \text{tanh}(\cdot)
|
||||
|
||||
.. math::
|
||||
|
||||
\\phi(\\cdot) = \\text{tanh}(\\cdot)
|
||||
|
||||
:param X: input observations
|
||||
:type X: ndarray
|
||||
|
|
@ -22,6 +24,7 @@ class MLP(Mapping):
|
|||
:type output_dim: int
|
||||
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it.
|
||||
:type hidden_dim: int or list of ints.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ class MRD(Model):
|
|||
:param num_inducing: number of inducing inputs to use
|
||||
:param kernels: list of kernels or kernel shared for all BGPLVMS
|
||||
:type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default)
|
||||
|
||||
"""
|
||||
def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None,
|
||||
kernels=None, initx='PCA',
|
||||
|
|
@ -338,8 +339,11 @@ class MRD(Model):
|
|||
|
||||
def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
|
||||
"""
|
||||
:param:`titles` :
|
||||
titles for axes of datasets
|
||||
|
||||
TODO: Explain other parameters
|
||||
|
||||
:param titles: titles for axes of datasets
|
||||
|
||||
"""
|
||||
if titles is None:
|
||||
titles = [r'${}$'.format(name) for name in self.names]
|
||||
|
|
|
|||
|
|
@ -27,48 +27,56 @@ except:
|
|||
_blas_available = False
|
||||
|
||||
def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):
|
||||
"""Wrapper for lapack dtrtrs function
|
||||
"""
|
||||
Wrapper for lapack dtrtrs function
|
||||
|
||||
:param A: Matrix A
|
||||
:param B: Matrix B
|
||||
:param lower: is matrix lower (true) or upper (false)
|
||||
:returns:
|
||||
|
||||
"""
|
||||
return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)
|
||||
|
||||
def dpotrs(A, B, lower=0):
|
||||
"""Wrapper for lapack dpotrs function
|
||||
"""
|
||||
Wrapper for lapack dpotrs function
|
||||
|
||||
:param A: Matrix A
|
||||
:param B: Matrix B
|
||||
:param lower: is matrix lower (true) or upper (false)
|
||||
:returns:
|
||||
|
||||
"""
|
||||
return lapack.dpotrs(A, B, lower=lower)
|
||||
|
||||
def dpotri(A, lower=0):
|
||||
"""Wrapper for lapack dpotri function
|
||||
"""
|
||||
Wrapper for lapack dpotri function
|
||||
|
||||
:param A: Matrix A
|
||||
:param lower: is matrix lower (true) or upper (false)
|
||||
:returns: A inverse
|
||||
|
||||
"""
|
||||
return lapack.dpotri(A, lower=lower)
|
||||
|
||||
def trace_dot(a, b):
|
||||
"""
|
||||
efficiently compute the trace of the matrix product of a and b
|
||||
Efficiently compute the trace of the matrix product of a and b
|
||||
"""
|
||||
return np.sum(a * b)
|
||||
|
||||
def mdot(*args):
|
||||
"""Multiply all the arguments using matrix product rules.
|
||||
"""
|
||||
Multiply all the arguments using matrix product rules.
|
||||
The output is equivalent to multiplying the arguments one by one
|
||||
from left to right using dot().
|
||||
Precedence can be controlled by creating tuples of arguments,
|
||||
for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)).
|
||||
Note that this means the output of dot(a,b) and mdot(a,b) will differ if
|
||||
a or b is a pure tuple of numbers.
|
||||
|
||||
"""
|
||||
if len(args) == 1:
|
||||
return args[0]
|
||||
|
|
@ -119,10 +127,12 @@ def jitchol_old(A, maxtries=5):
|
|||
|
||||
:rval L: the Cholesky decomposition of A
|
||||
|
||||
.. Note:
|
||||
.. note:
|
||||
|
||||
Adds jitter to K, to enforce positive-definiteness
|
||||
if stuff breaks, please check:
|
||||
np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
|
||||
|
||||
"""
|
||||
try:
|
||||
return linalg.cholesky(A, lower=True)
|
||||
|
|
@ -142,6 +152,7 @@ def jitchol_old(A, maxtries=5):
|
|||
|
||||
def pdinv(A, *args):
|
||||
"""
|
||||
|
||||
:param A: A DxD pd numpy array
|
||||
|
||||
:rval Ai: the inverse of A
|
||||
|
|
@ -152,6 +163,7 @@ def pdinv(A, *args):
|
|||
:rtype Li: np.ndarray
|
||||
:rval logdet: the log of the determinant of A
|
||||
:rtype logdet: float64
|
||||
|
||||
"""
|
||||
L = jitchol(A, *args)
|
||||
logdet = 2.*np.sum(np.log(np.diag(L)))
|
||||
|
|
@ -177,14 +189,13 @@ def chol_inv(L):
|
|||
|
||||
def multiple_pdinv(A):
|
||||
"""
|
||||
Arguments
|
||||
---------
|
||||
:param A: A DxDxN numpy array (each A[:,:,i] is pd)
|
||||
|
||||
Returns
|
||||
-------
|
||||
invs : the inverses of A
|
||||
hld: 0.5* the log of the determinants of A
|
||||
:rval invs: the inverses of A
|
||||
:rtype invs: np.ndarray
|
||||
:rval hld: 0.5* the log of the determinants of A
|
||||
:rtype hld: np.array
|
||||
|
||||
"""
|
||||
N = A.shape[-1]
|
||||
chols = [jitchol(A[:, :, i]) for i in range(N)]
|
||||
|
|
@ -198,15 +209,13 @@ def PCA(Y, input_dim):
|
|||
"""
|
||||
Principal component analysis: maximum likelihood solution by SVD
|
||||
|
||||
Arguments
|
||||
---------
|
||||
:param Y: NxD np.array of data
|
||||
:param input_dim: int, dimension of projection
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
:rval X: - Nxinput_dim np.array of dimensionality reduced data
|
||||
W - input_dimxD mapping from X to Y
|
||||
:rval W: - input_dimxD mapping from X to Y
|
||||
|
||||
"""
|
||||
if not np.allclose(Y.mean(axis=0), 0.0):
|
||||
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
|
||||
|
|
@ -273,11 +282,10 @@ def DSYR_blas(A, x, alpha=1.):
|
|||
Performs a symmetric rank-1 update operation:
|
||||
A <- A + alpha * np.dot(x,x.T)
|
||||
|
||||
Arguments
|
||||
---------
|
||||
:param A: Symmetric NxN np.array
|
||||
:param x: Nx1 np.array
|
||||
:param alpha: scalar
|
||||
|
||||
"""
|
||||
N = c_int(A.shape[0])
|
||||
LDA = c_int(A.shape[0])
|
||||
|
|
@ -295,11 +303,10 @@ def DSYR_numpy(A, x, alpha=1.):
|
|||
Performs a symmetric rank-1 update operation:
|
||||
A <- A + alpha * np.dot(x,x.T)
|
||||
|
||||
Arguments
|
||||
---------
|
||||
:param A: Symmetric NxN np.array
|
||||
:param x: Nx1 np.array
|
||||
:param alpha: scalar
|
||||
|
||||
"""
|
||||
A += alpha * np.dot(x[:, None], x[None, :])
|
||||
|
||||
|
|
@ -363,8 +370,9 @@ def cholupdate(L, x):
|
|||
"""
|
||||
update the LOWER cholesky factor of a pd matrix IN PLACE
|
||||
|
||||
if L is the lower chol. of K, then this function computes L_
|
||||
where L_ is the lower chol of K + x*x^T
|
||||
if L is the lower chol. of K, then this function computes L\_
|
||||
where L\_ is the lower chol of K + x*x^T
|
||||
|
||||
"""
|
||||
support_code = """
|
||||
#include <math.h>
|
||||
|
|
|
|||
|
|
@ -92,13 +92,15 @@ class tree:
|
|||
|
||||
|
||||
def swap_vertices(self, i, j):
|
||||
"""Swap two vertices in the tree structure array.
|
||||
"""
|
||||
Swap two vertices in the tree structure array.
|
||||
swap_vertex swaps the location of two vertices in a tree structure array.
|
||||
ARG tree : the tree for which two vertices are to be swapped.
|
||||
ARG i : the index of the first vertex to be swapped.
|
||||
ARG j : the index of the second vertex to be swapped.
|
||||
RETURN tree : the tree structure with the two vertex locations
|
||||
swapped.
|
||||
|
||||
:param tree: the tree for which two vertices are to be swapped.
|
||||
:param i: the index of the first vertex to be swapped.
|
||||
:param j: the index of the second vertex to be swapped.
|
||||
:rval tree: the tree structure with the two vertex locations swapped.
|
||||
|
||||
"""
|
||||
store_vertex_i = self.vertices[i]
|
||||
store_vertex_j = self.vertices[j]
|
||||
|
|
@ -117,12 +119,16 @@ class tree:
|
|||
|
||||
def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False):
|
||||
|
||||
"""Compute the rotation matrix for an angle in each direction.
|
||||
"""
|
||||
Compute the rotation matrix for an angle in each direction.
|
||||
This is a helper function for computing the rotation matrix for a given set of angles in a given order.
|
||||
ARG xangle : rotation for x-axis.
|
||||
ARG yangle : rotation for y-axis.
|
||||
ARG zangle : rotation for z-axis.
|
||||
ARG order : the order for the rotations."""
|
||||
|
||||
:param xangle: rotation for x-axis.
|
||||
:param yangle: rotation for y-axis.
|
||||
:param zangle: rotation for z-axis.
|
||||
:param order: the order for the rotations.
|
||||
|
||||
"""
|
||||
if degrees:
|
||||
xangle = math.radians(xangle)
|
||||
yangle = math.radians(yangle)
|
||||
|
|
@ -301,10 +307,14 @@ class acclaim_skeleton(skeleton):
|
|||
|
||||
def load_skel(self, file_name):
|
||||
|
||||
"""Loads an ASF file into a skeleton structure.
|
||||
"""
|
||||
Loads an ASF file into a skeleton structure.
|
||||
loads skeleton structure from an acclaim skeleton file.
|
||||
ARG file_name : the file name to load in.
|
||||
RETURN skel : the skeleton for the file."""
|
||||
|
||||
:param file_name: the file name to load in.
|
||||
:rval skel: the skeleton for the file. - TODO isn't returning this?
|
||||
|
||||
"""
|
||||
|
||||
fid = open(file_name, 'r')
|
||||
self.read_skel(fid)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ def most_significant_input_dimensions(model, which_indices):
|
|||
try:
|
||||
input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2]
|
||||
except:
|
||||
raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
|
||||
raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
|
||||
else:
|
||||
input_1, input_2 = which_indices
|
||||
return input_1, input_2
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue