Merging changed files.

This commit is contained in:
Neil Lawrence 2013-09-21 12:17:59 +01:00
commit 4154a4afb6
40 changed files with 555 additions and 415 deletions

View file

@ -11,25 +11,27 @@ from sparse_gp import SparseGP
class FITC(SparseGP): class FITC(SparseGP):
""" """
sparse FITC approximation
Sparse FITC approximation
:param X: inputs :param X: inputs
:type X: np.ndarray (num_data x Q) :type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP) :type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel (covariance function). See link kernels :param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance :type kernel: a GPy.kern.kern instance
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None :type Z: np.ndarray (M x Q) | None
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """
def __init__(self, X, likelihood, kernel, Z, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False) SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs" assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
def update_likelihood_approximation(self): def update_likelihood_approximation(self, **kwargs):
""" """
Approximates a non-Gaussian likelihood using Expectation Propagation Approximates a non-Gaussian likelihood using Expectation Propagation
@ -37,7 +39,7 @@ class FITC(SparseGP):
this function does nothing this function does nothing
""" """
self.likelihood.restart() self.likelihood.restart()
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
self._set_params(self._get_params()) self._set_params(self._get_params())
def _compute_kernel_matrices(self): def _compute_kernel_matrices(self):
@ -120,7 +122,7 @@ class FITC(SparseGP):
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z) self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z) self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z) self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:]) self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood # the partial derivative vector for the likelihood

View file

@ -15,7 +15,7 @@ class GP(GPBase):
:param X: input observations :param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
:parm likelihood: a GPy likelihood :param likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True :type normalize_X: False|True
:rtype: model object :rtype: model object
@ -62,7 +62,7 @@ class GP(GPBase):
def _get_param_names(self): def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names() return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self): def update_likelihood_approximation(self, **kwargs):
""" """
Approximates a non-gaussian likelihood using Expectation Propagation Approximates a non-gaussian likelihood using Expectation Propagation
@ -70,7 +70,7 @@ class GP(GPBase):
this function does nothing this function does nothing
""" """
self.likelihood.restart() self.likelihood.restart()
self.likelihood.fit_full(self.kern.K(self.X)) self.likelihood.fit_full(self.kern.K(self.X), **kwargs)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _model_fit_term(self): def _model_fit_term(self):
@ -132,17 +132,16 @@ class GP(GPBase):
def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()): def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()):
""" """
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim :type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction :param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools) :type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal :param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool :type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim :returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
@ -160,8 +159,7 @@ class GP(GPBase):
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False): def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
""" """
For a specific output, predict the function at the new point(s) Xnew. For a specific output, predict the function at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim :type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict :param output: output to predict
@ -170,9 +168,9 @@ class GP(GPBase):
:type which_parts: ('all', list of bools) :type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal :param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool :type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim :returns: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise :returns: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
.. Note:: For multiple output models only .. Note:: For multiple output models only
""" """

View file

@ -49,6 +49,7 @@ class Mapping(Parameterized):
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']): def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
""" """
Plot the mapping. Plot the mapping.
Plots the mapping associated with the model. Plots the mapping associated with the model.
@ -79,8 +80,7 @@ class Mapping(Parameterized):
:type fixed_inputs: a list of tuples :type fixed_inputs: a list of tuples
:param linecol: color of line to plot. :param linecol: color of line to plot.
:type linecol: :type linecol:
:param levels: for 2D plotting, the number of contour levels to use :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
is ax is None, create a new figure
""" """
# TODO include samples # TODO include samples

View file

@ -47,6 +47,7 @@ class Model(Parameterized):
:param state: the state of the model. :param state: the state of the model.
:type state: list as returned from getstate. :type state: list as returned from getstate.
""" """
self.preferred_optimizer = state.pop() self.preferred_optimizer = state.pop()
self.sampling_runs = state.pop() self.sampling_runs = state.pop()
@ -56,10 +57,11 @@ class Model(Parameterized):
def set_prior(self, regexp, what): def set_prior(self, regexp, what):
""" """
Sets priors on the model parameters. Sets priors on the model parameters.
Notes **Notes**
-----
Asserts that the prior is suitable for the constraint. If the Asserts that the prior is suitable for the constraint. If the
wrong constraint is in place, an error is raised. If no wrong constraint is in place, an error is raised. If no
constraint is in place, one is added (warning printed). constraint is in place, one is added (warning printed).
@ -185,8 +187,8 @@ class Model(Parameterized):
be handled silently. If _all_ runs fail, the model is reset to the be handled silently. If _all_ runs fail, the model is reset to the
existing parameter values. existing parameter values.
Notes **Notes**
-----
:param num_restarts: number of restarts to use (default 10) :param num_restarts: number of restarts to use (default 10)
:type num_restarts: int :type num_restarts: int
:param robust: whether to handle exceptions silently or not (default False) :param robust: whether to handle exceptions silently or not (default False)
@ -195,7 +197,9 @@ class Model(Parameterized):
:type parallel: bool :type parallel: bool
:param num_processes: number of workers in the multiprocessing pool :param num_processes: number of workers in the multiprocessing pool
:type numprocesses: int :type numprocesses: int
**kwargs are passed to the optimizer. They can be:
\*\*kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations :param max_f_eval: maximum number of function evaluations
:type max_f_eval: int :type max_f_eval: int
:param max_iters: maximum number of iterations :param max_iters: maximum number of iterations
@ -203,9 +207,7 @@ class Model(Parameterized):
:param messages: whether to display during optimisation :param messages: whether to display during optimisation
:type messages: bool :type messages: bool
..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically .. note:: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine.
set to the number of processors on the current machine.
""" """
initial_parameters = self._get_params_transformed() initial_parameters = self._get_params_transformed()
@ -538,22 +540,17 @@ class Model(Parameterized):
return k.variances return k.variances
def pseudo_EM(self, epsilon=.1, **kwargs): def pseudo_EM(self, stop_crit=.1, **kwargs):
""" """
TODO: Should this not bein the GP class?
EM - like algorithm for Expectation Propagation and Laplace approximation EM - like algorithm for Expectation Propagation and Laplace approximation
kwargs are passed to the optimize function. They can be: :param stop_crit: convergence criterion
:type stop_crit: float
:epsilon: convergence criterion
:max_f_eval: maximum number of function evaluations
:messages: whether to display during optimisation
:param optimzer: whice optimizer to use (defaults to self.preferred optimizer)
:type optimzer: string TODO: valid strings?
.. Note: kwargs are passed to update_likelihood and optimize functions.
""" """
assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods" assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
ll_change = epsilon + 1. ll_change = stop_crit + 1.
iteration = 0 iteration = 0
last_ll = -np.inf last_ll = -np.inf
@ -561,9 +558,24 @@ class Model(Parameterized):
alpha = 0 alpha = 0
stop = False stop = False
#Handle **kwargs
ep_args = {}
for arg in kwargs.keys():
if arg in ('epsilon','power_ep'):
ep_args[arg] = kwargs[arg]
del kwargs[arg]
while not stop: while not stop:
last_approximation = self.likelihood.copy() last_approximation = self.likelihood.copy()
last_params = self._get_params() last_params = self._get_params()
if len(ep_args) == 2:
self.update_likelihood_approximation(epsilon=ep_args['epsilon'],power_ep=ep_args['power_ep'])
elif len(ep_args) == 1:
if ep_args.keys()[0] == 'epsilon':
self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
elif ep_args.keys()[0] == 'power_ep':
self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
else:
self.update_likelihood_approximation() self.update_likelihood_approximation()
new_ll = self.log_likelihood() new_ll = self.log_likelihood()
ll_change = new_ll - last_ll ll_change = new_ll - last_ll
@ -576,7 +588,7 @@ class Model(Parameterized):
else: else:
self.optimize(**kwargs) self.optimize(**kwargs)
last_ll = self.log_likelihood() last_ll = self.log_likelihood()
if ll_change < epsilon: if ll_change < stop_crit:
stop = True stop = True
iteration += 1 iteration += 1
if stop: if stop:

View file

@ -231,17 +231,19 @@ class Parameterized(object):
def constrain_fixed(self, regexp, value=None): def constrain_fixed(self, regexp, value=None):
""" """
Arguments
---------
:param regexp: which parameters need to be fixed. :param regexp: which parameters need to be fixed.
:type regexp: ndarray(dtype=int) or regular expression object or string :type regexp: ndarray(dtype=int) or regular expression object or string
:param value: the vlaue to fix the parameters to. If the value is not specified, :param value: the vlaue to fix the parameters to. If the value is not specified,
the parameter is fixed to the current value the parameter is fixed to the current value
:type value: float :type value: float
Notes
----- **Notes**
Fixing a parameter which is tied to another, or constrained in some way will result in an error. Fixing a parameter which is tied to another, or constrained in some way will result in an error.
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes.
""" """
matches = self.grep_param_names(regexp) matches = self.grep_param_names(regexp)
overlap = set(matches).intersection(set(self.all_constrained_indices())) overlap = set(matches).intersection(set(self.all_constrained_indices()))

View file

@ -16,16 +16,17 @@ class SparseGP(GPBase):
:type X: np.ndarray (num_data x input_dim) :type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel : the kernel (covariance function). See link kernels :param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance :type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance) :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None :type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None :type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None) :param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type num_inducing: int :type num_inducing: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
@ -215,7 +216,7 @@ class SparseGP(GPBase):
#def _get_print_names(self): #def _get_print_names(self):
# return self.kern._get_param_names_transformed() + self.likelihood._get_param_names() # return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self): def update_likelihood_approximation(self, **kwargs):
""" """
Approximates a non-gaussian likelihood using Expectation Propagation Approximates a non-gaussian likelihood using Expectation Propagation
@ -229,10 +230,10 @@ class SparseGP(GPBase):
Kmmi = tdot(Lmi.T) Kmmi = tdot(Lmi.T)
diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)]) diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi, **kwargs) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
# raise NotImplementedError, "EP approximation not implemented for uncertain inputs" # raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else: else:
self.likelihood.fit_DTC(self.Kmm, self.psi1.T) self.likelihood.fit_DTC(self.Kmm, self.psi1.T, **kwargs)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
@ -306,10 +307,11 @@ class SparseGP(GPBase):
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
""" """
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew.
Arguments **Arguments**
---------
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim :type Xnew: np.ndarray, Nnew x self.input_dim
:param X_variance_new: The uncertainty in the prediction points :param X_variance_new: The uncertainty in the prediction points
@ -391,8 +393,7 @@ class SparseGP(GPBase):
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False): def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
""" """
For a specific output, predict the function at the new point(s) Xnew. For a specific output, predict the function at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim :type Xnew: np.ndarray, Nnew x self.input_dim
:param output: output to predict :param output: output to predict

View file

@ -14,6 +14,7 @@ import sys
class SVIGP(GPBase): class SVIGP(GPBase):
""" """
Stochastic Variational inference in a Gaussian Process Stochastic Variational inference in a Gaussian Process
:param X: inputs :param X: inputs
@ -22,25 +23,26 @@ class SVIGP(GPBase):
:type Y: np.ndarray of observations (N x D) :type Y: np.ndarray of observations (N x D)
:param batchsize: the size of a h :param batchsize: the size of a h
Additional kwargs are used as for a sparse GP. They include Additional kwargs are used as for a sparse GP. They include:
:param q_u: canonical parameters of the distribution squasehd into a 1D array :param q_u: canonical parameters of the distribution squasehd into a 1D array
:type q_u: np.ndarray :type q_u: np.ndarray
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int :type M: int
:param kernel : the kernel/covariance function. See link kernels :param kernel: the kernel/covariance function. See link kernels
:type kernel: a GPy kernel :type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None :type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None :type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link) :param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int :type M: int
:param beta: noise precision. TODO> ignore beta if doing EP :param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float :type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """

View file

@ -10,31 +10,11 @@ import numpy as np
import GPy import GPy
default_seed = 10000 default_seed = 10000
def crescent_data(seed=default_seed, kernel=None): # FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0
m = GPy.models.GPClassification(data['X'], Y)
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m
def oil(num_inducing=50, max_iters=100, kernel=None): def oil(num_inducing=50, max_iters=100, kernel=None):
""" """
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
""" """
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
X = data['X'] X = data['X']
@ -64,8 +44,10 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
def toy_linear_1d_classification(seed=default_seed): def toy_linear_1d_classification(seed=default_seed):
""" """
Simple 1D classification example Simple 1D classification example
:param seed : seed value for data generation (default is 4).
:param seed: seed value for data generation (default is 4).
:type seed: int :type seed: int
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
@ -92,8 +74,10 @@ def toy_linear_1d_classification(seed=default_seed):
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed): def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
""" """
Sparse 1D classification example Sparse 1D classification example
:param seed : seed value for data generation (default is 4).
:param seed: seed value for data generation (default is 4).
:type seed: int :type seed: int
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
@ -118,61 +102,13 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
return m return m
def sparse_crescent_data(num_inducing=10, seed=default_seed, kernel=None):
"""
Run a Gaussian process classification with DTC approxiamtion on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 10.
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m
def FITC_crescent_data(num_inducing=10, seed=default_seed):
"""
Run a Gaussian process classification with FITC approximation on the crescent data. The demonstration uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type num_inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing)
m.constrain_bounded('.*len',1.,1e3)
m['.*len'] = 3.
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m
def toy_heaviside(seed=default_seed): def toy_heaviside(seed=default_seed):
""" """
Simple 1D classification example using a heavy side gp transformation Simple 1D classification example using a heavy side gp transformation
:param seed : seed value for data generation (default is 4).
:param seed: seed value for data generation (default is 4).
:type seed: int :type seed: int
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
@ -198,3 +134,35 @@ def toy_heaviside(seed=default_seed):
return m return m
def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None):
    """
    Run a Gaussian process classification on the crescent data.

    The demonstration builds one of the GP classification models, fits it
    with ``pseudo_EM`` (EP is used to approximate the likelihood), prints the
    model and plots it.

    :param model_type: type of model to fit, one of 'Full', 'DTC' or 'FITC'.
    :type model_type: str
    :param num_inducing: number of inducing variables (only used for 'DTC' or 'FITC').
    :type num_inducing: int
    :param seed: seed value for data generation.
    :type seed: int
    :param kernel: kernel to use in the model
    :type kernel: a GPy kernel
    :returns: the fitted model
    :raises ValueError: if ``model_type`` is not one of the supported names.
    """
    # Validate before doing any work: an unknown model_type previously fell
    # through every branch and failed later with an unbound local `m`.
    supported_types = ('Full', 'DTC', 'FITC')
    if model_type not in supported_types:
        raise ValueError("model_type must be one of %s, got %r" % (supported_types, model_type))

    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    # Recode the -1 labels as 0; this writes into the loaded label array itself.
    Y[Y.flatten() == -1] = 0

    if model_type == 'Full':
        m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)
    elif model_type == 'DTC':
        m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
        # Initial value for the parameters matched by '.*len' in the sparse (DTC) model.
        m['.*len'] = 10.
    else:  # 'FITC' (guaranteed by the validation above)
        m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
        # Initial value for the parameters matched by '.*len' in the FITC model.
        m['.*len'] = 3.

    m.pseudo_EM()
    print(m)
    m.plot()
    return m

View file

@ -233,7 +233,7 @@ class CGD(Async_Optimize):
""" """
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves, opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs) report_every=10, \*args, \*\*kwargs)
callback gets called every `report_every` iterations callback gets called every `report_every` iterations
@ -244,16 +244,14 @@ class CGD(Async_Optimize):
f, and df will be called with f, and df will be called with
f(xi, *args, **kwargs) f(xi, \*args, \*\*kwargs)
df(xi, *args, **kwargs) df(xi, \*args, \*\*kwargs)
**returns** **Returns:**
-----------
Started `Process` object, optimizing asynchronously Started `Process` object, optimizing asynchronously
**calls** **Calls:**
---------
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message) callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
@ -265,7 +263,7 @@ class CGD(Async_Optimize):
""" """
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves, opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs) report_every=10, \*args, \*\*kwargs)
Minimize f, calling callback every `report_every` iterations with following syntax: Minimize f, calling callback every `report_every` iterations with following syntax:
@ -276,11 +274,10 @@ class CGD(Async_Optimize):
f, and df will be called with f, and df will be called with
f(xi, *args, **kwargs) f(xi, \*args, \*\*kwargs)
df(xi, *args, **kwargs) df(xi, \*args, \*\*kwargs)
**returns** **returns**
---------
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message

View file

@ -10,11 +10,10 @@ class opt_SGD(Optimizer):
""" """
Optimize using stochastic gradient descent. Optimize using stochastic gradient descent.
*** Parameters *** :param Model: reference to the Model object
Model: reference to the Model object :param iterations: number of iterations
iterations: number of iterations :param learning_rate: learning rate
learning_rate: learning rate :param momentum: momentum
momentum: momentum
""" """

View file

@ -17,6 +17,7 @@ def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD) part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -33,6 +34,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD) part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -41,11 +43,13 @@ def linear(input_dim,variances=None,ARD=False):
""" """
Construct a linear kernel. Construct a linear kernel.
Arguments :param input_dim: dimensionality of the kernel, obligatory
--------- :type input_dim: int
input_dimD (int), obligatory :param variances:
variances (np.ndarray) :type variances: np.ndarray
ARD (boolean) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
""" """
part = parts.linear.Linear(input_dim,variances,ARD) part = parts.linear.Linear(input_dim,variances,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -64,37 +68,40 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
:type bias_variance: float :type bias_variance: float
:param ARD: Auto Relevance Determination (allows for ARD version of covariance) :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD) part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
def gibbs(input_dim,variance=1., mapping=None): def gibbs(input_dim,variance=1., mapping=None):
""" """
Gibbs and MacKay non-stationary covariance function. Gibbs and MacKay non-stationary covariance function.
.. math:: .. math::
r = sqrt((x_i - x_j)'*(x_i - x_j)) r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x'))) k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')} Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x'))}
Where :math:`l(x)` is a function giving the length scale as a function of space.
where :math:`l(x)` is a function giving the length scale as a function of space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997 This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of with input location. This leads to an additional term in front of
the kernel. the kernel.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used. The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\\sigma^2`
:type variance: float :type variance: float
:param mapping: the mapping that gives the lengthscale across the input space. :param mapping: the mapping that gives the lengthscale across the input space.
:type mapping: GPy.core.Mapping :type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: Kernpart object :rtype: Kernpart object
@ -124,6 +131,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2,
:type degree: int :type degree: int
:param ARD: Auto Relevance Determination (allows for ARD version of covariance) :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD) part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -132,10 +140,11 @@ def white(input_dim,variance=1.):
""" """
Construct a white kernel. Construct a white kernel.
Arguments :param input_dim: dimensionality of the kernel, obligatory
--------- :type input_dim: int
input_dimD (int), obligatory :param variance: the variance of the kernel
variance (float) :type variance: float
""" """
part = parts.white.White(input_dim,variance) part = parts.white.White(input_dim,variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -180,6 +189,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD) part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -196,6 +206,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD) part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -212,6 +223,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD) part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -220,10 +232,11 @@ def bias(input_dim, variance=1.):
""" """
Construct a bias kernel. Construct a bias kernel.
Arguments :param input_dim: dimensionality of the kernel, obligatory
--------- :type input_dim: int
input_dim (int), obligatory :param variance: the variance of the kernel
variance (float) :type variance: float
""" """
part = parts.bias.Bias(input_dim, variance) part = parts.bias.Bias(input_dim, variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -231,10 +244,15 @@ def bias(input_dim, variance=1.):
def finite_dimensional(input_dim, F, G, variances=1., weights=None): def finite_dimensional(input_dim, F, G, variances=1., weights=None):
""" """
Construct a finite dimensional kernel. Construct a finite dimensional kernel.
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions :param input_dim: the number of input dimensions
G: np.array with shape (n,n) - the Gram matrix associated to F :type input_dim: int
variances : np.ndarray with shape (n,) :param F: np.array of functions with shape (n,) - the n basis functions
:type F: np.array
:param G: np.array with shape (n,n) - the Gram matrix associated to F
:type G: np.array
:param variances: np.ndarray with shape (n,)
:type variances: np.ndarray
""" """
part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights) part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -247,6 +265,7 @@ def spline(input_dim, variance=1.):
:type input_dim: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
part = parts.spline.Spline(input_dim, variance) part = parts.spline.Spline(input_dim, variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -259,6 +278,7 @@ def Brownian(input_dim, variance=1.):
:type input_dim: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
part = parts.Brownian.Brownian(input_dim, variance) part = parts.Brownian.Brownian(input_dim, variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -312,6 +332,7 @@ def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 *
:type period: float :type period: float
:param n_freq: the number of frequencies considered for the periodic subspace :param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int :type n_freq: int
""" """
part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper) part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -330,6 +351,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
:type period: float :type period: float
:param n_freq: the number of frequencies considered for the periodic subspace :param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int :type n_freq: int
""" """
part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper) part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -348,6 +370,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
:type period: float :type period: float
:param n_freq: the number of frequencies considered for the periodic subspace :param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int :type n_freq: int
""" """
part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper) part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -361,6 +384,7 @@ def prod(k1,k2,tensor=False):
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean :type tensor: Boolean
:rtype: kernel object :rtype: kernel object
""" """
part = parts.prod.Prod(k1, k2, tensor) part = parts.prod.Prod(k1, k2, tensor)
return kern(part.input_dim, [part]) return kern(part.input_dim, [part])
@ -376,10 +400,12 @@ def symmetric(k):
def coregionalize(output_dim,rank=1, W=None, kappa=None): def coregionalize(output_dim,rank=1, W=None, kappa=None):
""" """
Coregionlization matrix B, of the form: Coregionalization matrix B, of the form:
.. math:: .. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I} \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
An intrinsic/linear coregionalization kernel of the form An intrinsic/linear coregionalization kernel of the form:
.. math:: .. math::
k_2(x, y)=\mathbf{B} k(x, y) k_2(x, y)=\mathbf{B} k(x, y)
@ -449,7 +475,7 @@ def independent_outputs(k):
def hierarchical(k): def hierarchical(k):
""" """
TODO THis can't be right! Construct a kernel with independent outputs from an existing kernel TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
""" """
# for sl in k.input_slices: # for sl in k.input_slices:
# assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)" # assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
@ -467,7 +493,8 @@ def build_lcm(input_dim, output_dim, kernel_list = [], rank=1,W=None,kappa=None)
:param rank: number tuples of the corregionalization parameters 'coregion_W' :param rank: number tuples of the corregionalization parameters 'coregion_W'
:type rank: integer :type rank: integer
..Note the kernels dimensionality is overwritten to fit input_dim .. note:: the kernels' dimensionality is overwritten to fit input_dim
""" """
for k in kernel_list: for k in kernel_list:

View file

@ -78,13 +78,15 @@ class kern(Parameterized):
def plot_ARD(self, fignum=None, ax=None, title='', legend=False): def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
"""If an ARD kernel is present, it bar-plots the ARD parameters, """If an ARD kernel is present, it bar-plots the ARD parameters.
:param fignum: figure number of the plot :param fignum: figure number of the plot
:param ax: matplotlib axis to plot on :param ax: matplotlib axis to plot on
:param title: :param title:
title of the plot, title of the plot,
pass '' to not print a title pass '' to not print a title
pass None for a generic title pass None for a generic title
""" """
if ax is None: if ax is None:
fig = pb.figure(fignum) fig = pb.figure(fignum)
@ -175,8 +177,10 @@ class kern(Parameterized):
def add(self, other, tensor=False): def add(self, other, tensor=False):
""" """
Add another kernel to this one. Both kernels are defined on the same _space_ Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
if tensor: if tensor:
D = self.input_dim + other.input_dim D = self.input_dim + other.input_dim
@ -218,11 +222,13 @@ class kern(Parameterized):
def prod(self, other, tensor=False): def prod(self, other, tensor=False):
""" """
multiply two kernels (either on the same space, or on the tensor product of the input space). Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false). :param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool :type tensor: bool
""" """
K1 = self.copy() K1 = self.copy()
K2 = other.copy() K2 = other.copy()
@ -321,6 +327,7 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim) :type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X) :param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim) :type X2: np.ndarray (num_inducing x input_dim)
""" """
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
target = np.zeros(self.num_params) target = np.zeros(self.num_params)
@ -340,6 +347,7 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim) :type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X) :param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)""" :type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X) target = np.zeros_like(X)
if X2 is None: if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@ -413,6 +421,7 @@ class kern(Parameterized):
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim) :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
""" """
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
@ -568,7 +577,7 @@ class Kern_check_model(Model):
def is_positive_definite(self): def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0] v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<0): if any(v<-1e-6):
return False return False
else: else:
return True return True
@ -657,6 +666,7 @@ def kern_test(kern, X=None, X2=None, verbose=False):
:type X: ndarray :type X: ndarray
:param X2: X2 input values to test the covariance function. :param X2: X2 input values to test the covariance function.
:type X2: ndarray :type X2: ndarray
""" """
pass_checks = True pass_checks = True
if X==None: if X==None:
@ -708,7 +718,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):
if verbose: if verbose:
print("Checking gradients of K(X, X) wrt X.") print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
@ -719,7 +734,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):
if verbose: if verbose:
print("Checking gradients of K(X, X2) wrt X.") print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
@ -730,7 +750,12 @@ def kern_test(kern, X=None, X2=None, verbose=False):
if verbose: if verbose:
print("Checking gradients of Kdiag(X) wrt X.") print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:

View file

@ -11,12 +11,13 @@ class Coregionalize(Kernpart):
""" """
Covariance function for intrinsic/linear coregionalization models Covariance function for intrinsic/linear coregionalization models
This covariance has the form This covariance has the form:
.. math:: .. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa) \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa)
An intrinsic/linear coregionalization covariance function of the form An intrinsic/linear coregionalization covariance function of the form:
.. math:: .. math::
k_2(x, y)=\mathbf{B} k(x, y) k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a covariance function it is obtained as the tensor product between a covariance function
@ -31,7 +32,7 @@ class Coregionalize(Kernpart):
:param kappa: a vector which allows the outputs to behave independently :param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (output_dim,) :type kappa: numpy array of dimensionality (output_dim,)
.. Note: see coregionalization examples in GPy.examples.regression for some usage. .. note:: see coregionalization examples in GPy.examples.regression for some usage.
""" """
def __init__(self, output_dim, rank=1, W=None, kappa=None): def __init__(self, output_dim, rank=1, W=None, kappa=None):
self.input_dim = 1 self.input_dim = 1

View file

@ -10,9 +10,12 @@ import GPy
class Hetero(Kernpart): class Hetero(Kernpart):
""" """
TODO: Need to constrain the function outputs positive (still thinking of best way of doing this!!! Yes, intend to use transformations, but what's the *best* way). Currently just squaring output. TODO: Need to constrain the function outputs
positive (still thinking of best way of doing this!!! Yes, intend to use
transformations, but what's the *best* way). Currently just squaring output.
Heteroschedastic noise which depends on input location. See, for example, this paper by Goldberg et al. Heteroschedastic noise which depends on input location. See, for example,
this paper by Goldberg et al.
.. math:: .. math::
@ -66,7 +69,7 @@ class Hetero(Kernpart):
def K(self, X, X2, target): def K(self, X, X2, target):
"""Return covariance between X and X2.""" """Return covariance between X and X2."""
if X2==None or X2 is X: if (X2 is None) or (X2 is X):
target[np.diag_indices_from(target)] += self._Kdiag(X) target[np.diag_indices_from(target)] += self._Kdiag(X)
def Kdiag(self, X, target): def Kdiag(self, X, target):
@ -79,13 +82,13 @@ class Hetero(Kernpart):
def dK_dtheta(self, dL_dK, X, X2, target): def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
if X2==None or X2 is X: if (X2 is None) or (X2 is X):
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1] dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dtheta(dL_dKdiag, X, target) self.dKdiag_dtheta(dL_dKdiag, X, target)
def dKdiag_dtheta(self, dL_dKdiag, X, target): def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters.""" """Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None], X)*self.mapping.f(X) target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
def dK_dX(self, dL_dK, X, X2, target): def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X.""" """Derivative of the covariance matrix with respect to X."""

View file

@ -58,6 +58,8 @@ class Kernpart(object):
raise NotImplementedError raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target): def dK_dX(self, dL_dK, X, X2, target):
raise NotImplementedError raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
@ -97,6 +99,9 @@ class Kernpart_stationary(Kernpart):
# wrt lengthscale is 0. # wrt lengthscale is 0.
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
def dKdiag_dX(self, dL_dK, X, target):
pass # true for all stationary kernels
class Kernpart_inner(Kernpart): class Kernpart_inner(Kernpart):
def __init__(self,input_dim): def __init__(self,input_dim):

View file

@ -7,11 +7,13 @@ four_over_tau = 2./np.pi
class MLP(Kernpart): class MLP(Kernpart):
""" """
multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
.. math:: .. math::
k(x,y) = \sigma^2 \frac{2}{\pi} \text{asin} \left(\frac{\sigma_w^2 x^\top y+\sigma_b^2}{\sqrt{\sigma_w^2x^\top x + \sigma_b^2 + 1}\sqrt{\sigma_w^2 y^\top y \sigma_b^2 +1}} \right) k(x,y) = \\sigma^{2}\\frac{2}{\\pi } \\text{asin} \\left ( \\frac{ \\sigma_w^2 x^\\top y+\\sigma_b^2}{\\sqrt{\\sigma_w^2x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y + \\sigma_b^2 +1}} \\right )
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
@ -24,6 +26,7 @@ class MLP(Kernpart):
:type ARD: Boolean :type ARD: Boolean
:rtype: Kernpart object :rtype: Kernpart object
""" """
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False): def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):

View file

@ -7,22 +7,22 @@ four_over_tau = 2./np.pi
class POLY(Kernpart): class POLY(Kernpart):
""" """
polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel,
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
.. math:: .. math::
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^2)^d
k(x, y) = \sigma^2*(\sigma_w^2 x'y+\sigma_b^b)^d The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
The kernel parameters are \sigma^2 (variance), \sigma^2_w
(weight_variance), \sigma^2_b (bias_variance) and d
(degree). Only gradients of the first three are provided for (degree). Only gradients of the first three are provided for
kernel optimisation, it is assumed that polynomial degree would kernel optimisation, it is assumed that polynomial degree would
be set by hand. be set by hand.
The kernel is not recommended as it is badly behaved when the The kernel is not recommended as it is badly behaved when the
\sigma^2_w*x'*y + \sigma^2_b has a magnitude greater than one. For completeness :math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
there will be an automatic relevance determination version of this there is an automatic relevance determination version of this
kernel provided (NOT YET IMPLEMENTED!). kernel provided (NOT YET IMPLEMENTED!).
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
@ -32,7 +32,7 @@ class POLY(Kernpart):
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b` :param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param degree: the degree of the polynomial. :param degree: the degree of the polynomial.
:type degree: int :type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: Kernpart object :rtype: Kernpart object

View file

@ -4,18 +4,17 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood from likelihood import likelihood
class EP(likelihood): class EP(likelihood):
def __init__(self,data,noise_model,epsilon=1e-3,power_ep=[1.,1.]): def __init__(self,data,noise_model):
""" """
Expectation Propagation Expectation Propagation
Arguments :param data: data to model
--------- :type data: numpy array
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :param noise_model: noise distribution
noise_model : a likelihood function (see likelihood_functions.py) :type noise_model: A GPy noise model
""" """
self.noise_model = noise_model self.noise_model = noise_model
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data self.data = data
self.N, self.output_dim = self.data.shape self.N, self.output_dim = self.data.shape
self.is_heteroscedastic = True self.is_heteroscedastic = True
@ -87,11 +86,20 @@ class EP(likelihood):
self.VVT_factor = self.V self.VVT_factor = self.V
self.trYYT = np.trace(self.YYT) self.trYYT = np.trace(self.YYT)
def fit_full(self,K): def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]):
""" """
The expectation-propagation algorithm. The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006. For nomenclature see Rasmussen & Williams 2006.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
""" """
self.epsilon = epsilon
self.eta, self.delta = power_ep
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N) mu = np.zeros(self.N)
Sigma = K.copy() Sigma = K.copy()
@ -149,11 +157,20 @@ class EP(likelihood):
return self._compute_GP_variables() return self._compute_GP_variables()
def fit_DTC(self, Kmm, Kmn): def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013. For nomenclature see ... 2013.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
""" """
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0] num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs! #TODO: this doesn't work with uncertain inputs!
@ -245,11 +262,19 @@ class EP(likelihood):
self._compute_GP_variables() self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag): def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008. For nomenclature see Naish-Guzman and Holden, 2008.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
""" """
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0] num_inducing = Kmm.shape[0]
""" """

View file

@ -10,14 +10,16 @@ class likelihood(Parameterized):
(Gaussian) inherits directly from this, as does the EP algorithm (Gaussian) inherits directly from this, as does the EP algorithm
Some things must be defined for this to work properly: Some things must be defined for this to work properly:
self.Y : the effective Gaussian target of the GP
self.N, self.D : Y.shape - self.Y : the effective Gaussian target of the GP
self.covariance_matrix : the effective (noise) covariance of the GP targets - self.N, self.D : Y.shape
self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP) - self.covariance_matrix : the effective (noise) covariance of the GP targets
self.is_heteroscedastic : enables significant computational savings in GP - self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
self.precision : a scalar or vector representation of the effective target precision - self.is_heteroscedastic : enables significant computational savings in GP
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N - self.precision : a scalar or vector representation of the effective target precision
self.V : self.precision * self.Y - self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
- self.V : self.precision * self.Y
""" """
def __init__(self): def __init__(self):
Parameterized.__init__(self) Parameterized.__init__(self)

View file

@ -1,6 +1,7 @@
# Copyright (c) 2012, 2013 Ricardo Andrade # Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
@ -116,18 +117,3 @@ class Binomial(NoiseDistribution):
def _d2variance_dgp2(self,gp): def _d2variance_dgp2(self,gp):
return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2 return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
"""
def predictive_values(self,mu,var): #TODO remove
mu = mu.flatten()
var = var.flatten()
#mean = stats.norm.cdf(mu/np.sqrt(1+var))
mean = self._predictive_mean_analytical(mu,np.sqrt(var))
norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
#p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
#p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var))
p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var))
return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
"""

View file

@ -11,7 +11,7 @@ from noise_distributions import NoiseDistribution
class Exponential(NoiseDistribution): class Exponential(NoiseDistribution):
""" """
Gamma likelihood Exponential likelihood
Y is expected to take values in {0,1,2,...} Y is expected to take values in {0,1,2,...}
----- -----
$$ $$

View file

@ -57,12 +57,12 @@ class Gaussian(NoiseDistribution):
new_sigma2 = self.predictive_variance(mu,sigma) new_sigma2 = self.predictive_variance(mu,sigma)
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance) return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
def _predictive_variance_analytical(self,mu,sigma,*args): #TODO *args? def _predictive_variance_analytical(self,mu,sigma):
return 1./(1./self.variance + 1./sigma**2) return 1./(1./self.variance + 1./sigma**2)
def _mass(self,gp,obs): def _mass(self,gp,obs):
#return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) ) #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance)) #FIXME return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))
def _nlog_mass(self,gp,obs): def _nlog_mass(self,gp,obs):
return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance)) return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))

View file

@ -13,20 +13,38 @@ class GPTransformation(object):
Link function class for doing non-Gaussian likelihoods approximation Link function class for doing non-Gaussian likelihoods approximation
:param Y: observed output (Nx1 numpy.ndarray) :param Y: observed output (Nx1 numpy.ndarray)
..Note:: Y values allowed depend on the likelihood_function used
.. note:: Y values allowed depend on the likelihood_function used
""" """
def __init__(self): def __init__(self):
pass pass
def transf(self,f):
"""
Gaussian process transformation function, latent space -> output space
"""
pass
def dtransf_df(self,f):
"""
derivative of transf(f) w.r.t. f
"""
pass
def d2transf_df2(self,f):
"""
second derivative of transf(f) w.r.t. f
"""
pass
class Identity(GPTransformation): class Identity(GPTransformation):
""" """
$$ .. math::
g(f) = f
$$
"""
#def transf(self,mu):
# return mu
g(f) = f
"""
def transf(self,f): def transf(self,f):
return f return f
@ -39,13 +57,11 @@ class Identity(GPTransformation):
class Probit(GPTransformation): class Probit(GPTransformation):
""" """
$$ .. math::
g(f) = \\Phi^{-1} (mu)
$$
"""
#def transf(self,mu):
# return inv_std_norm_cdf(mu)
g(f) = \\Phi^{-1} (\\mu)
"""
def transf(self,f): def transf(self,f):
return std_norm_cdf(f) return std_norm_cdf(f)
@ -57,13 +73,11 @@ class Probit(GPTransformation):
class Log(GPTransformation): class Log(GPTransformation):
""" """
$$ .. math::
g(f) = \log(\mu)
$$
"""
#def transf(self,mu):
# return np.log(mu)
g(f) = \\log(\\mu)
"""
def transf(self,f): def transf(self,f):
return np.exp(f) return np.exp(f)
@ -75,20 +89,12 @@ class Log(GPTransformation):
class Log_ex_1(GPTransformation): class Log_ex_1(GPTransformation):
""" """
$$ .. math::
g(f) = \log(\exp(\mu) - 1)
$$
"""
#def transf(self,mu):
# """
# function: output space -> latent space
# """
# return np.log(np.exp(mu) - 1)
g(f) = \\log(\\exp(\\mu) - 1)
"""
def transf(self,f): def transf(self,f):
"""
function: latent space -> output space
"""
return np.log(1.+np.exp(f)) return np.log(1.+np.exp(f))
def dtransf_df(self,f): def dtransf_df(self,f):
@ -110,9 +116,11 @@ class Reciprocal(GPTransformation):
class Heaviside(GPTransformation): class Heaviside(GPTransformation):
""" """
$$
g(f) = I_{x \in A} .. math::
$$
g(f) = I_{x \\in A}
""" """
def transf(self,f): def transf(self,f):
#transformation goes here #transformation goes here

View file

@ -16,10 +16,11 @@ class NoiseDistribution(object):
Likelihood class for doing Expectation propagation Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.ndarray) :param Y: observed output (Nx1 numpy.ndarray)
..Note:: Y values allowed depend on the LikelihoodFunction used
.. note:: Y values allowed depend on the LikelihoodFunction used
""" """
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False): def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
#assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."#FIXME assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
self.gp_link = gp_link self.gp_link = gp_link
self.analytical_mean = analytical_mean self.analytical_mean = analytical_mean
self.analytical_variance = analytical_variance self.analytical_variance = analytical_variance
@ -50,7 +51,9 @@ class NoiseDistribution(object):
""" """
In case it is needed, this function assess the output values or makes any pertinent transformation on them. In case it is needed, this function assess the output values or makes any pertinent transformation on them.
:param Y: observed output (Nx1 numpy.darray) :param Y: observed output
:type Y: Nx1 numpy.ndarray
""" """
return Y return Y
@ -62,18 +65,21 @@ class NoiseDistribution(object):
:param obs: observed output :param obs: observed output
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs) return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)
def _nlog_product_scaled(self,gp,obs,mu,sigma): def _nlog_product_scaled(self,gp,obs,mu,sigma):
""" """
Negative log-product between the cavity distribution and a likelihood factor. Negative log-product between the cavity distribution and a likelihood factor.
..Note:: The constant term in the Gaussian distribution is ignored.
.. note:: The constant term in the Gaussian distribution is ignored.
:param gp: latent variable :param gp: latent variable
:param obs: observed output :param obs: observed output
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs) return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)
@ -85,6 +91,7 @@ class NoiseDistribution(object):
:param obs: observed output :param obs: observed output
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs) return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)
@ -96,6 +103,7 @@ class NoiseDistribution(object):
:param obs: observed output :param obs: observed output
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs) return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)
@ -106,6 +114,7 @@ class NoiseDistribution(object):
:param obs: observed output :param obs: observed output
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False) return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False)
@ -122,6 +131,7 @@ class NoiseDistribution(object):
:param obs: observed output :param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision) :param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural parameter (mu*precision) :param v: cavity distribution 2nd natural parameter (mu*precision)
""" """
mu = v/tau mu = v/tau
mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau)) mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
@ -137,7 +147,8 @@ class NoiseDistribution(object):
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
..Note:: This function helps computing E(Y_star) = E(E(Y_star|f_star)) .. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
""" """
return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp)) return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))
@ -148,6 +159,7 @@ class NoiseDistribution(object):
:param gp: latent variable :param gp: latent variable
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp) return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)
@ -158,6 +170,7 @@ class NoiseDistribution(object):
:param gp: latent variable :param gp: latent variable
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2 return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2
@ -169,7 +182,8 @@ class NoiseDistribution(object):
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
..Note:: This function helps computing E(V(Y_star|f_star)) .. note:: This function helps computing E(V(Y_star|f_star))
""" """
return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp)) return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))
@ -180,6 +194,7 @@ class NoiseDistribution(object):
:param gp: latent variable :param gp: latent variable
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp) return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)
@ -190,6 +205,7 @@ class NoiseDistribution(object):
:param gp: latent variable :param gp: latent variable
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2 return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2
@ -201,7 +217,8 @@ class NoiseDistribution(object):
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
..Note:: This function helps computing E( E(Y_star|f_star)**2 ) .. note:: This function helps computing E( E(Y_star|f_star)**2 )
""" """
return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp)) return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))
@ -212,6 +229,7 @@ class NoiseDistribution(object):
:param gp: latent variable :param gp: latent variable
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp) return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)
@ -222,6 +240,7 @@ class NoiseDistribution(object):
:param gp: latent variable :param gp: latent variable
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 ) return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )
@ -243,6 +262,7 @@ class NoiseDistribution(object):
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False) maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False)
mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma) mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
@ -266,6 +286,7 @@ class NoiseDistribution(object):
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
""" """
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False) maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False)
mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma) mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
@ -278,6 +299,7 @@ class NoiseDistribution(object):
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called. :predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
""" """
# E( V(Y_star|f_star) ) # E( V(Y_star|f_star) )
maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False) maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False)
@ -310,6 +332,7 @@ class NoiseDistribution(object):
:param mu: cavity distribution mean :param mu: cavity distribution mean
:param sigma: cavity distribution standard deviation :param sigma: cavity distribution standard deviation
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called. :predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
""" """
qf = stats.norm.ppf(p,mu,sigma) qf = stats.norm.ppf(p,mu,sigma)
return self.gp_link.transf(qf) return self.gp_link.transf(qf)
@ -321,6 +344,7 @@ class NoiseDistribution(object):
:param x: tuple (latent variable,output) :param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean :param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation :param sigma: latent variable's predictive standard deviation
""" """
return self._nlog_product_scaled(x[0],x[1],mu,sigma) return self._nlog_product_scaled(x[0],x[1],mu,sigma)
@ -331,7 +355,9 @@ class NoiseDistribution(object):
:param x: tuple (latent variable,output) :param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean :param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation :param sigma: latent variable's predictive standard deviation
..Note: Only avilable when the output is continuous
.. note:: Only available when the output is continuous
""" """
assert not self.discrete, "Gradient not available for discrete outputs." assert not self.discrete, "Gradient not available for discrete outputs."
return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0]))) return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))
@ -343,7 +369,9 @@ class NoiseDistribution(object):
:param x: tuple (latent variable,output) :param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean :param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation :param sigma: latent variable's predictive standard deviation
..Note: Only avilable when the output is continuous
.. note:: Only available when the output is continuous
""" """
assert not self.discrete, "Hessian not available for discrete outputs." assert not self.discrete, "Hessian not available for discrete outputs."
cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1]) cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
@ -356,14 +384,17 @@ class NoiseDistribution(object):
:param x: tuple (latent variable,output) :param x: tuple (latent variable,output)
:param mu: latent variable's predictive mean :param mu: latent variable's predictive mean
:param sigma: latent variable's predictive standard deviation :param sigma: latent variable's predictive standard deviation
""" """
return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False) return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False)
def predictive_values(self,mu,var): def predictive_values(self,mu,var):
""" """
Compute mean, variance and confidence interval (percentiles 5 and 95) of the prediction Compute mean, variance and confidence interval (percentiles 5 and 95) of the prediction.
:param mu: mean of the latent variable :param mu: mean of the latent variable
:param var: variance of the latent variable :param var: variance of the latent variable
""" """
if isinstance(mu,float) or isinstance(mu,int): if isinstance(mu,float) or isinstance(mu,int):
mu = [mu] mu = [mu]

View file

@ -12,29 +12,22 @@ from noise_distributions import NoiseDistribution
class Poisson(NoiseDistribution): class Poisson(NoiseDistribution):
""" """
Poisson likelihood Poisson likelihood
Y is expected to take values in {0,1,2,...}
----- .. math::
$$ L(x) = \\exp(-\\lambda) * \\frac{\\lambda^{Y_i}}{Y_i!}
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$ .. note:: Y is expected to take values in {0,1,2,...}
""" """
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
#self.discrete = True
#self.support_limits = (0,np.inf)
#self.analytical_mean = False
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance) super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y): #TODO def _preprocess_values(self,Y): #TODO
#self.scale = .5*Y.max() return Y
#self.shift = Y.mean()
return Y #(Y - self.shift)/self.scale
def _mass(self,gp,obs): def _mass(self,gp,obs):
""" """
Mass (or density) function Mass (or density) function
""" """
#obs = obs*self.scale + self.shift
return stats.poisson.pmf(obs,self.gp_link.transf(gp)) return stats.poisson.pmf(obs,self.gp_link.transf(gp))
def _nlog_mass(self,gp,obs): def _nlog_mass(self,gp,obs):
@ -51,15 +44,6 @@ class Poisson(NoiseDistribution):
transf = self.gp_link.transf(gp) transf = self.gp_link.transf(gp)
return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def _dnlog_mass_dobs(self,obs,gp): #TODO not needed
return special.psi(obs+1) - np.log(self.gp_link.transf(gp))
def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed
return special.polygamma(1,obs)
def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed
return -self.gp_link.dtransf_df(gp)/self.gp_link.transf(gp)
def _mean(self,gp): def _mean(self,gp):
""" """
Mass (or density) function Mass (or density) function

View file

@ -10,11 +10,13 @@ class MLP(Mapping):
.. math:: .. math::
f(\mathbf{x}*) = \mathbf{W}^0\boldsymbol{\phi}(\mathbf{W}^1\mathbf{x}+\mathb{b}^1)^* + \mathbf{b}^0 f(\\mathbf{x}*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}+\\mathbf{b}^1)^* + \\mathbf{b}^0
where where
..math::
\phi(\cdot) = \text{tanh}(\cdot) .. math::
\\phi(\\cdot) = \\text{tanh}(\\cdot)
:param X: input observations :param X: input observations
:type X: ndarray :type X: ndarray
@ -22,6 +24,7 @@ class MLP(Mapping):
:type output_dim: int :type output_dim: int
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as many hidden layers as the length of the list, each with the given number of hidden nodes in it. :param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as many hidden layers as the length of the list, each with the given number of hidden nodes in it.
:type hidden_dim: int or list of ints. :type hidden_dim: int or list of ints.
""" """
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3): def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):

View file

@ -245,12 +245,13 @@ class BayesianGPLVM(SparseGP, GPLVM):
""" """
Plot latent space X in 1D: Plot latent space X in 1D:
-if fig is given, create input_dim subplots in fig and plot in these - if fig is given, create input_dim subplots in fig and plot in these
-if ax is given plot input_dim 1D latent space plots of X into each `axis` - if ax is given plot input_dim 1D latent space plots of X into each `axis`
-if neither fig nor ax is given create a figure with fignum and plot in there - if neither fig nor ax is given create a figure with fignum and plot in there
colors: colors:
colors of different latent space dimensions input_dim colors of different latent space dimensions input_dim
""" """
import pylab import pylab
if ax is None: if ax is None:

View file

@ -39,6 +39,7 @@ class MRD(Model):
:param num_inducing: number of inducing inputs to use :param num_inducing: number of inducing inputs to use
:param kernels: list of kernels or kernel shared for all BGPLVMS :param kernels: list of kernels or kernel shared for all BGPLVMS
:type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default)
""" """
def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None, def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None,
kernels=None, initx='PCA', kernels=None, initx='PCA',
@ -338,8 +339,11 @@ class MRD(Model):
def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs): def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
""" """
:param:`titles` :
titles for axes of datasets TODO: Explain other parameters
:param titles: titles for axes of datasets
""" """
if titles is None: if titles is None:
titles = [r'${}$'.format(name) for name in self.names] titles = [r'${}$'.format(name) for name in self.names]

View file

@ -55,7 +55,18 @@ class BGPLVMTests(unittest.TestCase):
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
#@unittest.skip('psi2 cross terms are NotImplemented for this combination') def test_rbf_line_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_bias_kern(self): def test_linear_bias_kern(self):
N, num_inducing, input_dim, D = 30, 5, 4, 30 N, num_inducing, input_dim, D = 30, 5, 4, 30
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)

View file

@ -238,6 +238,18 @@ class GradientTests(unittest.TestCase):
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def multioutput_sparse_regression_1D(self):
X1 = np.random.rand(500, 1) * 8
X2 = np.random.rand(300, 1) * 5
X = np.vstack((X1, X2))
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad())
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."

View file

@ -524,11 +524,14 @@ def simulation_BGPLVM():
'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"} 'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"}
def toy_rbf_1d(seed=default_seed, num_samples=500): def toy_rbf_1d(seed=default_seed, num_samples=500):
"""Samples values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1. """
Samples values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1.
:param seed: seed to use for random sampling. :param seed: seed to use for random sampling.
:type seed: int :type seed: int
:param num_samples: number of samples to sample in the function (default 500). :param num_samples: number of samples to sample in the function (default 500).
:type num_samples: int :type num_samples: int
""" """
np.random.seed(seed=seed) np.random.seed(seed=seed)
num_in = 1 num_in = 1
@ -631,11 +634,15 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
def crescent_data(num_data=200, seed=default_seed): def crescent_data(num_data=200, seed=default_seed):
"""Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem. """
Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
:param num_data: number of data to be sampled (default is 200). :param num_data: number of data to be sampled (default is 200).
:type num_data: int :type num_data: int
:param seed: random seed to be used for data generation. :param seed: random seed to be used for data generation.
:type seed: int""" :type seed: int
"""
np.random.seed(seed=seed) np.random.seed(seed=seed)
sqrt2 = np.sqrt(2) sqrt2 = np.sqrt(2)
# Rotation matrix # Rotation matrix

View file

@ -27,48 +27,56 @@ except:
_blas_available = False _blas_available = False
def dtrtrs(A, B, lower=0, trans=0, unitdiag=0): def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):
"""Wrapper for lapack dtrtrs function """
Wrapper for lapack dtrtrs function
:param A: Matrix A :param A: Matrix A
:param B: Matrix B :param B: Matrix B
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: :returns:
""" """
return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag) return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)
def dpotrs(A, B, lower=0): def dpotrs(A, B, lower=0):
"""Wrapper for lapack dpotrs function """
Wrapper for lapack dpotrs function
:param A: Matrix A :param A: Matrix A
:param B: Matrix B :param B: Matrix B
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: :returns:
""" """
return lapack.dpotrs(A, B, lower=lower) return lapack.dpotrs(A, B, lower=lower)
def dpotri(A, lower=0): def dpotri(A, lower=0):
"""Wrapper for lapack dpotri function """
Wrapper for lapack dpotri function
:param A: Matrix A :param A: Matrix A
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: A inverse :returns: A inverse
""" """
return lapack.dpotri(A, lower=lower) return lapack.dpotri(A, lower=lower)
def trace_dot(a, b): def trace_dot(a, b):
""" """
efficiently compute the trace of the matrix product of a and b Efficiently compute the trace of the matrix product of a and b
""" """
return np.sum(a * b) return np.sum(a * b)
def mdot(*args): def mdot(*args):
"""Multiply all the arguments using matrix product rules. """
Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one The output is equivalent to multiplying the arguments one by one
from left to right using dot(). from left to right using dot().
Precedence can be controlled by creating tuples of arguments, Precedence can be controlled by creating tuples of arguments,
for instance mdot(a,((b,c),d)) computes a*((b*c)*d). for instance mdot(a,((b,c),d)) computes a*((b*c)*d).
Note that this means the output of dot(a,b) and mdot(a,b) will differ if Note that this means the output of dot(a,b) and mdot(a,b) will differ if
a or b is a pure tuple of numbers. a or b is a pure tuple of numbers.
""" """
if len(args) == 1: if len(args) == 1:
return args[0] return args[0]
@ -115,14 +123,16 @@ def jitchol(A, maxtries=5):
def jitchol_old(A, maxtries=5): def jitchol_old(A, maxtries=5):
""" """
:param A : An almost pd square matrix :param A: An almost pd square matrix
:rval L: the Cholesky decomposition of A :rval L: the Cholesky decomposition of A
.. Note: .. note::
Adds jitter to K, to enforce positive-definiteness Adds jitter to K, to enforce positive-definiteness
if stuff breaks, please check: if stuff breaks, please check:
np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T) np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
""" """
try: try:
return linalg.cholesky(A, lower=True) return linalg.cholesky(A, lower=True)
@ -142,6 +152,7 @@ def jitchol_old(A, maxtries=5):
def pdinv(A, *args): def pdinv(A, *args):
""" """
:param A: A DxD pd numpy array :param A: A DxD pd numpy array
:rval Ai: the inverse of A :rval Ai: the inverse of A
@ -152,6 +163,7 @@ def pdinv(A, *args):
:rtype Li: np.ndarray :rtype Li: np.ndarray
:rval logdet: the log of the determinant of A :rval logdet: the log of the determinant of A
:rtype logdet: float64 :rtype logdet: float64
""" """
L = jitchol(A, *args) L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L))) logdet = 2.*np.sum(np.log(np.diag(L)))
@ -177,14 +189,13 @@ def chol_inv(L):
def multiple_pdinv(A): def multiple_pdinv(A):
""" """
Arguments
---------
:param A: A DxDxN numpy array (each A[:,:,i] is pd) :param A: A DxDxN numpy array (each A[:,:,i] is pd)
Returns :rval invs: the inverses of A
------- :rtype invs: np.ndarray
invs : the inverses of A :rval hld: 0.5* the log of the determinants of A
hld: 0.5* the log of the determinants of A :rtype hld: np.array
""" """
N = A.shape[-1] N = A.shape[-1]
chols = [jitchol(A[:, :, i]) for i in range(N)] chols = [jitchol(A[:, :, i]) for i in range(N)]
@ -198,15 +209,13 @@ def PCA(Y, input_dim):
""" """
Principal component analysis: maximum likelihood solution by SVD Principal component analysis: maximum likelihood solution by SVD
Arguments
---------
:param Y: NxD np.array of data :param Y: NxD np.array of data
:param input_dim: int, dimension of projection :param input_dim: int, dimension of projection
Returns
-------
:rval X: - Nxinput_dim np.array of dimensionality reduced data :rval X: - Nxinput_dim np.array of dimensionality reduced data
W - input_dimxD mapping from X to Y :rval W: - input_dimxD mapping from X to Y
""" """
if not np.allclose(Y.mean(axis=0), 0.0): if not np.allclose(Y.mean(axis=0), 0.0):
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
@ -273,11 +282,10 @@ def DSYR_blas(A, x, alpha=1.):
Performs a symmetric rank-1 update operation: Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T) A <- A + alpha * np.dot(x,x.T)
Arguments
---------
:param A: Symmetric NxN np.array :param A: Symmetric NxN np.array
:param x: Nx1 np.array :param x: Nx1 np.array
:param alpha: scalar :param alpha: scalar
""" """
N = c_int(A.shape[0]) N = c_int(A.shape[0])
LDA = c_int(A.shape[0]) LDA = c_int(A.shape[0])
@ -295,11 +303,10 @@ def DSYR_numpy(A, x, alpha=1.):
Performs a symmetric rank-1 update operation: Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T) A <- A + alpha * np.dot(x,x.T)
Arguments
---------
:param A: Symmetric NxN np.array :param A: Symmetric NxN np.array
:param x: Nx1 np.array :param x: Nx1 np.array
:param alpha: scalar :param alpha: scalar
""" """
A += alpha * np.dot(x[:, None], x[None, :]) A += alpha * np.dot(x[:, None], x[None, :])
@ -363,8 +370,9 @@ def cholupdate(L, x):
""" """
update the LOWER cholesky factor of a pd matrix IN PLACE update the LOWER cholesky factor of a pd matrix IN PLACE
if L is the lower chol. of K, then this function computes L_ if L is the lower chol. of K, then this function computes L\_
where L_ is the lower chol of K + x*x^T where L\_ is the lower chol of K + x*x^T
""" """
support_code = """ support_code = """
#include <math.h> #include <math.h>

View file

@ -17,12 +17,9 @@ def linear_grid(D, n = 100, min_max = (-100, 100)):
""" """
Creates a D-dimensional grid of n linearly spaced points Creates a D-dimensional grid of n linearly spaced points
Parameters: :param D: dimension of the grid
:param n: number of points
D: dimension of the grid :param min_max: (min, max) list
n: number of points
min_max: (min, max) list
""" """
@ -39,6 +36,7 @@ def kmm_init(X, m = 10):
:param X: data :param X: data
:param m: number of inducing points :param m: number of inducing points
""" """
# compute the distances # compute the distances

View file

@ -92,13 +92,15 @@ class tree:
def swap_vertices(self, i, j): def swap_vertices(self, i, j):
"""Swap two vertices in the tree structure array. """
Swap two vertices in the tree structure array.
swap_vertex swaps the location of two vertices in a tree structure array. swap_vertex swaps the location of two vertices in a tree structure array.
ARG tree : the tree for which two vertices are to be swapped.
ARG i : the index of the first vertex to be swapped. :param tree: the tree for which two vertices are to be swapped.
ARG j : the index of the second vertex to be swapped. :param i: the index of the first vertex to be swapped.
RETURN tree : the tree structure with the two vertex locations :param j: the index of the second vertex to be swapped.
swapped. :rval tree: the tree structure with the two vertex locations swapped.
""" """
store_vertex_i = self.vertices[i] store_vertex_i = self.vertices[i]
store_vertex_j = self.vertices[j] store_vertex_j = self.vertices[j]
@ -117,12 +119,17 @@ class tree:
def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False): def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False):
"""Compute the rotation matrix for an angle in each direction. """
Compute the rotation matrix for an angle in each direction.
This is a helper function for computing the rotation matrix for a given set of angles in a given order. This is a helper function for computing the rotation matrix for a given set of angles in a given order.
ARG xangle : rotation for x-axis.
ARG yangle : rotation for y-axis. :param xangle: rotation for x-axis.
ARG zangle : rotation for z-axis. :param yangle: rotation for y-axis.
ARG order : the order for the rotations.""" :param zangle: rotation for z-axis.
:param order: the order for the rotations.
"""
if degrees: if degrees:
xangle = math.radians(xangle) xangle = math.radians(xangle)
yangle = math.radians(yangle) yangle = math.radians(yangle)
@ -301,10 +308,12 @@ class acclaim_skeleton(skeleton):
def load_skel(self, file_name): def load_skel(self, file_name):
"""Loads an ASF file into a skeleton structure. """
loads skeleton structure from an acclaim skeleton file. Loads an ASF file into a skeleton structure.
ARG file_name : the file name to load in.
RETURN skel : the skeleton for the file.""" :param file_name: The file name to load in.
"""
fid = open(file_name, 'r') fid = open(file_name, 'r')
self.read_skel(fid) self.read_skel(fid)

View file

@ -15,7 +15,7 @@ def most_significant_input_dimensions(model, which_indices):
try: try:
input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2] input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2]
except: except:
raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
else: else:
input_1, input_2 = which_indices input_1, input_2 = which_indices
return input_1, input_2 return input_1, input_2

View file

@ -502,11 +502,14 @@ def data_play(Y, visualizer, frame_rate=30):
This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object. This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object.
.. code-block:: python
data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01']) data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01'])
Y = data['Y'] Y = data['Y']
Y[:, 0:3] = 0. # Make figure walk in place Y[:, 0:3] = 0. # Make figure walk in place
visualize = GPy.util.visualize.skeleton_show(Y[0, :], data['skel']) visualize = GPy.util.visualize.skeleton_show(Y[0, :], data['skel'])
GPy.util.visualize.data_play(Y, visualize) GPy.util.visualize.data_play(Y, visualize)
""" """

View file

@ -53,9 +53,11 @@ class TanhWarpingFunction(WarpingFunction):
self.num_parameters = 3 * self.n_terms self.num_parameters = 3 * self.n_terms
def f(self,y,psi): def f(self,y,psi):
"""transform y with f using parameter vector psi """
transform y with f using parameter vector psi
psi = [[a,b,c]] psi = [[a,b,c]]
f = \sum_{terms} a * tanh(b*(y+c)) :math:`f = \\sum_{terms} a * tanh(b*(y+c))`
""" """
#1. check that number of params is consistent #1. check that number of params is consistent
@ -77,8 +79,7 @@ class TanhWarpingFunction(WarpingFunction):
""" """
calculate the numerical inverse of f calculate the numerical inverse of f
== input == :param iterations: number of N.R. iterations
iterations: number of N.R. iterations
""" """
@ -165,9 +166,11 @@ class TanhWarpingFunction_d(WarpingFunction):
self.num_parameters = 3 * self.n_terms + 1 self.num_parameters = 3 * self.n_terms + 1
def f(self,y,psi): def f(self,y,psi):
"""transform y with f using parameter vector psi """
Transform y with f using parameter vector psi
psi = [[a,b,c]] psi = [[a,b,c]]
f = \sum_{terms} a * tanh(b*(y+c))
:math:`f = \\sum_{terms} a * tanh(b*(y+c))`
""" """
#1. check that number of params is consistent #1. check that number of params is consistent
@ -189,8 +192,7 @@ class TanhWarpingFunction_d(WarpingFunction):
""" """
calculate the numerical inverse of f calculate the numerical inverse of f
== input == :param max_iterations: maximum number of N.R. iterations
iterations: number of N.R. iterations
""" """
@ -214,12 +216,13 @@ class TanhWarpingFunction_d(WarpingFunction):
def fgrad_y(self, y, psi, return_precalc = False): def fgrad_y(self, y, psi, return_precalc = False):
""" """
gradient of f w.r.t. y ([N x 1]) gradient of f w.r.t. y ([N x 1])
returns: Nx1 vector of derivatives, unless return_precalc is true,
then it also returns the precomputed stuff :returns: Nx1 vector of derivatives, unless return_precalc is true, then it also returns the precomputed stuff
""" """
mpsi = psi.copy() mpsi = psi.copy()
d = psi[-1] d = psi[-1]
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3) mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
@ -242,7 +245,7 @@ class TanhWarpingFunction_d(WarpingFunction):
""" """
gradient of f w.r.t. y and psi gradient of f w.r.t. y and psi
returns: NxIx4 tensor of partial derivatives :returns: NxIx4 tensor of partial derivatives
""" """

View file

@ -2,7 +2,7 @@
# #
# You can set these variables from the command line. # You can set these variables from the command line.
SPHINXOPTS = SPHINXOPTS = -a -w log.txt -E
SPHINXBUILD = sphinx-build SPHINXBUILD = sphinx-build
PAPER = PAPER =
BUILDDIR = _build BUILDDIR = _build

View file

@ -106,7 +106,7 @@ class Mock(object):
print "Mocking" print "Mocking"
MOCK_MODULES = ['sympy', MOCK_MODULES = ['sympy',
'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache',
'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser' 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'Tango', 'numdifftools'
] ]
for mod_name in MOCK_MODULES: for mod_name in MOCK_MODULES:
sys.modules[mod_name] = Mock() sys.modules[mod_name] = Mock()

View file

@ -107,7 +107,7 @@ inputs: ::
m['iip'] = np.arange(-5,0) m['iip'] = np.arange(-5,0)
Getting the model's likelihood and gradients Getting the model's likelihood and gradients
=========================================== =============================================
Apart from printing the model, the marginal Apart from printing the model, the marginal
log-likelihood can be obtained by using the function log-likelihood can be obtained by using the function
``log_likelihood()``. Also, the log-likelihood gradients ``log_likelihood()``. Also, the log-likelihood gradients