diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 750788e9..0979a04e 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -6,8 +6,8 @@ from GPy.core.model import Model class GPBase(Model): """ - Gaussian Process Model for holding shared behaviour between - sprase_GP and GP models + Gaussian process base model for holding shared behaviour between + sparse_GP and GP models. """ def __init__(self, X, likelihood, kernel, normalize_X=False): @@ -35,8 +35,7 @@ class GPBase(Model): def getstate(self): """ - Get the current state of the class, - here just all the indices, rest can get recomputed + Get the current state of the class, here we return everything that is needed to recompute the model. """ return Model.getstate(self) + [self.X, self.num_data, @@ -63,14 +62,6 @@ class GPBase(Model): Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian. - :param samples: the number of a posteriori samples to plot - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In two dimsensions, a contour-plot shows the mean predicted function @@ -78,6 +69,22 @@ class GPBase(Model): Can plot only part of the data and part of the posterior functions using which_data and which_functions + + :param samples: the number of a posteriori samples to plot + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits + :param which_data: which of the training data to plot (default all) + :type which_data: 'all' or a slice object to slice self.X, self.Y + :param which_parts: which of the kernel functions to plot (additively) + :type which_parts: 'all', or list of bools + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param full_cov: whether to use the full predictive covariance rather than just its diagonal + :type full_cov: bool + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + """ if which_data == 'all': which_data = slice(None) @@ -118,12 +125,41 @@ class GPBase(Model): def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): """ - TODO: Docstrings! - + Plot the GP with noise, where the likelihood is Gaussian. + + Plot the posterior of the GP. + - In one dimension, the function is plotted with a shaded region identifying two standard deviations. + - In two dimensions, a contour-plot shows the mean predicted function + - In higher dimensions, we have not implemented this yet !TODO! + + Can plot only part of the data and part of the posterior functions + using which_data and which_parts + + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. 
Defaults to data limits + :type plot_limits: np.array + :param which_data: which of the training data to plot (default all) + :type which_data: 'all' or a slice object to slice self.X, self.Y + :param which_parts: which of the kernel functions to plot (additively) + :type which_parts: 'all', or list of bools + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param levels: number of levels to plot in a contour plot. + :type levels: int + :param samples: the number of a posteriori samples to plot + :type samples: int + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param linecol: color of line to plot. + :type linecol: hex colour string + :param fillcol: color of fill + :type fillcol: hex colour string :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure - fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. """ # TODO include samples if which_data == 'all': diff --git a/GPy/core/model.py b/GPy/core/model.py index 635c06e6..0487e8f8 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -24,15 +24,17 @@ class Model(Parameterized): self.preferred_optimizer = 'scg' # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes def log_likelihood(self): - raise NotImplementedError, "this needs to be implemented to use the Model class" + raise NotImplementedError, "this needs to be implemented to use the model class" def _log_likelihood_gradients(self): - raise NotImplementedError, "this needs to be implemented to use the Model class" + raise NotImplementedError, "this needs to be implemented to use the model class" def getstate(self): """ Get the current state of the class. Inherited from Parameterized, so add those parameters to the state + :return: list of states from the model. + """ return Parameterized.getstate(self) + \ [self.priors, self.optimization_runs, @@ -41,8 +43,10 @@ class Model(Parameterized): def setstate(self, state): """ set state from previous call to getstate - call Parameterized with the rest of the state + + :param state: the state of the model. + :type state: list as returned from getstate. """ self.preferred_optimizer = state.pop() self.sampling_runs = state.pop() @@ -52,21 +56,22 @@ class Model(Parameterized): def set_prior(self, regexp, what): """ - Sets priors on the Model parameters. - - Arguments - --------- - regexp -- string, regexp, or integer array - what -- instance of a Prior class + Sets priors on the model parameters. Notes ----- - Asserts that the Prior is suitable for the constraint. If the + Asserts that the prior is suitable for the constraint. If the wrong constraint is in place, an error is raised. If no constraint is in place, one is added (warning printed). - For tied parameters, the Prior will only be "counted" once, thus - a Prior object is only inserted on the first tied index + For tied parameters, the prior will only be "counted" once, thus + a prior object is only inserted on the first tied index + + :param regexp: regular expression of parameters on which priors need to be set. + :type regexp: string, regexp, or integer array + :param what: the prior to set on the matched parameters. 
+ :type what: GPy.core.Prior type + """ if self.priors is None: self.priors = [None for i in range(self._get_params().size)] @@ -76,12 +81,12 @@ class Model(Parameterized): # check tied situation tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))] if len(tie_partial_matches): - raise ValueError, "cannot place Prior across partial ties" + raise ValueError, "cannot place prior across partial ties" tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ] if len(tie_matches) > 1: - raise ValueError, "cannot place Prior across multiple ties" + raise ValueError, "cannot place prior across multiple ties" elif len(tie_matches) == 1: - which = which[:1] # just place a Prior object on the first parameter + which = which[:1] # just place a prior object on the first parameter # check constraints are okay @@ -93,7 +98,7 @@ class Model(Parameterized): else: constrained_positive_indices = np.zeros(shape=(0,)) bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices) - assert not np.any(which[:, None] == bad_constraints), "constraint and Prior incompatible" + assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible" unconst = np.setdiff1d(which, constrained_positive_indices) if len(unconst): print "Warning: constraining parameters to be positive:" @@ -101,17 +106,22 @@ class Model(Parameterized): print '\n' self.constrain_positive(unconst) elif what.domain is REAL: - assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and Prior incompatible" + assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible" else: - raise ValueError, "Prior not recognised" + raise ValueError, "prior not recognised" - # store the Prior in a local list + # store the prior in a local list for w in which: self.priors[w] = what def get_gradient(self, name, return_names=False): """ - Get Model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. + Get model gradient(s) by name. The name is applied as a regular expression and the gradients of all parameters that match that regular expression are returned. + + :param name: the name of the parameters required (as a regular expression). + :type name: regular expression + :param return_names: whether or not to return the names matched (default False) + :type return_names: bool """ matches = self.grep_param_names(name) if len(matches): @@ -151,14 +161,14 @@ class Model(Parameterized): def randomize(self): """ - Randomize the Model. - Make this draw from the Prior if one exists, else draw from N(0,1) + Randomize the model. + Draw from the prior if one exists, else draw from N(0,1) """ # first take care of all parameters (from N(0,1)) x = self._get_params_transformed() x = np.random.randn(x.size) self._set_params_transformed(x) - # now draw from Prior where possible + # now draw from prior where possible x = self._get_params() if self.priors is not None: [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None] @@ -168,21 +178,30 @@ class Model(Parameterized): def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): """ - Perform random restarts of the Model, and set the Model to the best + Perform random restarts of the model, and set the model to the best seen solution. 
If the robust flag is set, exceptions raised during optimizations will - be handled silently. If _all_ runs fail, the Model is reset to the + be handled silently. If _all_ runs fail, the model is reset to the existing parameter values. Notes ----- + :param num_restarts: number of restarts to use (default 10) + :type num_restarts: int + :param robust: whether to handle exceptions silently or not (default False) + :type robust: bool + :param parallel: whether to run each restart as a separate process. It relies on the multiprocessing module. + :type parallel: bool + :param num_processes: number of workers in the multiprocessing pool + :type num_processes: int **kwargs are passed to the optimizer. They can be: - :max_f_eval: maximum number of function evaluations - :messages: whether to display during optimisation - :verbose: whether to show informations about the current restart - :parallel: whether to run each restart as a separate process. It relies on the multiprocessing module. - :num_processes: number of workers in the multiprocessing pool + :param max_f_eval: maximum number of function evaluations + :type max_f_eval: int + :param max_iters: maximum number of iterations + :type max_iters: int + :param messages: whether to display during optimisation + :type messages: bool ..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine. @@ -230,8 +249,13 @@ class Model(Parameterized): self._set_params_transformed(initial_parameters) def ensure_default_constraints(self): - """ - Ensure that any variables which should clearly be positive have been constrained somehow. + """ + Ensure that any variables which should clearly be positive + have been constrained somehow. The method performs a regular + expression search on parameter names looking for the terms + 'variance', 'lengthscale', 'precision' and 'kappa'. If any of + these terms is present in the name, the parameter is + constrained positive. """ positive_strings = ['variance', 'lengthscale', 'precision', 'kappa'] # param_names = self._get_param_names() @@ -246,11 +270,15 @@ class Model(Parameterized): def objective_function(self, x): """ - The objective function passed to the optimizer. It combines the likelihood and the priors. + The objective function passed to the optimizer. It combines + the likelihood and the priors. Failures are handled robustly. The algorithm will try several times to return the objective, and will raise the original exception if it the objective cannot be computed. + + :param x: the parameters of the model. + :type x: np.array """ try: self._set_params_transformed(x) @@ -267,8 +295,11 @@ class Model(Parameterized): Gets the gradients from the likelihood and the priors. Failures are handled robustly. The algorithm will try several times to - return the objective, and will raise the original exception if it + return the gradients, and will raise the original exception if the objective cannot be computed. + + :param x: the parameters of the model. + :type x: np.array """ try: self._set_params_transformed(x) @@ -282,6 +313,13 @@ class Model(Parameterized): return obj_grads def objective_and_gradients(self, x): + """ + Compute the objective function of the model and its gradient at the point given by x. + + :param x: the point at which gradients are to be computed. 
+ :type x: np.array + """ + try: self._set_params_transformed(x) obj_f = -self.log_likelihood() - self.log_prior() @@ -297,11 +335,13 @@ class Model(Parameterized): def optimize(self, optimizer=None, start=None, **kwargs): """ - Optimize the Model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. + Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. kwargs are passed to the optimizer. They can be: - :max_f_eval: maximum number of function evaluations + :param max_f_eval: maximum number of function evaluations + :type max_f_eval: int :messages: whether to display during optimisation + :type messages: bool :param optimzer: which optimizer to use (defaults to self.preferred optimizer) :type optimzer: string TODO: valid strings? """ @@ -327,14 +367,14 @@ class Model(Parameterized): self.optimization_runs.append(sgd) def Laplace_covariance(self): - """return the covariance matric of a Laplace approximatino at the current (stationary) point""" - # TODO add in the Prior contributions for MAP estimation + """return the covariance matrix of a Laplace approximation at the current (stationary) point.""" + # TODO add in the prior contributions for MAP estimation # TODO fix the hessian for tied, constrained and fixed components if hasattr(self, 'log_likelihood_hessian'): A = -self.log_likelihood_hessian() else: - print "numerically calculating hessian. please be patient!" + print "numerically calculating Hessian. please be patient!" x = self._get_params() def f(x): self._set_params(x) @@ -348,8 +388,8 @@ class Model(Parameterized): return A def Laplace_evidence(self): - """Returns an estiamte of the Model evidence based on the Laplace approximation. - Uses a numerical estimate of the hessian if none is available analytically""" + """Returns an estimate of the model evidence based on the Laplace approximation. + Uses a numerical estimate of the Hessian if none is available analytically.""" A = self.Laplace_covariance() try: hld = np.sum(np.log(np.diag(jitchol(A)[0]))) @@ -370,27 +410,28 @@ class Model(Parameterized): log_prior = self.log_prior() obj_funct = '\nLog-likelihood: {0:.3e}'.format(log_like) if len(''.join(strs)) != 0: - obj_funct += ', Log Prior: {0:.3e}, LL+Prior = {0:.3e}'.format(log_prior, log_like + log_prior) + obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior) obj_funct += '\n\n' s[0] = obj_funct + s[0] - s[0] += "|{h:^{col}}".format(h='Prior', col=width) + s[0] += "|{h:^{col}}".format(h='prior', col=width) s[1] += '-' * (width + 1) for p in range(2, len(strs) + 2): - s[p] += '|{Prior:^{width}}'.format(Prior=strs[p - 2], width=width) + s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width) return '\n'.join(s) def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): """ - Check the gradient of the Model by comparing to a numerical estimate. - If the verbose flag is passed, invividual components are tested (and printed) + Check the gradient of the model by comparing to a numerical + estimate. 
If the verbose flag is passed, individual + components are tested (and printed) :param verbose: If True, print a "full" checking of each parameter :type verbose: bool :param step: The size of the step around which to linearise the objective - :type step: float (defaul 1e-6) + :type step: float (default 1e-6) :param tolerance: the tolerance allowed (see note) :type tolerance: float (default 1e-3) @@ -467,15 +508,15 @@ class Model(Parameterized): def input_sensitivity(self): """ - return an array describing the sesitivity of the Model to each input + return an array describing the sensitivity of the model to each input NB. Right now, we're basing this on the lengthscales (or variances) of the kernel. TODO: proper sensitivity analysis - where we integrate across the Model inputs and evaluate the - effect on the variance of the Model output. """ + where we integrate across the model inputs and evaluate the + effect on the variance of the model output. """ if not hasattr(self, 'kern'): - raise ValueError, "this Model has no kernel" + raise ValueError, "this model has no kernel" k = [p for p in self.kern.parts if p.name in ['rbf', 'linear', 'rbf_inv']] if (not len(k) == 1) or (not k[0].ARD): diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index be715335..d78297d0 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -14,7 +14,10 @@ class kern(Parameterized): """ This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where. - The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used. + The technical code for kernels is divided into _parts_ (see + e.g. rbf.py). This object contains a list of parts, which are + computed additively. For multiplication, special _prod_ parts + are used. :param input_dim: The dimensionality of the kernel's input space :type input_dim: int @@ -149,7 +152,7 @@ class kern(Parameterized): return g def compute_param_slices(self): - """create a set of slices that can index the parameters of each part""" + """create a set of slices that can index the parameters of each part.""" self.param_slices = [] count = 0 for p in self.parts: @@ -200,11 +203,19 @@ class kern(Parameterized): """ return self.prod(other) + def __pow__(self, other): + """ + Shortcut for tensor `prod`, i.e. self.prod(other, tensor=True). + """ + return self.prod(other, tensor=True) + def prod(self, other, tensor=False): """ - multiply two kernels (either on the same space, or on the tensor product of the input space) + multiply two kernels (either on the same space, or on the tensor product of the input space). :param other: the other kernel to be added :type other: GPy.kern + :param tensor: whether or not to use the tensor product of the input spaces (default is False). 
+ :type tensor: bool """ K1 = self.copy() K2 = other.copy() @@ -273,7 +284,7 @@ class kern(Parameterized): [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)] def _get_param_names(self): - # this is a bit nasty: we wat to distinguish between parts with the same name by appending a count + # this is a bit nasty: we want to distinguish between parts with the same name by appending a count part_names = np.array([k.name for k in self.parts], dtype=np.str) counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)] cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)] @@ -295,11 +306,13 @@ class kern(Parameterized): def dK_dtheta(self, dL_dK, X, X2=None): """ - :param dL_dK: An array of dL_dK derivaties, dL_dK - :type dL_dK: Np.ndarray (N x num_inducing) + Compute the gradient of the covariance function with respect to the parameters. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. + :type dL_dK: np.ndarray (num_samples x num_inducing) :param X: Observed data inputs - :type X: np.ndarray (N x input_dim) - :param X2: Observed dara inputs (optional, defaults to X) + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) :type X2: np.ndarray (num_inducing x input_dim) """ assert X.shape[1] == self.input_dim @@ -312,6 +325,14 @@ class kern(Parameterized): return self._transform_gradients(target) def dK_dX(self, dL_dK, X, X2=None): + """Compute the gradient of the covariance function with respect to X. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. + :type dL_dK: np.ndarray (num_samples x num_inducing) + :param X: Observed data inputs + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) + :type X2: np.ndarray (num_inducing x input_dim)""" if X2 is None: X2 = X target = np.zeros_like(X) @@ -322,6 +343,7 @@ class kern(Parameterized): return target def Kdiag(self, X, which_parts='all'): + """Compute the diagonal of the covariance function for inputs X.""" if which_parts == 'all': which_parts = [True] * self.Nparts assert X.shape[1] == self.input_dim @@ -330,6 +352,7 @@ class kern(Parameterized): return target def dKdiag_dtheta(self, dL_dKdiag, X): + """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" assert X.shape[1] == self.input_dim assert dL_dKdiag.size == X.shape[0] target = np.zeros(self.num_params) @@ -373,16 +396,18 @@ class kern(Parameterized): return target def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are N,num_inducing,input_dim""" + """return shapes are num_samples,num_inducing,input_dim""" target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] return target_mu, target_S def psi2(self, Z, mu, S): """ + Compute the psi2 statistics for the covariance function. 
+ :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each N x input_dim) - :returns psi2: np.ndarray (N,num_inducing,num_inducing) + :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) + :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) """ target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] @@ -406,6 +431,7 @@ class kern(Parameterized): return target def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): + """Gradient of the psi2 statistics with respect to the parameters.""" target = np.zeros(self.num_params) [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] @@ -509,3 +535,36 @@ class kern(Parameterized): pb.title("k(x1,x2 ; %0.1f,%0.1f)" % (x[0, 0], x[0, 1])) else: raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions" + + def objective_and_gradients_dK_dtheta(self, param, X, X2=None): + self._set_params(param) + K = self.K(X, X2) + f = K.sum() + dL_dK = np.ones_like(K) + g = self.dK_dtheta(dL_dK, X, X2) + return f, g + + def objective_and_gradients_dK_dX(self, param, X, X2=None): + self._set_params(param) + K = self.K(X, X2) + f = K.sum() + dL_dK = np.ones_like(K) + g = self.dK_dX(dL_dK, X, X2) + return f, g + + def objective_and_gradients_dKdiag_dtheta(self, param, X, X2=None): + self._set_params(param) + Kdiag = self.Kdiag(X) + f = Kdiag.sum() + dL_dK = np.ones_like(Kdiag) + g = self.dKdiag_dtheta(dL_dK, X) + return f, g + + def objective_and_gradients_dKdiag_dX(self, param, X, X2=None): + self._set_params(param) + Kdiag = self.Kdiag(X) + f = Kdiag.sum() + dL_dK = np.ones_like(Kdiag) + g = self.dKdiag_dX(dL_dK, X) + return f, g + diff --git a/GPy/notes.txt b/GPy/notes.txt index 011dc432..cae1f337 100644 --- a/GPy/notes.txt +++ b/GPy/notes.txt @@ -23,3 +23,7 @@ Can we comment up some of the list incomprehensions in hierarchical.py?? Need to tidy up classification.py, many examples include help that doesn't apply (it is suggested that you can try different approximation types) + +Shall we overload the ** operator to have tensor products? + +People aren't filling the doc strings in as they go; *everyone* needs to get into the habit of this (and of modifying them as they edit, or correcting them when there is a problem). diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 8a4f8b16..cd58a7e0 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -42,8 +42,7 @@ class KernelTests(unittest.TestCase): self.assertTrue(m.checkgrad()) - - + if __name__ == "__main__": print "Running unit tests, please be (very) patient..." unittest.main()
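Below is a minimal usage sketch of the new tensor-product shortcut and one of the new objective/gradient helpers introduced in kern.py above. The GPy.kern.rbf and GPy.kern.linear constructors and the _get_params accessor are assumed from the existing library; the helper name objective_and_gradients_dK_dtheta comes from this patch only.

    import numpy as np
    import GPy

    # Combine two 1-D kernels on the tensor product of their input spaces.
    # With the __pow__ overload this is shorthand for k1.prod(k2, tensor=True),
    # giving a kernel defined over a 2-D input space.
    k = GPy.kern.rbf(1) ** GPy.kern.linear(1)

    X = np.random.randn(10, 2)
    K = k.K(X)  # 10 x 10 covariance matrix

    # The helper returns a scalar objective (the sum of the covariance matrix)
    # and its gradient with respect to the kernel parameters, which is handy
    # for numerical gradient checks like those in kernel_tests.py.
    f, g = k.objective_and_gradients_dK_dtheta(k._get_params(), X)

If the ** overload is kept (see the question added to notes.txt), the same kernel can equivalently be built explicitly with prod(other, tensor=True).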