Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Conflicts: GPy/core/fitc.py
2026-06-08 15:05:15 +02:00 · 2013-06-05 16:37:57 +01:00 · 2013-06-05 16:37:57 +01:00 · c774432fee
commit c774432fee
parent d3f84816df b129e6a669
56 changed files with 783 additions and 807 deletions
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@ -14,7 +14,7 @@ class FITC(SparseGP):
    sparse FITC approximation

    :param X: inputs
-    :type X: np.ndarray (N x Q)
+    :type X: np.ndarray (num_data x Q)
    :param likelihood: a likelihood instance, containing the observed data
    :type likelihood: GPy.likelihood.(Gaussian | EP)
    :param kernel : the kernel (covariance function). See link kernels
@ -57,7 +57,7 @@ class FITC(SparseGP):
        self.V_star = self.beta_star * self.likelihood.Y

        # The rather complex computations of self.A
-        tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
+        tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
        tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
        self.A = tdot(tmp)

@ -113,7 +113,7 @@ class FITC(SparseGP):
        self._dpsi1_dX_jkj = 0
        self._dpsi1_dtheta_jkj = 0

-        for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
+        for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3):
            K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
            _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
            _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
@ -137,14 +137,14 @@ class FITC(SparseGP):
            aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
            aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)

-            dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
+            dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
            dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) *  Lmi_psi1 * dbstar_dnoise.T)
            dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) *  Lmi_psi1 * dbstar_dnoise.T)

            dD_dnoise_1 =  mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
            alpha = mdot(LBiLmipsi1,self.V_star)
            alpha_ = mdot(LBiLmipsi1.T,alpha)
-            dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
+            dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise )

            dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
            dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
@ -154,7 +154,7 @@ class FITC(SparseGP):

    def log_likelihood(self):
        """ Compute the (lower bound on the) log marginal likelihood """
-        A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
+        A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
        C = -self.output_dim * (np.sum(np.log(np.diag(self.LB))))
        D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
        return A + C + D
@ -204,8 +204,8 @@ class FITC(SparseGP):
            # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)

            # Ci = I + (RPT0)Di(RPT0).T
-            # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
-            #   = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
+            # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
+            #   = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
            #   = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
            #   = I - V.T * V
            U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -33,8 +33,8 @@ class GP(GPBase):
        self._set_params(self._get_params())

    def _set_params(self, p):
-        self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
-        self.likelihood._set_params(p[self.kern.Nparam_transformed():])
+        self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
+        self.likelihood._set_params(p[self.kern.num_params_transformed():])

        self.K = self.kern.K(self.X)
        self.K += self.likelihood.covariance_matrix
@ -46,12 +46,12 @@ class GP(GPBase):
            #alpha = np.dot(self.Ki, self.likelihood.Y)
            alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)

-            self.dL_dK = 0.5 * (tdot(alpha) - self.input_dim * self.Ki)
+            self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki)
        else:
            #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
            tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
            tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
-            self.dL_dK = 0.5 * (tmp - self.input_dim * self.Ki)
+            self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)

    def _get_params(self):
        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@ -1,24 +1,24 @@
 import numpy as np
-import model
 from .. import kern
 from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
 import pylab as pb
+from GPy.core.model import Model

-class GPBase(model.model):
+class GPBase(Model):
    """
-    Gaussian Process model for holding shared behaviour between
+    Gaussian Process Model for holding shared behaviour between
    sprase_GP and GP models
    """

    def __init__(self, X, likelihood, kernel, normalize_X=False):
        self.X = X
        assert len(self.X.shape) == 2
-        self.N, self.input_dim = self.X.shape
+        self.num_data, self.input_dim = self.X.shape
        assert isinstance(kernel, kern.kern)
        self.kern = kernel
        self.likelihood = likelihood
        assert self.X.shape[0] == self.likelihood.data.shape[0]
-        self.N, self.output_dim = self.likelihood.data.shape
+        self.num_data, self.output_dim = self.likelihood.data.shape

        if normalize_X:
            self._Xmean = X.mean(0)[None, :]
@ -28,7 +28,7 @@ class GPBase(model.model):
            self._Xmean = np.zeros((1, self.input_dim))
            self._Xstd = np.ones((1, self.input_dim))

-        model.model.__init__(self)
+        Model.__init__(self)

        # All leaf nodes should call self._set_params(self._get_params()) at
        # the end
@ -84,8 +84,8 @@ class GPBase(model.model):
            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
            m, v = self._raw_predict(Xnew, which_parts=which_parts)
            m = m.reshape(resolution, resolution).T
-            ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
-            ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
+            ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
+            ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
            ax.set_xlim(xmin[0], xmax[0])
            ax.set_ylim(xmin[1], xmax[1])
        else:
@ -94,9 +94,9 @@ class GPBase(model.model):
    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None):
        """
        TODO: Docstrings!
+        
        :param levels: for 2D plotting, the number of contour levels to use
        is ax is None, create a new figure
-
        """
        # TODO include samples
        if which_data == 'all':
@ -111,7 +111,7 @@ class GPBase(model.model):
            Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now

            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
-            m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
+            m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
            for d in range(m.shape[1]):
                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
                ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
@ -122,13 +122,13 @@ class GPBase(model.model):

        elif self.X.shape[1] == 2: # FIXME
            resolution = resolution or 50
-            Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
+            Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
            x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
-            m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
+            m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
            m = m.reshape(resolution, resolution).T
-            ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
+            ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
            Yf = self.likelihood.Y.flatten()
-            ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
+            ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
            ax.set_xlim(xmin[0], xmax[0])
            ax.set_ylim(xmin[1], xmax[1])

--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -6,37 +6,32 @@ from .. import likelihoods
 from ..inference import optimization
 from ..util.linalg import jitchol
 from GPy.util.misc import opt_wrapper
-from parameterised import parameterised
-from scipy import optimize
+from parameterised import Parameterised
 import multiprocessing as mp
 import numpy as np
-import priors
-import re
-import sys
-import pdb
 from GPy.core.domains import POSITIVE, REAL
 # import numdifftools as ndt

-class model(parameterised):
+class Model(Parameterised):
    def __init__(self):
-        parameterised.__init__(self)
+        Parameterised.__init__(self)
        self.priors = None
        self.optimization_runs = []
        self.sampling_runs = []
        self.preferred_optimizer = 'scg'
-        #self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
+        # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
    def _get_params(self):
-        raise NotImplementedError, "this needs to be implemented to use the model class"
+        raise NotImplementedError, "this needs to be implemented to use the Model class"
    def _set_params(self, x):
-        raise NotImplementedError, "this needs to be implemented to use the model class"
+        raise NotImplementedError, "this needs to be implemented to use the Model class"
    def log_likelihood(self):
-        raise NotImplementedError, "this needs to be implemented to use the model class"
+        raise NotImplementedError, "this needs to be implemented to use the Model class"
    def _log_likelihood_gradients(self):
-        raise NotImplementedError, "this needs to be implemented to use the model class"
+        raise NotImplementedError, "this needs to be implemented to use the Model class"

    def set_prior(self, regexp, what):
        """
-        Sets priors on the model parameters.
+        Sets priors on the Model parameters.

        Arguments
        ---------
@ -65,7 +60,7 @@ class model(parameterised):
        if len(tie_matches) > 1:
            raise ValueError, "cannot place Prior across multiple ties"
        elif len(tie_matches) == 1:
-            which = which[:1]  # just place a Prior object on the first parameter
+            which = which[:1] # just place a Prior object on the first parameter


        # check constraints are okay
@ -95,7 +90,7 @@ class model(parameterised):

    def get_gradient(self, name, return_names=False):
        """
-        Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
+        Get Model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
        """
        matches = self.grep_param_names(name)
        if len(matches):
@ -135,7 +130,7 @@ class model(parameterised):

    def randomize(self):
        """
-        Randomize the model.
+        Randomize the Model.
        Make this draw from the Prior if one exists, else draw from N(0,1)
        """
        # first take care of all parameters (from N(0,1))
@ -147,16 +142,16 @@ class model(parameterised):
        if self.priors is not None:
            [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
        self._set_params(x)
-        self._set_params_transformed(self._get_params_transformed())  # makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)


-    def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
+    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
-        Perform random restarts of the model, and set the model to the best
+        Perform random restarts of the Model, and set the Model to the best
        seen solution.

        If the robust flag is set, exceptions raised during optimizations will
-        be handled silently.  If _all_ runs fail, the model is reset to the
+        be handled silently.  If _all_ runs fail, the Model is reset to the
        existing parameter values.

        Notes
@ -179,19 +174,19 @@ class model(parameterised):
            try:
                jobs = []
                pool = mp.Pool(processes=num_processes)
-                for i in range(Nrestarts):
+                for i in range(num_restarts):
                    self.randomize()
                    job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                    jobs.append(job)

-                pool.close()  # signal that no more data coming in
-                pool.join()  # wait for all the tasks to complete
+                pool.close() # signal that no more data coming in
+                pool.join() # wait for all the tasks to complete
            except KeyboardInterrupt:
                print "Ctrl+c received, terminating and joining pool."
                pool.terminate()
                pool.join()

-        for i in range(Nrestarts):
+        for i in range(num_restarts):
            try:
                if not parallel:
                    self.randomize()
@ -200,10 +195,10 @@ class model(parameterised):
                    self.optimization_runs.append(jobs[i].get())

                if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
+                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
            except Exception as e:
                if robust:
-                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
+                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
                else:
                    raise e

@ -218,11 +213,11 @@ class model(parameterised):
        Ensure that any variables which should clearly be positive have been constrained somehow.
        """
        positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
-        param_names = self._get_param_names()
+        # param_names = self._get_param_names()
        currently_constrained = self.all_constrained_indices()
        to_make_positive = []
        for s in positive_strings:
-            for i in self.grep_param_names(".*"+s):
+            for i in self.grep_param_names(".*" + s):
                if not (i in currently_constrained):
                    to_make_positive.append(i)
        if len(to_make_positive):
@ -240,18 +235,18 @@ class model(parameterised):
        Gets the gradients from the likelihood and the priors.
        """
        self._set_params_transformed(x)
-        obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
+        obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
        return obj_grads

    def objective_and_gradients(self, x):
        self._set_params_transformed(x)
        obj_f = -self.log_likelihood() - self.log_prior()
-        obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
+        obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
        return obj_f, obj_grads

    def optimize(self, optimizer=None, start=None, **kwargs):
        """
-        Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
+        Optimize the Model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
        kwargs are passed to the optimizer. They can be:

        :max_f_eval: maximum number of function evaluations
@ -274,7 +269,7 @@ class model(parameterised):

    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
        # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
-        sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
+        sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable
        sgd.run()
        self.optimization_runs.append(sgd)

@ -291,7 +286,7 @@ class model(parameterised):
            def f(x):
                self._set_params(x)
                return self.log_likelihood()
-            h = ndt.Hessian(f)
+            h = ndt.Hessian(f) # @UndefinedVariable
            A = -h(x)
            self._set_params(x)
        # check for almost zero components on the diagonal which screw up the cholesky
@ -300,7 +295,7 @@ class model(parameterised):
        return A

    def Laplace_evidence(self):
-        """Returns an estiamte of the model evidence based on the Laplace approximation.
+        """Returns an estimate of the Model evidence based on the Laplace approximation.
        Uses a numerical estimate of the hessian if none is available analytically"""
        A = self.Laplace_covariance()
        try:
@ -310,12 +305,12 @@ class model(parameterised):
        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld

    def __str__(self):
-        s = parameterised.__str__(self).split('\n')
+        s = Parameterised.__str__(self).split('\n')
        # add priors to the string
        if self.priors is not None:
            strs = [str(p) if p is not None else '' for p in self.priors]
        else:
-            strs = ['']*len(self._get_params())
+            strs = [''] * len(self._get_params())
        width = np.array(max([len(p) for p in strs] + [5])) + 4

        log_like = self.log_likelihood()
@ -336,7 +331,7 @@ class model(parameterised):

    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
        """
-        Check the gradient of the model by comparing to a numerical estimate.
+        Check the gradient of the Model by comparing to a numerical estimate.
        If the verbose flag is passed, invividual components are tested (and printed)

        :param verbose: If True, print a "full" checking of each parameter
@ -389,7 +384,7 @@ class model(parameterised):
                param_list = range(len(x))
            else:
                param_list = self.grep_param_names(target_param, transformed=True, search=True)
-                if not param_list:
+                if not np.any(param_list):
                    print "No free parameters to check"
                    return

@ -419,15 +414,15 @@ class model(parameterised):

    def input_sensitivity(self):
        """
-        return an array describing the sesitivity of the model to each input
+        return an array describing the sensitivity of the Model to each input

        NB. Right now, we're basing this on the lengthscales (or
        variances) of the kernel.  TODO: proper sensitivity analysis
-        where we integrate across the model inputs and evaluate the
-        effect on the variance of the model output.  """
+        where we integrate across the Model inputs and evaluate the
+        effect on the variance of the Model output.  """

        if not hasattr(self, 'kern'):
-            raise ValueError, "this model has no kernel"
+            raise ValueError, "this Model has no kernel"

        k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
        if (not len(k) == 1) or (not k[0].ARD):
@ -474,8 +469,8 @@ class model(parameterised):
            ll_change = new_ll - last_ll

            if ll_change < 0:
-                self.likelihood = last_approximation  # restore previous likelihood approximation
-                self._set_params(last_params)  # restore model parameters
+                self.likelihood = last_approximation # restore previous likelihood approximation
+                self._set_params(last_params) # restore Model parameters
                print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                stop = True
            else:
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@ -6,12 +6,10 @@ import numpy as np
 import re
 import copy
 import cPickle
-import os
-from ..util.squashers import sigmoid
 import warnings
 import transformations

-class parameterised(object):
+class Parameterised(object):
    def __init__(self):
        """
        This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters
@ -36,7 +34,7 @@ class parameterised(object):
        """
        Returns a **copy** of parameters in non transformed space

-        :see_also: :py:func:`GPy.core.parameterised.params_transformed`
+        :see_also: :py:func:`GPy.core.Parameterised.params_transformed`
        """
        return self._get_params()

@ -49,7 +47,7 @@ class parameterised(object):
        """
        Returns a **copy** of parameters in transformed space

-        :see_also: :py:func:`GPy.core.parameterised.params`
+        :see_also: :py:func:`GPy.core.Parameterised.params`
        """
        return self._get_params_transformed()

@ -113,7 +111,7 @@ class parameterised(object):
        if hasattr(self, 'prior'):
            pass

-        self._set_params_transformed(self._get_params_transformed())  # sets tied parameters to single value
+        self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value

    def untie_everything(self):
        """Unties all parameters by setting tied_indices to an empty list."""
@ -145,7 +143,7 @@ class parameterised(object):
        else:
            return np.nonzero([regexp.match(name) for name in names])[0]

-    def Nparam_transformed(self):
+    def num_params_transformed(self):
        removed = 0
        for tie in self.tied_indices:
            removed += tie.size - 1
@ -159,18 +157,18 @@ class parameterised(object):
        """Unconstrain matching parameters.  does not untie parameters"""
        matches = self.grep_param_names(regexp)

-        #tranformed contraints:
+        # transformed constraints:
        for match in matches:
-            self.constrained_indices = [i[i<>match] for i in self.constrained_indices]
+            self.constrained_indices = [i[i <> match] for i in self.constrained_indices]

-        #remove empty constraints
-        tmp = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
+        # remove empty constraints
+        tmp = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
        if tmp:
-            self.constrained_indices, self.constraints = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
+            self.constrained_indices, self.constraints = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
            self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints)

        # fixed:
-        self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices,values in zip(self.fixed_indices,self.fixed_values)]
+        self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices, values in zip(self.fixed_indices, self.fixed_values)]
        self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices]

        # remove empty elements
@ -189,7 +187,7 @@ class parameterised(object):
        """ Set positive constraints. """
        self.constrain(regexp, transformations.logexp())

-    def constrain_bounded(self, regexp,lower, upper):
+    def constrain_bounded(self, regexp, lower, upper):
        """ Set bounded constraints. """
        self.constrain(regexp, transformations.logistic(lower, upper))

@ -199,8 +197,8 @@ class parameterised(object):
        else:
            return np.empty(shape=(0,))

-    def constrain(self,regexp,transform):
-        assert isinstance(transform,transformations.transformation)
+    def constrain(self, regexp, transform):
+        assert isinstance(transform, transformations.transformation)

        matches = self.grep_param_names(regexp)
        overlap = set(matches).intersection(set(self.all_constrained_indices()))
@ -251,7 +249,7 @@ class parameterised(object):
    def _get_params_transformed(self):
        """use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
        x = self._get_params()
-        [np.put(x,i,t.finv(x[i])) for i,t in zip(self.constrained_indices,self.constraints)]
+        [np.put(x, i, t.finv(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]

        to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices]
        if len(to_remove):
@ -263,7 +261,7 @@ class parameterised(object):
        """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
        self._set_params(self._untransform_params(x))

-    def _untransform_params(self,x):
+    def _untransform_params(self, x):
        """
        The transformation required for _set_params_transformed.

@ -290,9 +288,9 @@ class parameterised(object):
        [np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)]
        [np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]

-        [np.put(xx,i,t.f(xx[i])) for i,t in zip(self.constrained_indices, self.constraints)]
-        if hasattr(self,'debug'):
-            stop
+        [np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)]
+        if hasattr(self, 'debug'):
+            stop # @UndefinedVariable

        return xx

@ -316,7 +314,7 @@ class parameterised(object):
            remove = np.hstack((remove, np.hstack(self.fixed_indices)))

        # add markers to show that some variables are constrained
-        for i,t in zip(self.constrained_indices,self.constraints):
+        for i, t in zip(self.constrained_indices, self.constraints):
            for ii in i:
                n[ii] = n[ii] + t.__str__()

@ -333,10 +331,10 @@ class parameterised(object):
        if not N:
            return "This object has no free parameters."
        header = ['Name', 'Value', 'Constraints', 'Ties']
-        values = self._get_params()  # map(str,self._get_params())
+        values = self._get_params() # map(str,self._get_params())
        # sort out the constraints
        constraints = [''] * len(names)
-        for i,t in zip(self.constrained_indices,self.constraints):
+        for i, t in zip(self.constrained_indices, self.constraints):
            for ii in i:
                constraints[ii] = t.__str__()
        for i in self.fixed_indices:
@ -354,7 +352,7 @@ class parameterised(object):
        max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
        max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
        cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
-        columns = cols.sum()
+        # columns = cols.sum()

        header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
        header_string = map(lambda x: '|'.join(x), [header_string])
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -13,13 +13,13 @@ class SparseGP(GPBase):
    Variational sparse GP model

    :param X: inputs
-    :type X: np.ndarray (N x input_dim)
+    :type X: np.ndarray (num_data x input_dim)
    :param likelihood: a likelihood instance, containing the observed data
    :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
    :param kernel : the kernel (covariance function). See link kernels
    :type kernel: a GPy.kern.kern instance
    :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
-    :type X_variance: np.ndarray (N x input_dim) | None
+    :type X_variance: np.ndarray (num_data x input_dim) | None
    :param Z: inducing inputs (optional, see note)
    :type Z: np.ndarray (num_inducing x input_dim) | None
    :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
@ -69,7 +69,7 @@ class SparseGP(GPBase):
        # The rather complex computations of self.A
        if self.has_uncertain_inputs:
            if self.likelihood.is_heteroscedastic:
-                psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
+                psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
            else:
                psi2_beta = self.psi2.sum(0) * self.likelihood.precision
            evals, evecs = linalg.eigh(psi2_beta)
@ -77,7 +77,7 @@ class SparseGP(GPBase):
            tmp = evecs * np.sqrt(clipped_evals)
        else:
            if self.likelihood.is_heteroscedastic:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
+                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data)))
            else:
                tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
        tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
@ -99,28 +99,28 @@ class SparseGP(GPBase):

        # Compute dL_dKmm
        tmp = tdot(self._LBi_Lmi_psi1V)
-        self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.input_dim * np.eye(self.num_inducing) + tmp)
+        self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
        tmp = -0.5 * self.DBi_plus_BiPBi
-        tmp += -0.5 * self.B * self.input_dim
-        tmp += self.input_dim * np.eye(self.num_inducing)
+        tmp += -0.5 * self.B * self.output_dim
+        tmp += self.output_dim * np.eye(self.num_inducing)
        self.dL_dKmm = backsub_both_sides(self.Lm, tmp)

        # Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
-        self.dL_dpsi0 = -0.5 * self.input_dim * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
+        self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
        self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
-        dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.input_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
+        dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)

        if self.likelihood.is_heteroscedastic:
            if self.has_uncertain_inputs:
                self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
            else:
-                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
+                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data))
                self.dL_dpsi2 = None
        else:
            dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
            if self.has_uncertain_inputs:
                # repeat for each of the num_data psi_2 matrices
-                self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
+                self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
            else:
                # subsume back into psi1 (==Kmn)
                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
@ -135,26 +135,26 @@ class SparseGP(GPBase):
            raise NotImplementedError, "heteroscedatic derivates not implemented"
        else:
            # likelihood is not heteroscedastic
-            self.partial_for_likelihood = -0.5 * self.N * self.input_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
-            self.partial_for_likelihood += 0.5 * self.input_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
+            self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
+            self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))

    def log_likelihood(self):
        """ Compute the (lower bound on the) log marginal likelihood """
        if self.likelihood.is_heteroscedastic:
-            A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
+            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
        else:
-            A = -0.5 * self.N * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
+            A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
        C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
        D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
        return A + B + C + D + self.likelihood.Z

    def _set_params(self, p):
-        self.Z = p[:self.num_inducing * self.output_dim].reshape(self.num_inducing, self.input_dim)
-        self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
-        self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
+        self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
+        self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
+        self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
        self._compute_kernel_matrices()
        self._computations()