Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-06-08 15:05:15 +02:00 · 2013-10-08 11:31:13 +01:00 · 2013-10-08 11:31:13 +01:00 · 958e9f7c7a
commit 958e9f7c7a
parent 05a912f40b 87200c6e12
104 changed files with 5704 additions and 1159 deletions
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@ -11,25 +11,27 @@ from sparse_gp import SparseGP

 class FITC(SparseGP):
    """
-    sparse FITC approximation
+
+    Sparse FITC approximation

    :param X: inputs
    :type X: np.ndarray (num_data x Q)
    :param likelihood: a likelihood instance, containing the observed data
    :type likelihood: GPy.likelihood.(Gaussian | EP)
-    :param kernel : the kernel (covariance function). See link kernels
+    :param kernel: the kernel (covariance function). See link kernels
    :type kernel: a GPy.kern.kern instance
    :param Z: inducing inputs (optional, see note)
    :type Z: np.ndarray (M x Q) | None
-    :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
+    :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
    :type normalize_(X|Y): bool
+
    """

    def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
        SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
        assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"

-    def update_likelihood_approximation(self):
+    def update_likelihood_approximation(self, **kwargs):
        """
        Approximates a non-Gaussian likelihood using Expectation Propagation

@ -37,7 +39,7 @@ class FITC(SparseGP):
        this function does nothing
        """
        self.likelihood.restart()
-        self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
+        self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
        self._set_params(self._get_params())

    def _compute_kernel_matrices(self):
@ -120,7 +122,7 @@ class FITC(SparseGP):
            _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
            self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
            self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
-            self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
+            self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
            self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])

        # the partial derivative vector for the likelihood
@ -140,7 +142,6 @@ class FITC(SparseGP):

            dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
            dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) *  Lmi_psi1 * dbstar_dnoise.T)
-            dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) *  Lmi_psi1 * dbstar_dnoise.T)

            dD_dnoise_1 =  mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
            alpha = mdot(LBiLmipsi1,self.V_star)
@ -174,7 +175,7 @@ class FITC(SparseGP):

    def dL_dZ(self):
        dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
-        dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
+        dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
        dL_dZ += self._dpsi1_dX
        dL_dZ += self._dKmm_dX
        return dL_dZ
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -15,13 +15,10 @@ class GP(GPBase):

    :param X: input observations
    :param kernel: a GPy kernel, defaults to rbf+white
-    :parm likelihood: a GPy likelihood
+    :param likelihood: a GPy likelihood
    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :rtype: model object
-    :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
-    :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
-    :type powerep: list

    .. Note:: Multiple independent outputs are allowed using columns of Y

@ -61,11 +58,10 @@ class GP(GPBase):
    def _get_params(self):
        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))

-
    def _get_param_names(self):
        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

-    def update_likelihood_approximation(self):
+    def update_likelihood_approximation(self, **kwargs):
        """
        Approximates a non-gaussian likelihood using Expectation Propagation

@ -73,7 +69,7 @@ class GP(GPBase):
        this function does nothing
        """
        self.likelihood.restart()
-        self.likelihood.fit_full(self.kern.K(self.X))
+        self.likelihood.fit_full(self.kern.K(self.X), **kwargs)
        self._set_params(self._get_params()) # update the GP

    def _model_fit_term(self):
@ -105,7 +101,12 @@ class GP(GPBase):

        Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
        """
-        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        #return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        if not isinstance(self.likelihood,EP):
+            tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        else:
+            tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        return tmp

    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
        """
@ -127,20 +128,19 @@ class GP(GPBase):
            debug_this # @UndefinedVariable
        return mu, var

-    def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()):
+    def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
        """
        Predict the function(s) at the new point(s) Xnew.
-        Arguments
-        ---------
+
        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.input_dim
        :param which_parts:  specifies which outputs kernel(s) to use in prediction
        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
-        :rtype: posterior mean,  a Numpy array, Nnew x self.input_dim
-        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
-        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
+        :returns: mean: posterior mean,  a Numpy array, Nnew x self.input_dim
+        :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim


           If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
@ -153,5 +153,43 @@ class GP(GPBase):

        # now push through likelihood
        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
-
        return mean, var, _025pm, _975pm
+
+    def _raw_predict_single_output(self, _Xnew, output, which_parts='all', full_cov=False,stop=False):
+        """
+        For a specific output, calls _raw_predict() at the new point(s) _Xnew.
+        This functions calls _add_output_index(), so _Xnew should not have an index column specifying the output.
+        ---------
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., output_dim-1}
+        :param which_parts:  specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+
+        .. Note:: For multiple non-independent outputs models only.
+        """
+        _Xnew = self._add_output_index(_Xnew, output)
+        return self._raw_predict(_Xnew, which_parts=which_parts,full_cov=full_cov, stop=stop)
+
+    def predict_single_output(self, Xnew,output=0, which_parts='all', full_cov=False, likelihood_args=dict()):
+        """
+        For a specific output, calls predict() at the new point(s) Xnew.
+        This functions calls _add_output_index(), so Xnew should not have an index column specifying the output.
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param which_parts:  specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :returns: mean: posterior mean,  a Numpy array, Nnew x self.input_dim
+        :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
+
+        .. Note:: For multiple non-independent outputs models only.
+        """
+        Xnew = self._add_output_index(Xnew, output)
+        return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@ -3,13 +3,14 @@ from .. import kern
 from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
 import pylab as pb
 from GPy.core.model import Model
+import warnings
+from ..likelihoods import Gaussian, Gaussian_Mixed_Noise

 class GPBase(Model):
    """
    Gaussian process base model for holding shared behaviour between
    sparse_GP and GP models.
    """
-
    def __init__(self, X, likelihood, kernel, normalize_X=False):
        self.X = X
        assert len(self.X.shape) == 2
@ -57,18 +58,64 @@ class GPBase(Model):
        self.X = state.pop()
        Model.setstate(self, state)

+    def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
+        """
+        Samples the posterior GP at the points X.
+
+        :param X: The points at which to take the samples.
+        :type X: np.ndarray, Nnew x self.input_dim.
+        :param size: the number of a posteriori samples to plot.
+        :type size: int.
+        :param which_parts: which of the kernel functions to plot (additively).
+        :type which_parts: 'all', or list of bools.
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
+        :type full_cov: bool.
+        :returns: Ysim: set of simulations, a Numpy array (N x samples).
+        """
+        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
+        v = v.reshape(m.size,-1) if len(v.shape)==3 else v
+        if not full_cov:
+            Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
+        else:
+            Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
+
+        return Ysim
+
+    def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None):
+        """
+        Samples the posterior GP at the points X.
+
+        :param X: the points at which to take the samples.
+        :type X: np.ndarray, Nnew x self.input_dim.
+        :param size: the number of a posteriori samples to plot.
+        :type size: int.
+        :param which_parts: which of the kernel functions to plot (additively).
+        :type which_parts: 'all', or list of bools.
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
+        :type full_cov: bool.
+        :param noise_model: for mixed noise likelihood, the noise model to use in the samples.
+        :type noise_model: integer.
+        :returns: Ysim: set of simulations, a Numpy array (N x samples).
+        """
+        Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov)
+        if isinstance(self.likelihood,Gaussian):
+            noise_std = np.sqrt(self.likelihood._get_params())
+            Ysim += np.random.normal(0,noise_std,Ysim.shape)
+        elif isinstance(self.likelihood,Gaussian_Mixed_Noise):
+            assert noise_model is not None, "A noise model must be specified."
+            noise_std = np.sqrt(self.likelihood._get_params()[noise_model])
+            Ysim += np.random.normal(0,noise_std,Ysim.shape)
+        else:
+            Ysim = self.likelihood.noise_model.samples(Ysim)
+
+        return Ysim
+
    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
        """
        Plot the GP's view of the world, where the data is normalized and the
-        likelihood is Gaussian.
-
-        Plot the posterior of the GP.
          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
          - In two dimsensions, a contour-plot shows the mean predicted function
-          - In higher dimensions, we've no implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions
-        using which_data and which_functions
+          - Not implemented in higher dimensions

        :param samples: the number of a posteriori samples to plot
        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
@ -85,6 +132,8 @@ class GPBase(Model):
        :param ax: axes to plot on.
        :type ax: axes handle

+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
        """
        if which_data == 'all':
            which_data = slice(None)
@ -94,17 +143,16 @@ class GPBase(Model):
            ax = fig.add_subplot(111)

        if self.X.shape[1] == 1:
+            resolution = resolution or 200
            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
-            if samples == 0:
-                m, v = self._raw_predict(Xnew, which_parts=which_parts)
-                gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
-                ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
-            else:
-                m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
-                Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
-                gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
-                for i in range(samples):
-                    ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+
+            m, v = self._raw_predict(Xnew, which_parts=which_parts)
+            if samples:
+                Ysim = self.posterior_samples_f(Xnew, samples, which_parts=which_parts, full_cov=True)
+                for yi in Ysim.T:
+                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
+            gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+
            ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
            ax.set_xlim(xmin, xmax)
            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
@ -112,6 +160,7 @@ class GPBase(Model):
            ax.set_ylim(ymin, ymax)

        elif self.X.shape[1] == 2:
+
            resolution = resolution or 50
            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
            m, v = self._raw_predict(Xnew, which_parts=which_parts)
@ -120,6 +169,10 @@ class GPBase(Model):
            ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
            ax.set_xlim(xmin[0], xmax[0])
            ax.set_ylim(xmin[1], xmax[1])
+
+            if samples:
+                warnings.warn("Samples only implemented for 1 dimensional inputs.")
+
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

@ -130,7 +183,7 @@ class GPBase(Model):
        Plot the posterior of the GP.
          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
          - In two dimsensions, a contour-plot shows the mean predicted function
-          - In higher dimensions, we've no implemented this yet !TODO!
+          - Not implemented in higher dimensions

        Can plot only part of the data and part of the posterior functions
        using which_data and which_functions
@ -151,17 +204,14 @@ class GPBase(Model):
        :type fignum: figure number
        :param ax: axes to plot on.
        :type ax: axes handle
+        :type output: integer (first output is 0)
        :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
        :type fixed_inputs: a list of tuples
        :param linecol: color of line to plot.
        :type linecol:
        :param fillcol: color of fill
-        :type fillcol: 
-        :param levels: for 2D plotting, the number of contour levels to use
-        is ax is None, create a new figure
-
+        :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
        """
-        # TODO include samples
        if which_data == 'all':
            which_data = slice(None)

@ -170,10 +220,10 @@ class GPBase(Model):
            ax = fig.add_subplot(111)

        plotdims = self.input_dim - len(fixed_inputs)
-
        if plotdims == 1:
+            resolution = resolution or 200

-            Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+            Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now

            fixed_dims = np.array([i for i,v in fixed_inputs])
            freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
@ -184,7 +234,14 @@ class GPBase(Model):
            for i,v in fixed_inputs:
                Xgrid[:,i] = v

-            m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
+            m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts)
+
+            if samples: #NOTE not tested with fixed_inputs
+                Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts, full_cov=True)
+                for yi in Ysim.T:
+                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
+                    #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
+
            for d in range(m.shape[1]):
                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
                ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
@ -193,17 +250,171 @@ class GPBase(Model):
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(ymin, ymax)

-        elif self.X.shape[1] == 2: # FIXME
+        elif self.X.shape[1] == 2:
+
            resolution = resolution or 50
            Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
            x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
            m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
            m = m.reshape(resolution, resolution).T
            ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-            Yf = self.likelihood.data.flatten()
+            Yf = self.likelihood.Y.flatten()
            ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
            ax.set_xlim(xmin[0], xmax[0])
            ax.set_ylim(xmin[1], xmax[1])

+            if samples:
+                warnings.warn("Samples only implemented for 1 dimensional inputs.")
+
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
+        """
+        For a specific output, in a multioutput model, this function works just as plot_f on single output models.
+
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
+        :param samples: the number of a posteriori samples to plot
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
+        :param which_data: which if the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param which_parts: which of the kernel functions to plot (additively)
+        :type which_parts: 'all', or list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+        :type resolution: int
+        :param full_cov:
+        :type full_cov: bool
+                :param fignum: figure to plot on.
+        :type fignum: figure number
+        :param ax: axes to plot on.
+        :type ax: axes handle
+        """
+        assert output is not None, "An output must be specified."
+        assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
+
+        if which_data == 'all':
+            which_data = slice(None)
+
+        if ax is None:
+            fig = pb.figure(num=fignum)
+            ax = fig.add_subplot(111)
+
+        if self.X.shape[1] == 2:
+            Xu = self.X[self.X[:,-1]==output ,0:1]
+            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+            Xnew_indexed = self._add_output_index(Xnew,output)
+
+            m, v = self._raw_predict(Xnew_indexed, which_parts=which_parts)
+
+            if samples:
+                Ysim = self.posterior_samples_f(Xnew_indexed, samples, which_parts=which_parts, full_cov=True)
+                for yi in Ysim.T:
+                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
+
+            gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+            ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
+            ax.set_xlim(xmin, xmax)
+            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+            ax.set_ylim(ymin, ymax)
+
+        elif self.X.shape[1] == 3:
+            raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
+            #if samples:
+            #    warnings.warn("Samples only implemented for 1 dimensional inputs.")
+
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+
+    def plot_single_output(self, output=None, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+        """
+        For a specific output, in a multioutput model, this function works just as plot_f on single output models.
+
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
+        :type plot_limits: np.array
+        :param which_data: which if the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param which_parts: which of the kernel functions to plot (additively)
+        :type which_parts: 'all', or list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+        :type resolution: int
+        :param levels: number of levels to plot in a contour plot.
+        :type levels: int
+        :param samples: the number of a posteriori samples to plot
+        :type samples: int
+        :param fignum: figure to plot on.
+        :type fignum: figure number
+        :param ax: axes to plot on.
+        :type ax: axes handle
+        :type output: integer (first output is 0)
+        :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
+        :type fixed_inputs: a list of tuples
+        :param linecol: color of line to plot.
+        :type linecol:
+        :param fillcol: color of fill
+        :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
+        """
+        assert output is not None, "An output must be specified."
+        assert len(self.likelihood.noise_model_list) > output, "The model has only %s outputs." %(self.output_dim + 1)
+        if which_data == 'all':
+            which_data = slice(None)
+
+        if ax is None:
+            fig = pb.figure(num=fignum)
+            ax = fig.add_subplot(111)
+
+        if self.X.shape[1] == 2:
+            resolution = resolution or 200
+
+            Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
+            Xu = self.X * self._Xscale + self._Xoffset
+            Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
+
+            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+            Xnew_indexed = self._add_output_index(Xnew,output)
+
+
+            m, v, lower, upper = self.predict(Xnew_indexed, which_parts=which_parts,noise_model=output)
+
+            if samples: #NOTE not tested with fixed_inputs
+                Ysim = self.posterior_samples(Xnew_indexed, samples, which_parts=which_parts, full_cov=True,noise_model=output)
+                for yi in Ysim.T:
+                    ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
+
+            for d in range(m.shape[1]):
+                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
+                ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
+            ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+            ax.set_xlim(xmin, xmax)
+            ax.set_ylim(ymin, ymax)
+
+        elif self.X.shape[1] == 3:
+            raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
+            #if samples:
+            #    warnings.warn("Samples only implemented for 1 dimensional inputs.")
+
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+
+    def _add_output_index(self,X,output):
+        """
+        In a multioutput model, appends an index column to X to specify the output it is related to.
+
+        :param X: Input data
+        :type X: np.ndarray, N x self.input_dim
+        :param output: output X is related to
+        :type output: integer in {0,..., output_dim-1}
+
+        .. Note:: For multiple non-independent outputs models only.
+        """
+
+        assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'
+
+        index = np.ones((X.shape[0],1))*output
+        return np.hstack((X,index))
--- a/GPy/core/mapping.py
+++ b/GPy/core/mapping.py
@ -49,6 +49,7 @@ class Mapping(Parameterized):

    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
        """
+
        Plot the mapping.
        
        Plots the mapping associated with the model.
@ -79,8 +80,7 @@ class Mapping(Parameterized):
        :type fixed_inputs: a list of tuples
        :param linecol: color of line to plot.
        :type linecol:
-        :param levels: for 2D plotting, the number of contour levels to use
-        is ax is None, create a new figure
+        :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure

        """
        # TODO include samples
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


@ -31,8 +31,8 @@ class Model(Parameterized):
    def getstate(self):
        """
        Get the current state of the class.
-        
        Inherited from Parameterized, so add those parameters to the state
+
        :return: list of states from the model.

        """
@ -46,7 +46,8 @@ class Model(Parameterized):
        call Parameterized with the rest of the state

        :param state: the state of the model.
-        :type state: list as returned from getstate.        
+        :type state: list as returned from getstate.
+
        """
        self.preferred_optimizer = state.pop()
        self.sampling_runs = state.pop()
@ -56,10 +57,11 @@ class Model(Parameterized):

    def set_prior(self, regexp, what):
        """
+
        Sets priors on the model parameters.

-        Notes
-        -----
+        **Notes**
+
        Asserts that the prior is suitable for the constraint. If the
        wrong constraint is in place, an error is raised.  If no
        constraint is in place, one is added (warning printed).
@ -185,8 +187,8 @@ class Model(Parameterized):
        be handled silently.  If _all_ runs fail, the model is reset to the
        existing parameter values.

-        Notes
-        -----
+        **Notes**
+
        :param num_restarts: number of restarts to use (default 10)
        :type num_restarts: int
        :param robust: whether to handle exceptions silently or not (default False)
@ -195,7 +197,9 @@ class Model(Parameterized):
        :type parallel: bool
        :param num_processes: number of workers in the multiprocessing pool
        :type numprocesses: int
-        **kwargs are passed to the optimizer. They can be:
+
+        \*\*kwargs are passed to the optimizer. They can be:
+
        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param max_iters: maximum number of iterations
@ -203,9 +207,7 @@ class Model(Parameterized):
        :param messages: whether to display during optimisation
        :type messages: bool

-        ..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically
-        set to the number of processors on the current machine.
-
+        .. note:: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine.

        """
        initial_parameters = self._get_params_transformed()
@ -397,17 +399,20 @@ class Model(Parameterized):
            return np.nan
        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld

-    def __str__(self, names=None):
-        if names is None:
-            names = self._get_print_names()
-        s = Parameterized.__str__(self, names=names).split('\n')
+    def __str__(self):
+        s = Parameterized.__str__(self).split('\n')
+        #def __str__(self, names=None):
+        #    if names is None:
+        #        names = self._get_print_names()
+        #s = Parameterized.__str__(self, names=names).split('\n')
        # add priors to the string
        if self.priors is not None:
            strs = [str(p) if p is not None else '' for p in self.priors]
        else:
-            strs = [''] * len(self._get_param_names())
-        name_indices = self.grep_param_names("|".join(names))
-        strs = np.array(strs)[name_indices]
+            strs = [''] * len(self._get_params())
+       #         strs = [''] * len(self._get_param_names())
+       #     name_indices = self.grep_param_names("|".join(names))
+       #     strs = np.array(strs)[name_indices]
        width = np.array(max([len(p) for p in strs] + [5])) + 4

        log_like = self.log_likelihood()
@ -456,9 +461,9 @@ class Model(Parameterized):
            gradient = self.objective_function_gradients(x)

            numerical_gradient = (f1 - f2) / (2 * dx)
-            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
-
-            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance
+            global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
+            
+            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
@ -496,7 +501,7 @@ class Model(Parameterized):
                gradient = self.objective_function_gradients(x)[i]

                numerical_gradient = (f1 - f2) / (2 * step)
-                ratio = (f1 - f2) / (2 * step * gradient)
+                ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
                difference = np.abs((f1 - f2) / 2 / step - gradient)

                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
@ -535,22 +540,17 @@ class Model(Parameterized):
            return k.variances


-    def pseudo_EM(self, epsilon=.1, **kwargs):
+    def pseudo_EM(self, stop_crit=.1, **kwargs):
        """
-        TODO: Should this not bein the GP class?
        EM - like algorithm  for Expectation Propagation and Laplace approximation

-        kwargs are passed to the optimize function. They can be:
-
-        :epsilon: convergence criterion
-        :max_f_eval: maximum number of function evaluations
-        :messages: whether to display during optimisation
-        :param optimzer: whice optimizer to use (defaults to self.preferred optimizer)
-        :type optimzer: string TODO: valid strings?
+        :param stop_crit: convergence criterion
+        :type stop_crit: float

+        .. Note: kwargs are passed to update_likelihood and optimize functions. 
        """
-        assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods"
-        ll_change = epsilon + 1.
+        assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
+        ll_change = stop_crit + 1.
        iteration = 0
        last_ll = -np.inf

@ -558,10 +558,25 @@ class Model(Parameterized):
        alpha = 0
        stop = False

+        #Handle **kwargs
+        ep_args = {}
+        for arg in kwargs.keys():
+            if arg in ('epsilon','power_ep'):
+                ep_args[arg] = kwargs[arg]
+                del kwargs[arg]
+
        while not stop:
            last_approximation = self.likelihood.copy()
            last_params = self._get_params()
-            self.update_likelihood_approximation()
+            if len(ep_args) == 2:
+                self.update_likelihood_approximation(epsilon=ep_args['epsilon'],power_ep=ep_args['power_ep'])
+            elif len(ep_args) == 1:
+                if  ep_args.keys()[0] == 'epsilon':
+                    self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
+                elif ep_args.keys()[0] == 'power_ep':
+                    self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
+            else:
+                self.update_likelihood_approximation()
            new_ll = self.log_likelihood()
            ll_change = new_ll - last_ll

@ -573,7 +588,7 @@ class Model(Parameterized):
            else:
                self.optimize(**kwargs)
                last_ll = self.log_likelihood()
-                if ll_change < epsilon:
+                if ll_change < stop_crit:
                    stop = True
            iteration += 1
            if stop:
--- a/GPy/core/parameterized.py
+++ b/GPy/core/parameterized.py
@ -27,9 +27,9 @@ class Parameterized(object):

    def _get_param_names(self):
        raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
-    def _get_print_names(self):
-        """ Override for which names to print out, when using print m """
-        return self._get_param_names()
+    #def _get_print_names(self):
+    #    """ Override for which names to print out, when using print m """
+    #    return self._get_param_names()

    def pickle(self, filename, protocol=None):
        if protocol is None:
@ -63,10 +63,10 @@ class Parameterized(object):
        """
        Get the current state of the class,
        here just all the indices, rest can get recomputed
-        
        For inheriting from Parameterized:
-        Allways append the state of the inherited object 
-        and call down to the inherited object in setstate!! 
+
+        Allways append the state of the inherited object
+        and call down to the inherited object in setstate!!
        """
        return [self.tied_indices,
                self.fixed_indices,
@ -231,17 +231,19 @@ class Parameterized(object):

    def constrain_fixed(self, regexp, value=None):
        """
-        Arguments
-        ---------
+
        :param regexp: which parameters need to be fixed.
        :type regexp: ndarray(dtype=int) or regular expression object or string
        :param value: the vlaue to fix the parameters to. If the value is not specified,
                 the parameter is fixed to the current value
        :type value: float
-        Notes
-        -----
+
+        **Notes**
+
        Fixing a parameter which is tied to another, or constrained in some way will result in an error.
-        To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
+
+        To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes.
+
        """
        matches = self.grep_param_names(regexp)
        overlap = set(matches).intersection(set(self.all_constrained_indices()))
@ -336,26 +338,30 @@ class Parameterized(object):
        n = [nn for i, nn in enumerate(n) if not i in remove]
        return n

-    @property
-    def all(self):
-        return self.__str__(self._get_param_names())
+    #@property
+    #def all(self):
+    #    return self.__str__(self._get_param_names())


-    def __str__(self, names=None, nw=30):
+    #def __str__(self, names=None, nw=30):
+    def __str__(self, nw=30):
        """
        Return a string describing the parameter names and their ties and constraints
        """
-        if names is None:
-            names = self._get_print_names()
-        name_indices = self.grep_param_names("|".join(names))
+        names = self._get_param_names()
+        #if names is None:
+        #    names = self._get_print_names()
+        #name_indices = self.grep_param_names("|".join(names))
        N = len(names)

        if not N:
            return "This object has no free parameters."
        header = ['Name', 'Value', 'Constraints', 'Ties']
-        values = self._get_params()[name_indices] # map(str,self._get_params())
+        values = self._get_params() # map(str,self._get_params())
+        #values = self._get_params()[name_indices] # map(str,self._get_params())
        # sort out the constraints
-        constraints = [''] * len(self._get_param_names())
+        constraints = [''] * len(names)
+        #constraints = [''] * len(self._get_param_names())
        for i, t in zip(self.constrained_indices, self.constraints):
            for ii in i:
                constraints[ii] = t.__str__()
@ -368,7 +374,10 @@ class Parameterized(object):
            for j in tie:
                ties[j] = '(' + str(i) + ')'

-        values = ['%.4f' % float(v) for v in values]
+        if values.size == 1:
+            values = ['%.4f' %float(values)]
+        else:
+            values = ['%.4f' % float(v) for v in values]
        max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
        max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
        max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
@ -383,3 +392,77 @@ class Parameterized(object):


        return ('\n'.join([header_string[0], separator] + param_string)) + '\n'
+
+    def grep_model(self,regexp):
+        regexp_indices = self.grep_param_names(regexp)
+        all_names = self._get_param_names()
+
+        names = [all_names[pj] for pj in regexp_indices]
+        N = len(names)
+
+        if not N:
+            return "Match not found."
+
+        header = ['Name', 'Value', 'Constraints', 'Ties']
+        all_values = self._get_params()
+        values = np.array([all_values[pj] for pj in regexp_indices])
+        constraints = [''] * len(names)
+
+        _constrained_indices,aux = self._pick_elements(regexp_indices,self.constrained_indices)
+        _constraints = [self.constraints[pj] for pj in aux]
+
+        for i, t in zip(_constrained_indices, _constraints):
+            for ii in i:
+                iii = regexp_indices.tolist().index(ii)
+                constraints[iii] = t.__str__()
+
+        _fixed_indices,aux = self._pick_elements(regexp_indices,self.fixed_indices)
+        for i in _fixed_indices:
+            for ii in i:
+                iii = regexp_indices.tolist().index(ii)
+                constraints[ii] = 'Fixed'
+
+        _tied_indices,aux = self._pick_elements(regexp_indices,self.tied_indices)
+        ties = [''] * len(names)
+        for i,ti in zip(_tied_indices,aux):
+            for ii in i:
+                iii = regexp_indices.tolist().index(ii)
+                ties[iii] = '(' + str(ti) + ')'
+
+        if values.size == 1:
+            values = ['%.4f' %float(values)]
+        else:
+            values = ['%.4f' % float(v) for v in values]
+
+        max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
+        max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
+        max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
+        max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
+        cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
+
+        header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
+        header_string = map(lambda x: '|'.join(x), [header_string])
+        separator = '-' * len(header_string[0])
+        param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
+
+        print header_string[0]
+        print separator
+        for string in param_string:
+            print string
+
+    def _pick_elements(self,regexp_ind,array_list):
+        """Removes from array_list the elements different from regexp_ind"""
+        new_array_list = [] #New list with elements matching regexp_ind
+        array_indices = [] #Indices that matches the arrays in new_array_list and array_list
+
+        array_index = 0
+        for array in array_list:
+            _new = []
+            for ai in array:
+                if ai in regexp_ind:
+                    _new.append(ai)
+            if len(_new):
+                new_array_list.append(np.array(_new))
+                array_indices.append(array_index)
+            array_index += 1
+        return new_array_list, array_indices
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -5,7 +5,7 @@ import numpy as np
 import pylab as pb
 from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
 from scipy import linalg
-from ..likelihoods import Gaussian
+from ..likelihoods import Gaussian, EP,EP_Mixed_Noise
 from gp_base import GPBase

 class SparseGP(GPBase):
@ -16,16 +16,17 @@ class SparseGP(GPBase):
    :type X: np.ndarray (num_data x input_dim)
    :param likelihood: a likelihood instance, containing the observed data
    :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
-    :param kernel : the kernel (covariance function). See link kernels
+    :param kernel: the kernel (covariance function). See link kernels
    :type kernel: a GPy.kern.kern instance
    :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
    :type X_variance: np.ndarray (num_data x input_dim) | None
    :param Z: inducing inputs (optional, see note)
    :type Z: np.ndarray (num_inducing x input_dim) | None
-    :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
+    :param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)
    :type num_inducing: int
-    :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
+    :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
    :type normalize_(X|Y): bool
+
    """

    def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
@ -33,7 +34,6 @@ class SparseGP(GPBase):

        self.Z = Z
        self.num_inducing = Z.shape[0]
-#         self.likelihood = likelihood

        if X_variance is None:
            self.has_uncertain_inputs = False
@ -109,7 +109,6 @@ class SparseGP(GPBase):
        tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
        self._A = tdot(tmp)

-
        # factor B
        self.B = np.eye(self.num_inducing) + self._A
        self.LB = jitchol(self.B)
@ -139,6 +138,7 @@ class SparseGP(GPBase):
        dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)

        if self.likelihood.is_heteroscedastic:
+
            if self.has_uncertain_inputs:
                self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
            else:
@ -160,9 +160,27 @@ class SparseGP(GPBase):
            # save computation here.
            self.partial_for_likelihood = None
        elif self.likelihood.is_heteroscedastic:
-            raise NotImplementedError, "heteroscedatic derivates not implemented"
+
+            if self.has_uncertain_inputs:
+                raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
+
+            else:
+
+                LBi = chol_inv(self.LB)
+                Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
+                _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
+
+
+                self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
+                self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
+
+                self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
+
+                self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
+                self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
+
        else:
-            # likelihood is not heterscedatic
+            # likelihood is not heteroscedatic
            self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
            self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
@ -194,10 +212,10 @@ class SparseGP(GPBase):
        return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\
            + self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

-    def _get_print_names(self):
-        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
+    #def _get_print_names(self):
+    #    return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

-    def update_likelihood_approximation(self):
+    def update_likelihood_approximation(self, **kwargs):
        """
        Approximates a non-gaussian likelihood using Expectation Propagation

@ -211,10 +229,10 @@ class SparseGP(GPBase):
                Kmmi = tdot(Lmi.T)
                diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])

-                self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
+                self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi, **kwargs) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
                # raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
            else:
-                self.likelihood.fit_DTC(self.Kmm, self.psi1.T)
+                self.likelihood.fit_DTC(self.Kmm, self.psi1.T, **kwargs)
                # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
                self._set_params(self._get_params()) # update the GP

@ -240,7 +258,7 @@ class SparseGP(GPBase):
        """
        The derivative of the bound wrt the inducing inputs Z
        """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
        if self.has_uncertain_inputs:
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
@ -274,7 +292,7 @@ class SparseGP(GPBase):
                Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
                var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
        else:
-            # assert which_p.Tarts=='all', "swithching out parts of variational kernels is not implemented"
+            # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
            mu = np.dot(Kx, self.Cpsi1V)
            if full_cov:
@ -286,19 +304,19 @@ class SparseGP(GPBase):

        return mu, var[:, None]

-    def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+    def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False, **likelihood_args):
        """
        Predict the function(s) at the new point(s) Xnew.

-        Arguments
-        ---------
+        **Arguments**
+
        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.input_dim
        :param X_variance_new: The uncertainty in the prediction points
        :type X_variance_new: np.ndarray, Nnew x self.input_dim
        :param which_parts:  specifies which outputs kernel(s) to use in prediction
        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
        :rtype: posterior mean,  a Numpy array, Nnew x self.input_dim
        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@ -318,21 +336,46 @@ class SparseGP(GPBase):
        mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)

        # now push through likelihood
-        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)

        return mean, var, _025pm, _975pm

-    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):

+    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
+        """
+        Plot the GP's view of the world, where the data is normalized and the
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimsensions, a contour-plot shows the mean predicted function
+          - Not implemented in higher dimensions
+
+        :param samples: the number of a posteriori samples to plot
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
+        :param which_data: which if the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param which_parts: which of the kernel functions to plot (additively)
+        :type which_parts: 'all', or list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+        :type resolution: int
+        :param full_cov:
+        :type full_cov: bool
+                :param fignum: figure to plot on.
+        :type fignum: figure number
+        :param ax: axes to plot on.
+        :type ax: axes handle
+
+        :param output: which output to plot (for multiple output models only)
+        :type output: integer (first output is 0)
+        """
        if ax is None:
            fig = pb.figure(num=fignum)
            ax = fig.add_subplot(111)
+        if fignum is None and ax is None:
+                fignum = fig.num
        if which_data is 'all':
            which_data = slice(None)

-        GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax)
+        GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)

-        # add the inducing inputs and some errorbars
        if self.X.shape[1] == 1:
            if self.has_uncertain_inputs:
                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
@ -345,3 +388,178 @@ class SparseGP(GPBase):
        elif self.X.shape[1] == 2:
            Zu = self.Z * self._Xscale + self._Xoffset
            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+
+
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
+        if ax is None:
+            fig = pb.figure(num=fignum)
+            ax = fig.add_subplot(111)
+        if fignum is None and ax is None:
+                fignum = fig.num
+        if which_data is 'all':
+            which_data = slice(None)
+
+        GPBase.plot(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax)
+
+        if self.X.shape[1] == 1:
+            if self.has_uncertain_inputs:
+                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+            Zu = self.Z * self._Xscale + self._Xoffset
+            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+        elif self.X.shape[1] == 2:
+            Zu = self.Z * self._Xscale + self._Xoffset
+            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+
+
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+        """
+        For a specific output, predict the function at the new point(s) Xnew.
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts:  specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean,  a Numpy array, Nnew x self.input_dim
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays,  Nnew x self.input_dim
+
+        .. Note:: For multiple output models only
+        """
+
+        assert hasattr(self,'multioutput')
+        index = np.ones_like(Xnew)*output
+        Xnew = np.hstack((Xnew,index))
+
+        # normalize X values
+        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+
+        # now push through likelihood
+        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
+        return mean, var, _025pm, _975pm
+
+    def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False,stop=False):
+        """
+        Internal helper function for making predictions for a specific output,
+        does not account for normalization or likelihood
+        ---------
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        :param output: output to predict
+        :type output: integer in {0,..., num_outputs-1}
+        :param which_parts:  specifies which outputs kernel(s) to use in prediction
+        :type which_parts: ('all', list of bools)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+
+        .. Note:: For multiple output models only
+        """
+        Bi, _ = dpotri(self.LB, lower=0)  # WTH? this lower switch should be 1, but that doesn't work!
+        symmetrify(Bi)
+        Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
+
+        if self.Cpsi1V is None:
+            psi1V = np.dot(self.psi1.T,self.likelihood.V)
+            tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+            tmp, _ = dpotrs(self.LB, tmp, lower=1)
+            self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
+
+        assert hasattr(self,'multioutput')
+        index = np.ones_like(_Xnew)*output
+        _Xnew = np.hstack((_Xnew,index))
+
+        if X_variance_new is None:
+            Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
+            mu = np.dot(Kx.T, self.Cpsi1V)
+            if full_cov:
+                Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+                var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
+            else:
+                Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+                var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
+        else:
+            Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
+            mu = np.dot(Kx, self.Cpsi1V)
+            if full_cov:
+                raise NotImplementedError, "TODO"
+            else:
+                Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
+                psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
+                var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
+
+        return mu, var[:, None]
+
+
+    def plot_single_output_f(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
+
+        if ax is None:
+            fig = pb.figure(num=fignum)
+            ax = fig.add_subplot(111)
+        if fignum is None and ax is None:
+                fignum = fig.num
+        if which_data is 'all':
+            which_data = slice(None)
+
+        GPBase.plot_single_output_f(self, output=output, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, full_cov=full_cov, fignum=fignum, ax=ax)
+
+        if self.X.shape[1] == 2:
+            if self.has_uncertain_inputs:
+                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+            Zu = self.Z * self._Xscale + self._Xoffset
+            Zu = Zu[Zu[:,1]==output,0:1]
+            ax.plot(Zu[:,0], np.zeros_like(Zu[:,0]) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+        elif self.X.shape[1] == 2:
+            Zu = self.Z * self._Xscale + self._Xoffset
+            Zu = Zu[Zu[:,1]==output,0:2]
+            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+
+
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    def plot_single_output(self, output=None, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
+        if ax is None:
+            fig = pb.figure(num=fignum)
+            ax = fig.add_subplot(111)
+        if fignum is None and ax is None:
+                fignum = fig.num
+        if which_data is 'all':
+            which_data = slice(None)
+
+        GPBase.plot_single_output(self, samples=samples, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=resolution, levels=20, fignum=fignum, ax=ax, output=output)
+
+        if self.X.shape[1] == 2:
+            if self.has_uncertain_inputs:
+                Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+                ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+            Zu = self.Z * self._Xscale + self._Xoffset
+            Zu = Zu[Zu[:,1]==output,0:1]
+            ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+        elif self.X.shape[1] == 3:
+            Zu = self.Z * self._Xscale + self._Xoffset
+            Zu = Zu[Zu[:,1]==output,0:1]
+            ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
+
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
--- a/GPy/core/svigp.py
+++ b/GPy/core/svigp.py
@ -14,6 +14,7 @@ import sys

 class SVIGP(GPBase):
    """
+
    Stochastic Variational inference in a Gaussian Process

    :param X: inputs
@ -22,25 +23,26 @@ class SVIGP(GPBase):
    :type Y: np.ndarray of observations (N x D)
    :param batchsize: the size of a h

-    Additional kwargs are used as for a sparse GP. They include
+    Additional kwargs are used as for a sparse GP. They include:

    :param q_u: canonical parameters of the distribution squasehd into a 1D array
    :type q_u: np.ndarray
-    :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
+    :param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
    :type M: int
-    :param kernel : the kernel/covariance function. See link kernels
+    :param kernel: the kernel/covariance function. See link kernels
    :type kernel: a GPy kernel
    :param Z: inducing inputs (optional, see note)
    :type Z: np.ndarray (M x Q) | None
    :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
    :type X_uncertainty: np.ndarray (N x Q) | None
    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
-    :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
+    :param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
    :type M: int
-    :param beta: noise precision. TODO> ignore beta if doing EP
+    :param beta: noise precision. TODO: ignore beta if doing EP
    :type beta: float
-    :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
+    :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
    :type normalize_(X|Y): bool
+
    """


--- a/GPy/core/transformations.py
+++ b/GPy/core/transformations.py
@ -18,9 +18,11 @@ class transformation(object):
    def gradfactor(self, f):
        """ df_dx evaluated at self.f(x)=f"""
        raise NotImplementedError
+
    def initialize(self, f):
        """ produce a sensible initial value for f(x)"""
        raise NotImplementedError
+
    def __str__(self):
        raise NotImplementedError

@ -42,15 +44,13 @@ class logexp(transformation):
 class negative_logexp(transformation):
    domain = NEGATIVE
    def f(self, x):
-        return -logexp.f(x) #np.log(1. + np.exp(x))
+        return -logexp.f(x)
    def finv(self, f):
-        return logexp.finv(-f) #np.log(np.exp(-f) - 1.)
+        return logexp.finv(-f) 
    def gradfactor(self, f):
        return -logexp.gradfactor(-f)
-        #ef = np.exp(-f)
-        #return -(ef - 1.) / ef
    def initialize(self, f):
-        return -logexp.initialize(f) #np.abs(f)
+        return -logexp.initialize(f)
    def __str__(self):
        return '(-ve)'

@ -82,7 +82,6 @@ class logexp_clipped(logexp):
        return '(+ve_c)'

 class exponent(transformation):
-    # TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative exponent below. See old MATLAB code.
    domain = POSITIVE
    def f(self, x):
        return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))