From d077d28fd1667645f2776d96e2b7914964263821 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 31 Jan 2013 15:02:34 +0000 Subject: [PATCH] very basic functionality is now working --- GPy/__init__.py | 2 +- GPy/likelihoods/EP.py | 10 +- GPy/likelihoods/Gaussian.py | 37 ++++- GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/likelihood_functions.py | 11 +- GPy/models/GP.py | 70 ++++---- GPy/models/GP_regression.py | 209 +----------------------- GPy/models/__init__.py | 14 +- GPy/models/sparse_GP.py | 2 - GPy/util/plot.py | 16 +- 10 files changed, 88 insertions(+), 284 deletions(-) diff --git a/GPy/__init__.py b/GPy/__init__.py index 381d6232..c0772c27 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -7,5 +7,5 @@ import models import inference import util import examples -#import examples TODO: discuss! from core import priors +import likelihoods diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py index 1519bf3b..3e975436 100644 --- a/GPy/likelihoods/EP.py +++ b/GPy/likelihoods/EP.py @@ -1,12 +1,9 @@ import numpy as np import random -import pylab as pb #TODO erase me from scipy import stats, linalg -from .likelihoods import likelihood from ..core import model from ..util.linalg import pdinv,mdot,jitchol from ..util.plot import gpplot -from .. import kern class EP: def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]): @@ -15,12 +12,8 @@ class EP: Arguments --------- - X : input observations - likelihood : Output's likelihood (likelihood class) - kernel : a GPy kernel (kern class) - inducing : Either an array specifying the inducing points location or a sacalar defining their number. None value for using a non-sparse model is used. - power_ep : Power-EP parameters (eta,delta) - 2x1 numpy array (floats) epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) + likelihood_function : a likelihood function (see likelihood_functions.py) """ self.likelihood_function = likelihood_function self.epsilon = epsilon @@ -48,7 +41,6 @@ class EP: For nomenclature see Rasmussen & Williams 2006. """ #Prior distribution parameters: p(f|X) = N(f|0,K) - #self.K = self.kernel.K(self.X,self.X) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) self.mu = np.zeros(self.N) diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py index 2397ce38..fe954b78 100644 --- a/GPy/likelihoods/Gaussian.py +++ b/GPy/likelihoods/Gaussian.py @@ -1,16 +1,39 @@ import numpy as np class Gaussian: - def __init__(self,data,variance=1.,normalise=False): + def __init__(self,data,variance=1.,normalize=False): self.data = data - if normalise: - foo - self._variance = variance + self.N,D = data.shape + self.Z = 0. 
# a correction factor which accounts for the approximation made + + #normalisation + if normalize: + self._mean = data.mean(0)[None,:] + self._std = data.std(0)[None,:] + self.Y = (self.data - self._mean)/self._std + else: + self._mean = np.zeros((1,D)) + self._std = np.ones((1,D)) + self.Y = self.data + + self.YYT = np.dot(self.Y,self.Y.T) + self._set_params(np.asarray(variance)) + def _get_params(self): - return np.asarray(self.variance) + return np.asarray(self._variance) + + def _get_param_names(self): + return ["noise variance"] + def _set_params(self,x): self._variance = x + self.variance = np.eye(self.N)*self._variance + def fit(self): + """ + No approximations needed + """ pass - def _gradients(self,foo): - return bar(foo) + + def _gradients(self,partial): + return np.sum(np.diag(partial)) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index d1369c43..83413255 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,3 +1,4 @@ from EP import EP from Gaussian import Gaussian # TODO: from Laplace import Laplace +import likelihood_functions as functions diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 1387c53d..7e6a5ba1 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -192,10 +192,10 @@ class poisson(likelihood): pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12) class gaussian(likelihood): - """ - Gaussian likelihood - Y is expected to take values in (-inf,inf) - """ + """ + Gaussian likelihood + Y is expected to take values in (-inf,inf) + """ def moments_match(self,i,tau_i,v_i): """ Moments match of the marginal approximation in EP algorithm @@ -221,6 +221,3 @@ class gaussian(likelihood): def _log_likelihood_gradients(): raise NotImplementedError - else: - var = var[:,None] * np.square(self._Ystd) - diff --git a/GPy/models/GP.py b/GPy/models/GP.py index f5a0711d..827b94b7 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -8,8 +8,6 @@ from .. import kern from ..core import model from ..util.linalg import pdinv,mdot from ..util.plot import gpplot, Tango -from ..inference.EP import Full # TODO: tidy -from ..inference import likelihoods class GP(model): """ @@ -55,8 +53,6 @@ class GP(model): self._Xstd = np.ones((1,self.X.shape[1])) self.likelihood = likelihood - self.Y = self.likelihood.Y - self.YYT = self.likelihood.YYT # TODO: this is ugly. what about sufficient_stats? 
assert self.X.shape[0] == self.likelihood.Y.shape[0]
         self.N, self.D = self.likelihood.Y.shape
@@ -67,18 +63,16 @@
         self.likelihood._set_params(p[self.kern.Nparam:])
 
         self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.K += np.diag(self.likelihood_variance)
+        self.K += self.likelihood.variance
         self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
 
         #the gradient of the likelihood wrt the covariance matrix
-        if self.YYT is None:
-            self._alpha = np.dot(self.Ki,self.Y)
-            self._alpha2 = np.square(self._alpha)
-            self.dL_dK = 0.5*(np.dot(self._alpha,self._alpha.T)-self.D*self.Ki)
+        if self.likelihood.YYT is None:
+            alpha = np.dot(self.Ki,self.likelihood.Y)
+            self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
         else:
-            tmp = mdot(self.Ki, self.YYT, self.Ki)
-            self._alpha2 = np.diag(tmp)
+            tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
             self.dL_dK = 0.5*(tmp - self.D*self.Ki)
 
     def _get_params(self):
@@ -95,16 +89,15 @@
         this function does nothing
         """
         self.likelihood.fit(self.K)
-        self.Y, self.YYT, self.likelihood_variance, self.likelihood_Z = self.likelihood.sufficient_stats() # TODO: just store these in the likelihood?
 
     def _model_fit_term(self):
         """
         Computes the model fit using YYT if it's available
         """
-        if self.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
+        if self.likelihood.YYT is None:
+            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
         else:
-            return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
+            return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
 
     def log_likelihood(self):
         """
@@ -114,7 +107,7 @@
         model for a new variable Y* = v_tilde/tau_tilde, with a covariance matrix K* = K + diag(1./tau_tilde) plus a normalization term.
         """
-        return -0.5*self.D*self.K_logdet + self.model_fit_term() + self.likelihood.Z
+        return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
 
     def _log_likelihood_gradients(self):
@@ -125,7 +118,7 @@
-        For the likelihood parameters, pass in alpha = K^-1 y
+        For the likelihood parameters, pass in the partial derivative dL_dK
         """
-        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X), self.likelihood._gradients(self.alpha2)))
+        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=self.dL_dK)))
 
     def _raw_predict(self,_Xnew,slices, full_cov=False):
         """
         Internal helper function for making predictions, does not account
         for normalisation or likelihood
         """
         Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
+        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
         KiKx = np.dot(self.Ki,Kx)
         if full_cov:
             Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
@@ -177,8 +170,10 @@
         return mean, _5pc, _95pc
 
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+    def raw_plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
         """
+        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
+
         :param samples: the number of a posteriori samples to plot
-        :param which_data: which if the training data to plot (default all)
+        :param which_data: which of the training data to plot (default all)
         :type which_data: 'all' or a slice object to slice self.X, self.Y
@@ -194,19 +189,17 @@
 
         Can plot only part of the data and part of the posterior functions
         using which_data and which_functions
         """
+
         if which_functions=='all':
             which_functions = [True]*self.kern.Nparts
         if which_data=='all':
             which_data = slice(None)
 
         X = self.X[which_data,:]
-        Y = 
self.Y[which_data,:]
-
-        Xorig = X*self._Xstd + self._Xmean
-        Yorig = Y*self._Ystd + self._Ymean #NOTE For EP this is v_tilde/beta
+        Y = self.likelihood.Y[which_data,:]
 
         if plot_limits is None:
-            xmin,xmax = Xorig.min(0),Xorig.max(0)
+            xmin,xmax = X.min(0),X.max(0)
             xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
         elif len(plot_limits)==2:
             xmin, xmax = plot_limits
@@ -215,27 +208,17 @@
 
         if self.X.shape[1]==1:
             Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-            m,v,phi = self.predict(Xnew,slices=which_functions,full_cov=full_cov)
-            if self.EP:
-                pb.subplot(211)
-            gpplot(Xnew,m,v)
+            m,v = self._raw_predict(Xnew,slices=which_functions,full_cov=False)
+            lower, upper = m.flatten() - 2.*np.sqrt(v) , m.flatten()+ 2.*np.sqrt(v)
+            gpplot(Xnew,m,lower,upper)
 
-            if samples: #NOTE why don't we put samples as a parameter of gpplot
-                s = np.random.multivariate_normal(m.flatten(),np.diag(v.flatten()),samples)
-                pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
-            pb.plot(Xorig,Yorig,'kx',mew=1.5)
+            pb.plot(X,Y,'kx',mew=1.5)
             pb.xlim(xmin,xmax)
-
-            if self.EP:
-                pb.subplot(212)
-                self.likelihood.plot(Xnew,m,v,phi,self.X,samples=samples)
-                pb.xlim(xmin,xmax)
-
         elif self.X.shape[1]==2:
-            resolution = 50 or resolution
+            resolution = resolution or 50
             xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
             Xtest = np.vstack((xx.flatten(),yy.flatten())).T
-            zz,vv,phi = self.predict(Xtest,slices=which_functions,full_cov=full_cov)
+            zz,vv = self._raw_predict(Xtest,slices=which_functions,full_cov=False)
             zz = zz.reshape(resolution,resolution)
             pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
-            pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
+            pb.scatter(X[:,0],X[:,1],40,Y,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
             pb.xlim(xmin[0],xmax[0])
             pb.ylim(xmin[1],xmax[1])
 
         else:
             raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
+
+    def plot(self):
+        """
+        Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
+        """
+        pass # TODO!!!!!
+
diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py
index 72a24307..916e5284 100644
--- a/GPy/models/GP_regression.py
+++ b/GPy/models/GP_regression.py
@@ -1,18 +1,18 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-import pylab as pb
+from GP import GP
+from .. import likelihoods
 from .. 
import kern
-from ..core import model
-from ..util.linalg import pdinv,mdot
-from ..util.plot import gpplot, Tango
 
-class GP_regression(model):
+class GP_regression(GP):
     """
     Gaussian Process model for regression
 
+    This is a thin wrapper around the GP class, with a set of sensible defaults
+
     :param X: input observations
     :param Y: observed values
-    :param kernel: a GPy kernel, defaults to rbf+white
+    :param kernel: a GPy kernel, defaults to rbf
@@ -29,199 +29,8 @@
     def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
+            kernel = kern.rbf(X.shape[1])
 
-        # parse arguments
-        self.Xslices = Xslices
-        assert isinstance(kernel, kern.kern)
-        self.kern = kernel
-        self.X = X
-        self.Y = Y
-        assert len(self.X.shape)==2
-        assert len(self.Y.shape)==2
-        assert self.X.shape[0] == self.Y.shape[0]
-        self.N, self.D = self.Y.shape
-        self.N, self.Q = self.X.shape
+        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
 
-        #here's some simple normalisation
-        if normalize_X:
-            self._Xmean = X.mean(0)[None,:]
-            self._Xstd = X.std(0)[None,:]
-            self.X = (X.copy() - self._Xmean) / self._Xstd
-            if hasattr(self,'Z'):
-                self.Z = (self.Z - self._Xmean) / self._Xstd
-        else:
-            self._Xmean = np.zeros((1,self.X.shape[1]))
-            self._Xstd = np.ones((1,self.X.shape[1]))
-
-        if normalize_Y:
-            self._Ymean = Y.mean(0)[None,:]
-            self._Ystd = Y.std(0)[None,:]
-            self.Y = (Y.copy()- self._Ymean) / self._Ystd
-        else:
-            self._Ymean = np.zeros((1,self.Y.shape[1]))
-            self._Ystd = np.ones((1,self.Y.shape[1]))
-
-        if self.D > self.N:
-            # then it's more efficient to store YYT
-            self.YYT = np.dot(self.Y, self.Y.T)
-        else:
-            self.YYT = None
-
-        model.__init__(self)
-
-    def _set_params(self,p):
-        self.kern._set_params_transformed(p)
-        self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
-
-    def _get_params(self):
-        return self.kern._get_params_transformed()
-
-    def _get_param_names(self):
-        return self.kern._get_param_names_transformed()
-
-    def _model_fit_term(self):
-        """
-        Computes the model fit using YYT if it's available
-        """
-        if self.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
-        else:
-            return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
-
-    def log_likelihood(self):
-        complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
-        return complexity_term + self._model_fit_term()
-
-    def dL_dK(self):
-        if self.YYT is None:
-            alpha = np.dot(self.Ki,self.Y)
-            dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
-        else:
-            dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
-
-        return dL_dK
-
-    def _log_likelihood_gradients(self):
-        return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
-
-    def predict(self,Xnew, slices=None, full_cov=False):
-        """
-
-        Predict the function(s) at the new point(s) Xnew.
-
-        Arguments
-        ---------
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.Q
-        :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
-        :type slices: (None, list of slice objects, list of ints)
-        :param full_cov: whether to return the folll covariance matrix, or just the diagonal
-        :type full_cov: bool
-        :rtype: posterior mean, a Numpy array, Nnew x self.D
-        :rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
-
-        .. Note:: "slices" specifies how the the points X_new co-vary wich the training points. 
- - - If None, the new points covary throigh every kernel part (default) - - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part - - If a list of booleans, specifying which kernel parts are active - - If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. - This is to allow for different normalisations of the output dimensions. - - - """ - - #normalise X values - Xnew = (Xnew.copy() - self._Xmean) / self._Xstd - mu, var = self._raw_predict(Xnew, slices, full_cov) - - #un-normalise - mu = mu*self._Ystd + self._Ymean - if full_cov: - if self.D==1: - var *= np.square(self._Ystd) - else: - var = var[:,:,None] * np.square(self._Ystd) - else: - if self.D==1: - var *= np.square(np.squeeze(self._Ystd)) - else: - var = var[:,None] * np.square(self._Ystd) - - return mu,var - - def _raw_predict(self,_Xnew,slices, full_cov=False): - """Internal helper function for making predictions, does not account for normalisation""" - Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices) - mu = np.dot(np.dot(Kx.T,self.Ki),self.Y) - KiKx = np.dot(self.Ki,Kx) - if full_cov: - Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices) - var = Kxx - np.dot(KiKx.T,Kx) - else: - Kxx = self.kern.Kdiag(_Xnew, slices=slices) - var = Kxx - np.sum(np.multiply(KiKx,Kx),0) - return mu, var - - def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None): - """ - :param samples: the number of a posteriori samples to plot - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits - :param which_functions: which of the kernel functions to plot (additively) - :type which_functions: list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - - Plot the posterior of the GP. - - In one dimension, the function is plotted with a shaded region identifying two standard deviations. - - In two dimsensions, a contour-plot shows the mean predicted function - - In higher dimensions, we've no implemented this yet !TODO! 
- - Can plot only part of the data and part of the posterior functions using which_data and which_functions - """ - if which_functions=='all': - which_functions = [True]*self.kern.Nparts - if which_data=='all': - which_data = slice(None) - - X = self.X[which_data,:] - Y = self.Y[which_data,:] - - Xorig = X*self._Xstd + self._Xmean - Yorig = Y*self._Ystd + self._Ymean - if plot_limits is None: - xmin,xmax = Xorig.min(0),Xorig.max(0) - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - elif len(plot_limits)==2: - xmin, xmax = plot_limits - else: - raise ValueError, "Bad limits for plotting" - - - if self.X.shape[1]==1: - Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None] - m,v = self.predict(Xnew,slices=which_functions) - gpplot(Xnew,m,v) - if samples: - s = np.random.multivariate_normal(m.flatten(),v,samples) - pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8) - pb.plot(Xorig,Yorig,'kx',mew=1.5) - pb.xlim(xmin,xmax) - - elif self.X.shape[1]==2: - resolution = 50 or resolution - xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] - Xtest = np.vstack((xx.flatten(),yy.flatten())).T - zz,vv = self.predict(Xtest,slices=which_functions) - zz = zz.reshape(resolution,resolution) - pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet) - pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max()) - pb.xlim(xmin[0],xmax[0]) - pb.ylim(xmin[1],xmax[1]) - - else: - raise NotImplementedError, "Cannot plot GPs with more than two input dimensions" + GP.__init__(self, X, kernel, likelihood, normalize_X=normalize_X, Xslices=Xslices) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index d2de84aa..1175eb71 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -2,14 +2,14 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -#from GP_regression import GP_regression #from sparse_GP_regression import sparse_GP_regression -# ^^ remove these? +# TODO ^^ remove these? from GPLVM import GPLVM from warped_GP import warpedGP -from generalized_FITC import generalized_FITC -from sparse_GPLVM import sparse_GPLVM -from uncollapsed_sparse_GP import uncollapsed_sparse_GP +# TODO: from generalized_FITC import generalized_FITC +#from sparse_GPLVM import sparse_GPLVM +#from uncollapsed_sparse_GP import uncollapsed_sparse_GP from GP import GP -from sparse_GP import sparse_GP -from BGPLVM import Bayesian_GPLVM +from GP_regression import GP_regression +#from sparse_GP import sparse_GP +#from BGPLVM import Bayesian_GPLVM diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index 7f287174..7b043209 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -7,8 +7,6 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv from ..util.plot import gpplot from .. 
import kern from GP import GP -from ..inference.EP import Full,DTC,FITC -from ..inference.likelihoods import likelihood,probit,poisson,gaussian #Still TODO: diff --git a/GPy/util/plot.py b/GPy/util/plot.py index 8c06633e..3b4682e4 100644 --- a/GPy/util/plot.py +++ b/GPy/util/plot.py @@ -6,7 +6,7 @@ import Tango import pylab as pb import numpy as np -def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): +def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): if axes is None: axes = pb.gca() mu = mu.flatten() @@ -15,21 +15,15 @@ def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHe #here's the mean axes.plot(x,mu,color=edgecol,linewidth=2) - #ensure variance is a vector - if len(var.shape)>1: - err = 2*np.sqrt(np.diag(var)) - else: - err = 2*np.sqrt(var) - - #here's the 2*std box + #here's the box kwargs['linewidth']=0.5 if not 'alpha' in kwargs.keys(): kwargs['alpha'] = 0.3 - axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs) + axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs) #this is the edge: - axes.plot(x,mu+err,color=edgecol,linewidth=0.2) - axes.plot(x,mu-err,color=edgecol,linewidth=0.2) + axes.plot(x,upper,color=edgecol,linewidth=0.2) + axes.plot(x,lower,color=edgecol,linewidth=0.2) def removeRightTicks(ax=None): ax = ax or pb.gca()
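
Note on the reshaped gpplot: the helper no longer derives an error band from a
variance argument; callers now pass explicit lower and upper envelopes, as
raw_plot does above with m - 2*sqrt(v) and m + 2*sqrt(v). Below is a minimal,
self-contained sketch of that contract. It is not GPy code: it uses
matplotlib.pyplot directly rather than pylab, and the posterior values and hex
colours are made up for illustration.

import numpy as np
import matplotlib.pyplot as plt

# Stand-in posterior: a mean and variance of the kind GP._raw_predict
# would return; fabricated here so the sketch runs on its own.
x = np.linspace(-3., 3., 200)
mu = np.sin(x)
var = (0.1 + 0.05*np.abs(x))**2

# The caller builds the envelope, exactly as raw_plot now does.
lower = mu - 2.*np.sqrt(var)
upper = mu + 2.*np.sqrt(var)

# The body of gpplot: mean line, filled band between the envelopes,
# and faint edge lines along each envelope.
plt.plot(x, mu, color='#204a87', linewidth=2)
plt.fill(np.hstack((x, x[::-1])), np.hstack((upper, lower[::-1])),
         color='#729fcf', alpha=0.3, linewidth=0.5)
plt.plot(x, upper, color='#204a87', linewidth=0.2)
plt.plot(x, lower, color='#204a87', linewidth=0.2)
plt.show()

Passing the envelopes in keeps gpplot agnostic about how the band is defined,
so a non-Gaussian likelihood (e.g. the EP case) can later supply asymmetric
quantiles instead of the mean plus or minus two standard deviations.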