diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py
index 59719d86..82d0973f 100644
--- a/GPy/inference/latent_function_inference/fitc.py
+++ b/GPy/inference/latent_function_inference/fitc.py
@@ -1,73 +1,54 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from ...util.linalg import mdot, jitchol, chol_inv, tdot, dtrtrs
-from ...core import SparseGP
+from ...util.linalg import jitchol, tdot, dtrtrs, pdinv

-class FITC(SparseGP):
-    """
-    Sparse FITC approximation
-
-    :param X: inputs
-    :type X: np.ndarray (num_data x Q)
-    :param likelihood: a likelihood instance, containing the observed data
-    :type likelihood: GPy.likelihood.(Gaussian | EP)
-    :param kernel: the kernel (covariance function). See link kernels
-    :type kernel: a GPy.kern.kern instance
-    :param Z: inducing inputs (optional, see note)
-    :type Z: np.ndarray (M x Q) | None
-    :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
-    :type normalize_(X|Y): bool
-    """
+class VarDTC(object):
+    def __init__(self):
+        self.const_jitter = 1e-6

-    def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
-        SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
-        assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
+    def inference(self, kern, X, X_variance, Z, likelihood, Y):
+        num_inducing, _ = Z.shape
+        num_data, output_dim = Y.shape

-    def update_likelihood_approximation(self, **kwargs):
-        """
-        Approximates a non-Gaussian likelihood using Expectation Propagation
-
-        For a Gaussian likelihood, no iteration is required:
-        this function does nothing
-        """
-        self.likelihood.restart()
-        self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
-        self._set_params(self._get_params())
+        #make sure we're not using variational uncertain inputs
+        assert X_variance is None, "variational inducing inputs only for use with varDTC inference"
+        #Note: we can't do the variational thing after making the FITC conditional approximation because K~ appears in the log determinant.

+        #see whether we've got a different noise variance for each datum
+        sigma2 = np.squeeze(likelihood.variance)
+        het_noise = False
+        if sigma2.size > 1:
+            het_noise = True

-    def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation
-        self.Kmm = self.kern.K(self.Z)
-        self.psi0 = self.kern.Kdiag(self.X)
-        self.psi1 = self.kern.K(self.Z, self.X)
-        self.psi2 = None
+        Kmm = kern.K(Z)
+        Kdiag = kern.Kdiag(X)
+        Kmn = kern.K(Z, X)

-    def _computations(self): #factor Kmm
-        self.Lm = jitchol(self.Kmm)
-        self.Lmi,info = dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
-        Lmipsi1 = np.dot(self.Lmi,self.psi1)
-        self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
-        self.Diag0 = self.psi0 - np.diag(self.Qnn)
-        self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision
-        self.V_star = self.beta_star * self.likelihood.Y
+        #factor Kmm
+        Lm = jitchol(Kmm)
+        V, _ = dtrtrs(Lm, Kmn, lower=1)

-        # The rather complex computations of self.A
-        tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
-        tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
-        self.A = tdot(tmp)
+        #compute effective noise
+        g_sn2 = sigma2 + Kdiag - np.sum(V*V, 0)

-        # factor B
-        self.B = np.eye(self.num_inducing) + self.A
-        self.LB = jitchol(self.B)
-        self.LBi = chol_inv(self.LB)
-        self.psi1V = np.dot(self.psi1, self.V_star)
+        #compute and factor B
+        tmp = Kmn / np.sqrt(g_sn2)
+        tmp, _ = dtrtrs(Lm, tmp, lower=1)
+        A = tdot(tmp)
+        B = np.eye(num_inducing) + A
+        Bi, LB, LBi, log_det_B = pdinv(B)

-        Lmi_psi1V, info = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
-        self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
+        #compute posterior parameters
+        psi1V = np.dot(Kmn, Y/g_sn2[:,None])
+        Lmi_psi1V, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
+        LBi_Lmi_psi1V, _ = dtrtrs(LB, Lmi_psi1V, lower=1, trans=0)
+        tmp, _ = dtrtrs(LB, LBi_Lmi_psi1V, lower=1, trans=1)
+        woodbury_vec, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

         Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
         b_psi1_Ki = self.beta_star * Kmmipsi1.T
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 1f5b9db4..1a596eff 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 #TODO
 """
-A lot of this code assumes that the link functio nis the identity.
+A lot of this code assumes that the link function is the identity.
 I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
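
Note on the new inference() code in fitc.py: it builds the standard FITC posterior, where the effective per-datum noise is sigma^2 + diag(Knn) - diag(Qnn) with Qnn = Knm Kmm^{-1} Kmn, and the posterior "woodbury" vector is (Kmm + Kmn diag(1/g_sn2) Knm)^{-1} Kmn (Y / g_sn2), computed through the Cholesky factors Lm and LB rather than any explicit inverse. Below is a minimal, self-contained sketch of that linear algebra, not part of the patch: plain NumPy/SciPy stand in for GPy's jitchol/dtrtrs/tdot/pdinv helpers, and the RBF kernel, toy data, and noise level are assumptions for illustration only.

    # sketch only: toy FITC posterior, assuming an RBF kernel and synthetic data
    import numpy as np
    from scipy.linalg import cholesky, solve_triangular

    def rbf(A, B, variance=1.0, lengthscale=1.0):
        # squared-exponential kernel, a stand-in for a GPy kern object
        sq = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2.0 * A @ B.T
        return variance * np.exp(-0.5 * sq / lengthscale**2)

    rng = np.random.default_rng(0)
    X = rng.uniform(-3.0, 3.0, (50, 1))        # num_data x Q inputs
    Y = np.sin(X) + 0.1 * rng.standard_normal(X.shape)
    Z = np.linspace(-3.0, 3.0, 10)[:, None]    # num_inducing x Q inducing inputs
    sigma2 = 0.01                              # Gaussian noise variance

    Kmm = rbf(Z, Z) + 1e-6 * np.eye(len(Z))    # const_jitter on the diagonal
    Kdiag = np.diag(rbf(X, X))
    Kmn = rbf(Z, X)

    Lm = cholesky(Kmm, lower=True)             # jitchol(Kmm)
    V = solve_triangular(Lm, Kmn, lower=True)  # Lm^{-1} Kmn

    # effective noise: sigma^2 + diag(Knn) - diag(Qnn), the FITC correction
    g_sn2 = sigma2 + Kdiag - np.sum(V * V, 0)

    # factor B = I + Lm^{-1} Kmn diag(1/g_sn2) Knm Lm^{-T}
    tmp = solve_triangular(Lm, Kmn / np.sqrt(g_sn2), lower=True)
    B = np.eye(len(Z)) + tmp @ tmp.T
    LB = cholesky(B, lower=True)

    # woodbury vector via four triangular solves, since Lm B Lm^T equals
    # Kmm + Kmn diag(1/g_sn2) Knm
    psi1V = Kmn @ (Y / g_sn2[:, None])
    w = solve_triangular(Lm, psi1V, lower=True)
    w = solve_triangular(LB, w, lower=True)
    w = solve_triangular(LB.T, w, lower=False)
    woodbury_vec = solve_triangular(Lm.T, w, lower=False)

    # predictive mean at the training inputs is then Knm @ woodbury_vec
    mu = Kmn.T @ woodbury_vec
    print("training MSE:", float(np.mean((mu - Y) ** 2)))

Factoring the M x M matrix B = I + A keeps the dominant cost at O(N M^2) and is numerically safer than inverting Kmm + Kmn diag(1/g_sn2) Knm directly, which appears to be why the patch computes A, B and their Cholesky factors instead of calling a generic inverse.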