diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py
index 286eb0cd..32b6c02d 100644
--- a/GPy/core/__init__.py
+++ b/GPy/core/__init__.py
@@ -6,3 +6,4 @@ from parameterised import *
 import priors
 from GPy.core.gp import GP
 from GPy.core.sparse_gp import SparseGP
+from fitc import FITC
diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py
index 551bff54..00bdab67 100644
--- a/GPy/examples/__init__.py
+++ b/GPy/examples/__init__.py
@@ -4,5 +4,5 @@
 import classification
 import regression
 import dimensionality_reduction
-import non_gaussian
+import non_Gaussian
 import tutorials
diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index a1be1cef..edb1c179 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -24,7 +24,7 @@ def crescent_data(seed=default_seed): # FIXME
     Y = data['Y']
     Y[Y.flatten()==-1] = 0
 
-    m = GPy.models.GP_classification(data['X'], Y)
+    m = GPy.models.GPClassification(data['X'], Y)
     m.ensure_default_constraints()
     m.update_likelihood_approximation()
     m.optimize()
@@ -41,7 +41,7 @@ def oil():
     Y[Y.flatten()==-1] = 0
 
     # Create GP model
-    m = GPy.models.GP_classification(data['X'], Y)
+    m = GPy.models.GPClassification(data['X'], Y)
 
     # Contrain all parameters to be positive
     m.constrain_positive('')
@@ -66,7 +66,7 @@ def toy_linear_1d_classification(seed=default_seed):
     Y[Y.flatten() == -1] = 0
 
     # Model definition
-    m = GPy.models.GP_classification(data['X'], Y)
+    m = GPy.models.GPClassification(data['X'], Y)
     m.ensure_default_constraints()
 
     # Optimize
@@ -95,7 +95,7 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
     Y[Y.flatten() == -1] = 0
 
     # Model definition
-    m = GPy.models.sparse_GP_classification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(data['X'], Y)
     m['.*len']= 2.
 
     m.ensure_default_constraints()
@@ -127,7 +127,7 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
     Y = data['Y']
     Y[Y.flatten()==-1]=0
 
-    m = GPy.models.sparse_GP_classification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(data['X'], Y)
     m.ensure_default_constraints()
     m['.*len'] = 10.
     m.update_likelihood_approximation()
@@ -135,3 +135,27 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
     print(m)
     m.plot()
     return m
+
+def FITC_crescent_data(inducing=10, seed=default_seed):
+    """Run FITC Gaussian process classification on the crescent data. The demonstration calls the FITC classification model and uses EP to approximate the likelihood.
+
+    :param inducing: number of inducing variables (currently unused: it is not passed on to the model).
+    :type inducing: int
+    :param seed: seed value for data generation.
+    :type seed: int
+    :returns: the optimized model.
+    """
+
+    data = GPy.util.datasets.crescent_data(seed=seed)
+    Y = data['Y']
+    Y[Y.flatten()==-1]=0
+
+    m = GPy.models.FITCClassification(data['X'], Y)
+    m.ensure_default_constraints()
+    m['.*len'] = 10.
+    m.update_likelihood_approximation()
+    m.optimize()
+    print(m)
+    m.plot()
+    return m
+
diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_Gaussian.py
similarity index 100%
rename from GPy/examples/non_gaussian.py
rename to GPy/examples/non_Gaussian.py
diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index 63e9e296..f18e89db 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -1,13 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
 from gp_regression import GPRegression
+from gp_classification import GPClassification
 from sparse_gp_regression import SparseGPRegression
+from sparse_gp_classification import SparseGPClassification
+from fitc_classification import FITCClassification
 from gplvm import GPLVM
 from warped_gp import WarpedGP
 from bayesian_gplvm import BayesianGPLVM
 from mrd import MRD
-from generalized_fitc import GeneralizedFITC
-from fitc import FITC
-
diff --git a/GPy/models/generalized_fitc.py b/GPy/models/generalized_fitc.py
index 58da609d..a96609cc 100644
--- a/GPy/models/generalized_fitc.py
+++ b/GPy/models/generalized_fitc.py
@@ -36,12 +36,13 @@ class GeneralizedFITC(SparseGP):
     """
 
     def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
+
         self.Z = Z
         self.num_inducing = self.Z.shape[0]
         self.true_precision = likelihood.precision
 
         super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
         self._set_params(self._get_params())
 
     def _set_params(self, p):
         self.Z = p[:self.num_inducing*self.input_dim].reshape(self.num_inducing, self.input_dim)
diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py
index 74455a2b..376f0005 100644
--- a/GPy/models/gp_classification.py
+++ b/GPy/models/gp_classification.py
@@ -7,11 +7,11 @@ from ..core import GP
 from .. import likelihoods
 from .. import kern
 
-class GP_classification(GP):
+class GPClassification(GP):
     """
     Gaussian Process classification
 
-    This is a thin wrapper around the models.GP class, with a set of sensible defalts
+    This is a thin wrapper around the models.GP class, with a set of sensible defaults
 
     :param X: input observations
     :param Y: observed values
diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py
index 526ef5f5..8d0b02e0 100644
--- a/GPy/models/gp_regression.py
+++ b/GPy/models/gp_regression.py
@@ -11,7 +11,7 @@ class GPRegression(GP):
     """
     Gaussian Process model for regression
 
-    This is a thin wrapper around the models.GP class, with a set of sensible defalts
+    This is a thin wrapper around the models.GP class, with a set of sensible defaults
 
     :param X: input observations
     :param Y: observed values
diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py
index 0e4b8147..f82de00f 100644
--- a/GPy/models/sparse_gp_classification.py
+++ b/GPy/models/sparse_gp_classification.py
@@ -3,17 +3,16 @@
 
 
 import numpy as np
-from ..core import sparse_GP
+from ..core import SparseGP
 from .. import likelihoods
 from .. import kern
 from ..likelihoods import likelihood
-from GPRegression import GPRegression
 
-class sparse_GP_classification(sparse_GP):
+class SparseGPClassification(SparseGP):
     """
     sparse Gaussian Process model for classification
 
-    This is a thin wrapper around the sparse_GP class, with a set of sensible defalts
+    This is a thin wrapper around the SparseGP class, with a set of sensible defaults
 
     :param X: input observations
     :param Y: observed values
@@ -25,8 +24,6 @@ class sparse_GP_classification(sparse_GP):
     :type normalize_Y: False|True
     :rtype: model object
 
-    .. Note:: Multiple independent outputs are allowed using columns of Y
-
     """
 
     def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
@@ -46,5 +43,5 @@ class sparse_GP_classification(sparse_GP):
         else:
             assert Z.shape[1]==X.shape[1]
 
-        sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
+        SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
         self._set_params(self._get_params())
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 73b5c800..020f3890 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -194,9 +194,12 @@ class GradientTests(unittest.TestCase):
         N = 20
         X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
         k = GPy.kern.rbf(1) + GPy.kern.white(1)
-        Y = np.hstack([np.ones(N / 2), -np.ones(N / 2)])[:, None]
-        likelihood = Binomial(Y)
-        m = GPy.models.GeneralizedFITC(X, likelihood, k, inducing=4)
+        Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
+
+        distribution = GPy.likelihoods.likelihood_functions.binomial()
+        likelihood = GPy.likelihoods.EP(Y, distribution)
+        #likelihood = GPy.inference.likelihoods.binomial(Y)
+        m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4)
         m.constrain_positive('(var|len)')
         m.approximate_likelihood()
         self.assertTrue(m.checkgrad())
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 8f22a7d0..c477f283 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -13,7 +13,7 @@ default_seed = 10000
 # Some general utilities.
 def sample_class(f):
     p = 1. / (1. + np.exp(-f))
-    c = np.random.Binomial(1, p)
+    c = np.random.binomial(1, p)
     c = np.where(c, 1, -1)
     return c
 
@@ -22,7 +22,7 @@ def fetch_dataset(resource, save_name = None, save_file = True, messages = True)
         print "Downloading resource: " , resource, " ... ",
     response = url.urlopen(resource)
     # TODO: Some error checking...
-    # ... 
+    # ...
     html = response.read()
     response.close()
     if save_file:
@@ -33,8 +33,6 @@ def fetch_dataset(resource, save_name = None, save_file = True, messages = True)
             if messages:
                 print "Done!"
     return html
-        
-    
 
 def della_gatta_TRP63_gene_expression(gene_number=None):
     mat_data = scipy.io.loadmat(os.path.join(data_path, 'DellaGattadata.mat'))