diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py
index ddc4d02f..991c6c7a 100644
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@@ -9,6 +9,7 @@ from domains import _REAL, _POSITIVE
 import warnings
 import weakref
 
+
 class Prior(object):
     domain = None
 
@@ -17,13 +18,16 @@ class Prior(object):
 
     def plot(self):
         import sys
+
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ...plotting.matplot_dep import priors_plots
+
         priors_plots.univariate_plot(self)
 
     def __repr__(self, *args, **kwargs):
         return self.__str__()
 
+
 class Gaussian(Prior):
     """
     Implementation of the univariate Gaussian probability function, coupled with random variables.
@@ -36,7 +40,8 @@ class Gaussian(Prior):
     """
     domain = _REAL
     _instances = []
-    def __new__(cls, mu, sigma): # Singleton:
+
+    def __new__(cls, mu, sigma):  # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -45,6 +50,7 @@ class Gaussian(Prior):
         o = super(Prior, cls).__new__(cls, mu, sigma)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, mu, sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
@@ -67,7 +73,8 @@ class Gaussian(Prior):
 class Uniform(Prior):
     domain = _REAL
     _instances = []
-    def __new__(cls, lower, upper): # Singleton:
+
+    def __new__(cls, lower, upper):  # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -85,7 +92,7 @@ class Uniform(Prior):
         return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
 
     def lnpdf(self, x):
-        region = (x>=self.lower) * (x<=self.upper)
+        region = (x >= self.lower) * (x <= self.upper)
         return region
 
     def lnpdf_grad(self, x):
@@ -94,6 +101,7 @@ class Uniform(Prior):
     def rvs(self, n):
         return np.random.uniform(self.lower, self.upper, size=n)
 
+
 class LogGaussian(Prior):
     """
     Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
@@ -106,7 +114,8 @@ class LogGaussian(Prior):
     """
     domain = _POSITIVE
     _instances = []
-    def __new__(cls, mu, sigma): # Singleton:
+
+    def __new__(cls, mu, sigma):  # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -115,6 +124,7 @@ class LogGaussian(Prior):
         o = super(Prior, cls).__new__(cls, mu, sigma)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, mu, sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
@@ -146,7 +156,8 @@ class MultivariateGaussian:
     """
     domain = _REAL
     _instances = []
-    def __new__(cls, mu, var): # Singleton:
+
+    def __new__(cls, mu, var):  # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -155,6 +166,7 @@ class MultivariateGaussian:
         o = super(Prior, cls).__new__(cls, mu, var)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, mu, var):
         self.mu = np.array(mu).flatten()
         self.var = np.array(var)
@@ -184,10 +196,13 @@ class MultivariateGaussian:
 
     def plot(self):
         import sys
+
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import priors_plots
+
         priors_plots.multivariate_plot(self)
 
+
 def gamma_from_EV(E, V):
     warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
     return Gamma.from_EV(E, V)
@@ -205,7 +220,8 @@ class Gamma(Prior):
     """
     domain = _POSITIVE
     _instances = []
-    def __new__(cls, a, b): # Singleton:
+
+    def __new__(cls, a, b):  # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -214,6 +230,7 @@ class Gamma(Prior):
         o = super(Prior, cls).__new__(cls, a, b)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, a, b):
         self.a = float(a)
         self.b = float(b)
@@ -224,9 +241,9 @@ class Gamma(Prior):
 
     def summary(self):
         ret = {"E[x]": self.a / self.b, \
-            "E[ln x]": digamma(self.a) - np.log(self.b), \
-            "var[x]": self.a / self.b / self.b, \
-            "Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
+               "E[ln x]": digamma(self.a) - np.log(self.b), \
+               "var[x]": self.a / self.b / self.b, \
+               "Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
         if self.a > 1:
             ret['Mode'] = (self.a - 1.) / self.b
         else:
@@ -241,6 +258,7 @@ class Gamma(Prior):
 
     def rvs(self, n):
         return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
+
     @staticmethod
     def from_EV(E, V):
         """
@@ -254,6 +272,7 @@ class Gamma(Prior):
         b = E / V
         return Gamma(a, b)
 
+
 class inverse_gamma(Prior):
     """
     Implementation of the inverse-Gamma probability function, coupled with random variables.
@@ -265,7 +284,8 @@ class inverse_gamma(Prior):
 
     """
     domain = _POSITIVE
-    def __new__(cls, a, b): # Singleton:
+
+    def __new__(cls, a, b):  # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
@@ -274,6 +294,7 @@ class inverse_gamma(Prior):
         o = super(Prior, cls).__new__(cls, a, b)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
+
     def __init__(self, a, b):
         self.a = float(a)
         self.b = float(b)
@@ -290,3 +311,348 @@ class inverse_gamma(Prior):
 
     def rvs(self, n):
         return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
+
+
+class DGPLVM_KFDA(Prior):
+    """
+    Implementation of the Discriminative Gaussian Process Latent Variable function using
+    Kernel Fisher Discriminant Analysis by Seung-Jean Kim for implementing Face paper
+    by Chaochao Lu.
+
+    :param lambdaa: constant
+    :param sigma2: constant
+
+    .. Note:: Surpassing Human-Level Face paper dgplvm implementation
+
+    """
+    domain = _REAL
+    # _instances = []
+    # def __new__(cls, lambdaa, sigma2):  # Singleton:
+    #     if cls._instances:
+    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
+    #         for instance in cls._instances:
+    #             if instance().mu == mu and instance().sigma == sigma:
+    #                 return instance()
+    #     o = super(Prior, cls).__new__(cls, mu, sigma)
+    #     cls._instances.append(weakref.ref(o))
+    #     return cls._instances[-1]()
+
+    def __init__(self, lambdaa, sigma2, lbl, kern, x_shape):
+        """A description for init"""
+        self.datanum = lbl.shape[0]
+        self.classnum = lbl.shape[1]
+        self.lambdaa = lambdaa
+        self.sigma2 = sigma2
+        self.lbl = lbl
+        self.kern = kern
+        lst_ni = self.compute_lst_ni()
+        self.a = self.compute_a(lst_ni)
+        self.A = self.compute_A(lst_ni)
+        self.x_shape = x_shape
+
+    def get_class_label(self, y):
+        for idx, v in enumerate(y):
+            if v == 1:
+                return idx
+        return -1
+
+    # This function assigns each data point to its own class
+    # and returns the dictionary which contains the class name and parameters.
+    def compute_cls(self, x):
+        cls = {}
+        # Appending each data point to its proper class
+        for j in xrange(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in cls:
+                cls[class_label] = []
+            cls[class_label].append(x[j])
+        if len(cls) > 2:
+            for i in range(2, self.classnum):
+                del cls[i]
+        return cls
+
+    def x_reduced(self, cls):
+        x1 = cls[0]
+        x2 = cls[1]
+        x = np.concatenate((x1, x2), axis=0)
+        return x
+
+    def compute_lst_ni(self):
+        lst_ni = []
+        lst_ni1 = []
+        lst_ni2 = []
+        f1 = (np.where(self.lbl[:, 0] == 1)[0])
+        f2 = (np.where(self.lbl[:, 1] == 1)[0])
+        for idx in f1:
+            lst_ni1.append(idx)
+        for idx in f2:
+            lst_ni2.append(idx)
+        lst_ni.append(len(lst_ni1))
+        lst_ni.append(len(lst_ni2))
+        return lst_ni
+
+    def compute_a(self, lst_ni):
+        a = np.ones((self.datanum, 1))
+        count = 0
+        for N_i in lst_ni:
+            if N_i == lst_ni[0]:
+                a[count:count + N_i] = (float(1) / N_i) * a[count]
+                count += N_i
+            else:
+                if N_i == lst_ni[1]:
+                    a[count: count + N_i] = -(float(1) / N_i) * a[count]
+                    count += N_i
+        return a
+
+    def compute_A(self, lst_ni):
+        A = np.zeros((self.datanum, self.datanum))
+        idx = 0
+        for N_i in lst_ni:
+            B = float(1) / np.sqrt(N_i) * (np.eye(N_i) - ((float(1) / N_i) * np.ones((N_i, N_i))))
+            A[idx:idx + N_i, idx:idx + N_i] = B
+            idx += N_i
+        return A
+
+    # Here log function
+    def lnpdf(self, x):
+        x = x.reshape(self.x_shape)
+        K = self.kern.K(x)
+        a_trans = np.transpose(self.a)
+        paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
+        inv_part = pdinv(paran)[0]
+        J = a_trans.dot(K).dot(self.a) - a_trans.dot(K).dot(self.A).dot(inv_part).dot(self.A).dot(K).dot(self.a)
+        J_star = (1. / self.lambdaa) * J
+        return (-1. / self.sigma2) * J_star
+
+    # Here gradient function
+    def lnpdf_grad(self, x):
+        x = x.reshape(self.x_shape)
+        K = self.kern.K(x)
+        paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
+        inv_part = pdinv(paran)[0]
+        b = self.A.dot(inv_part).dot(self.A).dot(K).dot(self.a)
+        a_Minus_b = self.a - b
+        a_b_trans = np.transpose(a_Minus_b)
+        DJ_star_DK = (1. / self.lambdaa) * (a_Minus_b.dot(a_b_trans))
+        DJ_star_DX = self.kern.gradients_X(DJ_star_DK, x)
+        return (-1. / self.sigma2) * DJ_star_DX
+
+    def rvs(self, n):
+        return np.random.rand(n)  # A WRONG implementation
+
+    def __str__(self):
+        return 'DGPLVM_prior'
+
+
+class DGPLVM(Prior):
+    """
+    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
+
+    :param sigma2: constant
+
+    .. Note:: DGPLVM for Classification paper implementation
+
+    """
+    domain = _REAL
+    # _instances = []
+    # def __new__(cls, mu, sigma): # Singleton:
+    #     if cls._instances:
+    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
+    #         for instance in cls._instances:
+    #             if instance().mu == mu and instance().sigma == sigma:
+    #                 return instance()
+    #     o = super(Prior, cls).__new__(cls, mu, sigma)
+    #     cls._instances.append(weakref.ref(o))
+    #     return cls._instances[-1]()
+
+    def __init__(self, sigma2, lbl, x_shape):
+        self.sigma2 = sigma2
+        # self.x = x
+        self.lbl = lbl
+        self.classnum = lbl.shape[1]
+        self.datanum = lbl.shape[0]
+        self.x_shape = x_shape
+        self.dim = x_shape[1]
+
+    def get_class_label(self, y):
+        for idx, v in enumerate(y):
+            if v == 1:
+                return idx
+        return -1
+
+    # This function assigns each data point to its own class
+    # and returns the dictionary which contains the class name and parameters.
+    def compute_cls(self, x):
+        cls = {}
+        # Appending each data point to its proper class
+        for j in xrange(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in cls:
+                cls[class_label] = []
+            cls[class_label].append(x[j])
+        return cls
+
+    # This function computes mean of each class. The mean is calculated through each dimension
+    def compute_Mi(self, cls):
+        M_i = np.zeros((self.classnum, self.dim))
+        for i in cls:
+            # Mean of each class
+            M_i[i] = np.mean(cls[i], axis=0)
+        return M_i
+
+    # Adding data points as tuple to the dictionary so that we can access indices
+    def compute_indices(self, x):
+        data_idx = {}
+        for j in xrange(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in data_idx:
+                data_idx[class_label] = []
+            t = (j, x[j])
+            data_idx[class_label].append(t)
+        return data_idx
+
+    # Adding indices to the list so we can access whole the indices
+    def compute_listIndices(self, data_idx):
+        lst_idx = []
+        lst_idx_all = []
+        for i in data_idx:
+            if len(lst_idx) == 0:
+                pass
+                #Do nothing, because it is the first time list is created so is empty
+            else:
+                lst_idx = []
+            # Here we put indices of each class in to the list called lst_idx_all
+            for m in xrange(len(data_idx[i])):
+                lst_idx.append(data_idx[i][m][0])
+            lst_idx_all.append(lst_idx)
+        return lst_idx_all
+
+    # This function calculates between classes variances
+    def compute_Sb(self, cls, M_i, M_0):
+        Sb = np.zeros((self.dim, self.dim))
+        for i in cls:
+            B = (M_i[i] - M_0).reshape(self.dim, 1)
+            B_trans = B.transpose()
+            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
+        return Sb
+
+    # This function calculates within classes variances
+    def compute_Sw(self, cls, M_i):
+        Sw = np.zeros((self.dim, self.dim))
+        for i in cls:
+            N_i = float(len(cls[i]))
+            W_WT = np.zeros((self.dim, self.dim))
+            for xk in cls[i]:
+                W = (xk - M_i[i])
+                W_WT += np.outer(W, W)
+            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
+        return Sw
+
+    # Calculating beta and Bi for Sb
+    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
+        # import pdb
+        # pdb.set_trace()
+        B_i = np.zeros((self.classnum, self.dim))
+        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            # pdb.set_trace()
+            # Calculating Bi
+            B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
+        for k in xrange(self.datanum):
+            for i in data_idx:
+                N_i = float(len(data_idx[i]))
+                if k in lst_idx_all[i]:
+                    beta = (float(1) / N_i) - (float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+                else:
+                    beta = -(float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+        Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
+        return Sig_beta_B_i_all
+
+
+    # Calculating W_j s separately so we can access all the W_j s anytime
+    def compute_wj(self, data_idx, M_i):
+        W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                xj = tpl[1]
+                j = tpl[0]
+                W_i[j] = (xj - M_i[i])
+        return W_i
+
+    # Calculating alpha and Wj for Sw
+    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
+        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                k = tpl[0]
+                for j in lst_idx_all[i]:
+                    if k == j:
+                        alpha = 1 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+                    else:
+                        alpha = 0 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+        Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
+        return Sig_alpha_W_i
+
+    # This function calculates log of our prior
+    def lnpdf(self, x):
+        x = x.reshape(self.x_shape)
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
+
+    # This function calculates derivative of the log of prior function
+    def lnpdf_grad(self, x):
+        x = x.reshape(self.x_shape)
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        data_idx = self.compute_indices(x)
+        lst_idx_all = self.compute_listIndices(data_idx)
+        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
+        W_i = self.compute_wj(data_idx, M_i)
+        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
+
+        # Calculating inverse of Sb and its transpose and minus
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N_trans = np.transpose(Sb_inv_N)
+        Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
+        Sw_trans = np.transpose(Sw)
+
+        # Calculating DJ/DXk
+        DJ_Dxk = 2 * (
+            Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
+                Sig_alpha_W_i))
+        # Calculating derivative of the log of the prior
+        DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
+        return DPx_Dx.T
+
+    # def frb(self, x):
+    #     from functools import partial
+    #     from GPy.models import GradientChecker
+    #     f = partial(self.lnpdf)
+    #     df = partial(self.lnpdf_grad)
+    #     grad = GradientChecker(f, df, x, 'X')
+    #     grad.checkgrad(verbose=1)
+
+    def rvs(self, n):
+        return np.random.rand(n)  # A WRONG implementation
+
+    def __str__(self):
+        return 'DGPLVM_prior'
+