diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py
index 84b6357e..83c83dfd 100644
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@@ -5,7 +5,7 @@
 import numpy as np
 from scipy.special import gammaln, digamma
 from ...util.linalg import pdinv
-from domains import _REAL, _POSITIVE
+from .domains import _REAL, _POSITIVE
 import warnings
 import weakref
 
@@ -15,8 +15,12 @@ class Prior(object):
     _instance = None
     def __new__(cls, *args, **kwargs):
         if not cls._instance or cls._instance.__class__ is not cls:
-            cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs)
-        return cls._instance
+                newfunc = super(Prior, cls).__new__
+                if newfunc is object.__new__:
+                    cls._instance = newfunc(cls)  
+                else:
+                    cls._instance = newfunc(cls, *args, **kwargs)
+                return cls._instance
 
     def pdf(self, x):
         return np.exp(self.lnpdf(x))
@@ -52,7 +56,11 @@ class Gaussian(Prior):
             for instance in cls._instances:
                 if instance().mu == mu and instance().sigma == sigma:
                     return instance()
-        o = super(Prior, cls).__new__(cls, mu, sigma)
+        newfunc = super(Prior, cls).__new__
+        if newfunc is object.__new__:
+            o = newfunc(cls)  
+        else:
+            o = newfunc(cls, mu, sigma)            
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
 
@@ -140,7 +148,11 @@ class LogGaussian(Gaussian):
             for instance in cls._instances:
                 if instance().mu == mu and instance().sigma == sigma:
                     return instance()
-        o = super(Prior, cls).__new__(cls, mu, sigma)
+        newfunc = super(Prior, cls).__new__
+        if newfunc is object.__new__:
+            o = newfunc(cls)  
+        else:
+            o = newfunc(cls, mu, sigma)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
 
@@ -258,7 +270,11 @@ class Gamma(Prior):
             for instance in cls._instances:
                 if instance().a == a and instance().b == b:
                     return instance()
-        o = super(Prior, cls).__new__(cls, a, b)
+        newfunc = super(Prior, cls).__new__
+        if newfunc is object.__new__:
+            o = newfunc(cls)  
+        else:
+            o = newfunc(cls, a, b)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()
 
@@ -398,7 +414,7 @@ class DGPLVM_KFDA(Prior):
     def compute_cls(self, x):
         cls = {}
         # Appending each data point to its proper class
-        for j in xrange(self.datanum):
+        for j in range(self.datanum):
             class_label = self.get_class_label(self.lbl[j])
             if class_label not in cls:
                 cls[class_label] = []
@@ -504,6 +520,219 @@ class DGPLVM(Prior):
 
     .. Note:: DGPLVM for Classification paper implementation
 
+    """
+    domain = _REAL
+    
+    def __new__(cls, sigma2, lbl, x_shape): 
+        return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
+
+    def __init__(self, sigma2, lbl, x_shape):
+        self.sigma2 = sigma2
+        # self.x = x
+        self.lbl = lbl
+        self.classnum = lbl.shape[1]
+        self.datanum = lbl.shape[0]
+        self.x_shape = x_shape
+        self.dim = x_shape[1]
+
+    def get_class_label(self, y):
+        for idx, v in enumerate(y):
+            if v == 1:
+                return idx
+        return -1
+
+    # This function assigns each data point to its own class
+    # and returns the dictionary which contains the class name and parameters.
+    def compute_cls(self, x):
+        cls = {}
+        # Appending each data point to its proper class
+        for j in range(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in cls:
+                cls[class_label] = []
+            cls[class_label].append(x[j])
+        return cls
+
+    # This function computes mean of each class. The mean is calculated through each dimension
+    def compute_Mi(self, cls):
+        M_i = np.zeros((self.classnum, self.dim))
+        for i in cls:
+            # Mean of each class
+            class_i = cls[i]
+            M_i[i] = np.mean(class_i, axis=0)
+        return M_i
+
+    # Adding data points as tuple to the dictionary so that we can access indices
+    def compute_indices(self, x):
+        data_idx = {}
+        for j in range(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in data_idx:
+                data_idx[class_label] = []
+            t = (j, x[j])
+            data_idx[class_label].append(t)
+        return data_idx
+
+    # Adding indices to the list so we can access whole the indices
+    def compute_listIndices(self, data_idx):
+        lst_idx = []
+        lst_idx_all = []
+        for i in data_idx:
+            if len(lst_idx) == 0:
+                pass
+                #Do nothing, because it is the first time list is created so is empty
+            else:
+                lst_idx = []
+            # Here we put indices of each class in to the list called lst_idx_all
+            for m in range(len(data_idx[i])):
+                lst_idx.append(data_idx[i][m][0])
+            lst_idx_all.append(lst_idx)
+        return lst_idx_all
+
+    # This function calculates between classes variances
+    def compute_Sb(self, cls, M_i, M_0):
+        Sb = np.zeros((self.dim, self.dim))
+        for i in cls:
+            B = (M_i[i] - M_0).reshape(self.dim, 1)
+            B_trans = B.transpose()
+            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
+        return Sb
+
+    # This function calculates within classes variances
+    def compute_Sw(self, cls, M_i):
+        Sw = np.zeros((self.dim, self.dim))
+        for i in cls:
+            N_i = float(len(cls[i]))
+            W_WT = np.zeros((self.dim, self.dim))
+            for xk in cls[i]:
+                W = (xk - M_i[i])
+                W_WT += np.outer(W, W)
+            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
+        return Sw
+
+    # Calculating beta and Bi for Sb
+    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
+        # import pdb
+        # pdb.set_trace()
+        B_i = np.zeros((self.classnum, self.dim))
+        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            # pdb.set_trace()
+            # Calculating Bi
+            B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
+        for k in range(self.datanum):
+            for i in data_idx:
+                N_i = float(len(data_idx[i]))
+                if k in lst_idx_all[i]:
+                    beta = (float(1) / N_i) - (float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+                else:
+                    beta = -(float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+        Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
+        return Sig_beta_B_i_all
+
+
+    # Calculating W_j s separately so we can access all the W_j s anytime
+    def compute_wj(self, data_idx, M_i):
+        W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                xj = tpl[1]
+                j = tpl[0]
+                W_i[j] = (xj - M_i[i])
+        return W_i
+
+    # Calculating alpha and Wj for Sw
+    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
+        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                k = tpl[0]
+                for j in lst_idx_all[i]:
+                    if k == j:
+                        alpha = 1 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+                    else:
+                        alpha = 0 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+        Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
+        return Sig_alpha_W_i
+
+    # This function calculates log of our prior
+    def lnpdf(self, x):
+        x = x.reshape(self.x_shape)
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
+        return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
+
+    # This function calculates derivative of the log of prior function
+    def lnpdf_grad(self, x):
+        x = x.reshape(self.x_shape)
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        data_idx = self.compute_indices(x)
+        lst_idx_all = self.compute_listIndices(data_idx)
+        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
+        W_i = self.compute_wj(data_idx, M_i)
+        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
+
+        # Calculating inverse of Sb and its transpose and minus
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
+        Sb_inv_N_trans = np.transpose(Sb_inv_N)
+        Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
+        Sw_trans = np.transpose(Sw)
+
+        # Calculating DJ/DXk
+        DJ_Dxk = 2 * (
+            Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
+                Sig_alpha_W_i))
+        # Calculating derivative of the log of the prior
+        DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
+        return DPx_Dx.T
+
+    # def frb(self, x):
+    #     from functools import partial
+    #     from GPy.models import GradientChecker
+    #     f = partial(self.lnpdf)
+    #     df = partial(self.lnpdf_grad)
+    #     grad = GradientChecker(f, df, x, 'X')
+    #     grad.checkgrad(verbose=1)
+
+    def rvs(self, n):
+        return np.random.rand(n)  # A WRONG implementation
+
+    def __str__(self):
+        return 'DGPLVM_prior_Raq'
+
+
+# ******************************************
+
+from .. import Parameterized
+from .. import Param
+class DGPLVM_Lamda(Prior, Parameterized):
+    """
+    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
+
+    :param sigma2: constant
+
+    .. Note:: DGPLVM for Classification paper implementation
+
     """
     domain = _REAL
     # _instances = []
@@ -517,14 +746,18 @@ class DGPLVM(Prior):
     #     cls._instances.append(weakref.ref(o))
     #     return cls._instances[-1]()
 
-    def __init__(self, sigma2, lbl, x_shape):
+    def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
+        super(DGPLVM_Lamda, self).__init__(name=name)
         self.sigma2 = sigma2
         # self.x = x
         self.lbl = lbl
+        self.lamda = lamda
         self.classnum = lbl.shape[1]
         self.datanum = lbl.shape[0]
         self.x_shape = x_shape
         self.dim = x_shape[1]
+        self.lamda = Param('lamda', np.diag(lamda))
+        self.link_parameter(self.lamda)
 
     def get_class_label(self, y):
         for idx, v in enumerate(y):
@@ -549,7 +782,8 @@ class DGPLVM(Prior):
         M_i = np.zeros((self.classnum, self.dim))
         for i in cls:
             # Mean of each class
-            M_i[i] = np.mean(cls[i], axis=0)
+            class_i = cls[i]
+            M_i[i] = np.mean(class_i, axis=0)
         return M_i
 
     # Adding data points as tuple to the dictionary so that we can access indices
@@ -654,6 +888,13 @@ class DGPLVM(Prior):
     # This function calculates log of our prior
     def lnpdf(self, x):
         x = x.reshape(self.x_shape)
+	
+	#!!!!!!!!!!!!!!!!!!!!!!!!!!!
+	#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()	
+
+        xprime = x.dot(np.diagflat(self.lamda))
+        x = xprime
+	# print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
         M_i = self.compute_Mi(cls)
@@ -661,12 +902,16 @@ class DGPLVM(Prior):
         Sw = self.compute_Sw(cls, M_i)
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
         #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
+	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
         return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
 
     # This function calculates derivative of the log of prior function
     def lnpdf_grad(self, x):
         x = x.reshape(self.x_shape)
+        xprime = x.dot(np.diagflat(self.lamda))
+        x = xprime
+	# print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
         M_i = self.compute_Mi(cls)
@@ -680,8 +925,251 @@ class DGPLVM(Prior):
 
         # Calculating inverse of Sb and its transpose and minus
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
-        # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
+	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
+        Sb_inv_N_trans = np.transpose(Sb_inv_N)
+        Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
+        Sw_trans = np.transpose(Sw)
+
+        # Calculating DJ/DXk
+        DJ_Dxk = 2 * (
+            Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
+                Sig_alpha_W_i))
+        # Calculating derivative of the log of the prior
+        DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
+
+        DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
+
+        # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
+        DPxprim_Dx = DPxprim_Dx.T
+    
+        DPxprim_Dlamda = DPx_Dx.dot(x)
+
+    	# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
+        DPxprim_Dlamda = DPxprim_Dlamda.T
+
+        self.lamda.gradient = np.diag(DPxprim_Dlamda)
+	# print DPxprim_Dx
+        return DPxprim_Dx
+
+
+    # def frb(self, x):
+    #     from functools import partial
+    #     from GPy.models import GradientChecker
+    #     f = partial(self.lnpdf)
+    #     df = partial(self.lnpdf_grad)
+    #     grad = GradientChecker(f, df, x, 'X')
+    #     grad.checkgrad(verbose=1)
+
+    def rvs(self, n):
+        return np.random.rand(n)  # A WRONG implementation
+
+    def __str__(self):
+        return 'DGPLVM_prior_Raq_Lamda'
+
+# ******************************************
+
+class DGPLVM_T(Prior):
+    """
+    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
+
+    :param sigma2: constant
+
+    .. Note:: DGPLVM for Classification paper implementation
+
+    """
+    domain = _REAL
+    # _instances = []
+    # def __new__(cls, mu, sigma): # Singleton:
+    #     if cls._instances:
+    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
+    #         for instance in cls._instances:
+    #             if instance().mu == mu and instance().sigma == sigma:
+    #                 return instance()
+    #     o = super(Prior, cls).__new__(cls, mu, sigma)
+    #     cls._instances.append(weakref.ref(o))
+    #     return cls._instances[-1]()
+
+    def __init__(self, sigma2, lbl, x_shape, vec):
+        self.sigma2 = sigma2
+        # self.x = x
+        self.lbl = lbl
+        self.classnum = lbl.shape[1]
+        self.datanum = lbl.shape[0]
+        self.x_shape = x_shape
+        self.dim = x_shape[1]
+        self.vec = vec
+
+
+    def get_class_label(self, y):
+        for idx, v in enumerate(y):
+            if v == 1:
+                return idx
+        return -1
+
+    # This function assigns each data point to its own class
+    # and returns the dictionary which contains the class name and parameters.
+    def compute_cls(self, x):
+        cls = {}
+        # Appending each data point to its proper class
+        for j in range(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in cls:
+                cls[class_label] = []
+            cls[class_label].append(x[j])
+        return cls
+
+    # This function computes mean of each class. The mean is calculated through each dimension
+    def compute_Mi(self, cls):
+        M_i = np.zeros((self.classnum, self.dim))
+        for i in cls:
+            # Mean of each class
+	    # class_i = np.multiply(cls[i],vec)
+            class_i = cls[i]
+            M_i[i] = np.mean(class_i, axis=0)
+        return M_i
+
+    # Adding data points as tuple to the dictionary so that we can access indices
+    def compute_indices(self, x):
+        data_idx = {}
+        for j in range(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in data_idx:
+                data_idx[class_label] = []
+            t = (j, x[j])
+            data_idx[class_label].append(t)
+        return data_idx
+
+    # Adding indices to the list so we can access whole the indices
+    def compute_listIndices(self, data_idx):
+        lst_idx = []
+        lst_idx_all = []
+        for i in data_idx:
+            if len(lst_idx) == 0:
+                pass
+                #Do nothing, because it is the first time list is created so is empty
+            else:
+                lst_idx = []
+            # Here we put indices of each class in to the list called lst_idx_all
+            for m in range(len(data_idx[i])):
+                lst_idx.append(data_idx[i][m][0])
+            lst_idx_all.append(lst_idx)
+        return lst_idx_all
+
+    # This function calculates between classes variances
+    def compute_Sb(self, cls, M_i, M_0):
+        Sb = np.zeros((self.dim, self.dim))
+        for i in cls:
+            B = (M_i[i] - M_0).reshape(self.dim, 1)
+            B_trans = B.transpose()
+            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
+        return Sb
+
+    # This function calculates within classes variances
+    def compute_Sw(self, cls, M_i):
+        Sw = np.zeros((self.dim, self.dim))
+        for i in cls:
+            N_i = float(len(cls[i]))
+            W_WT = np.zeros((self.dim, self.dim))
+            for xk in cls[i]:
+                W = (xk - M_i[i])
+                W_WT += np.outer(W, W)
+            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
+        return Sw
+
+    # Calculating beta and Bi for Sb
+    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
+        # import pdb
+        # pdb.set_trace()
+        B_i = np.zeros((self.classnum, self.dim))
+        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            # pdb.set_trace()
+            # Calculating Bi
+            B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
+        for k in range(self.datanum):
+            for i in data_idx:
+                N_i = float(len(data_idx[i]))
+                if k in lst_idx_all[i]:
+                    beta = (float(1) / N_i) - (float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+                else:
+                    beta = -(float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+        Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
+        return Sig_beta_B_i_all
+
+
+    # Calculating W_j s separately so we can access all the W_j s anytime
+    def compute_wj(self, data_idx, M_i):
+        W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                xj = tpl[1]
+                j = tpl[0]
+                W_i[j] = (xj - M_i[i])
+        return W_i
+
+    # Calculating alpha and Wj for Sw
+    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
+        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                k = tpl[0]
+                for j in lst_idx_all[i]:
+                    if k == j:
+                        alpha = 1 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+                    else:
+                        alpha = 0 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+        Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
+        return Sig_alpha_W_i
+
+    # This function calculates log of our prior
+    def lnpdf(self, x):
+        x = x.reshape(self.x_shape)
+        xprim = x.dot(self.vec)
+        x = xprim
+	# print x  
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+	#print 'SB_inv: ', Sb_inv_N
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
+        return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
+
+    # This function calculates derivative of the log of prior function
+    def lnpdf_grad(self, x):
+        x = x.reshape(self.x_shape)
+        xprim = x.dot(self.vec)
+        x = xprim 
+	# print x       
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        data_idx = self.compute_indices(x)
+        lst_idx_all = self.compute_listIndices(data_idx)
+        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
+        W_i = self.compute_wj(data_idx, M_i)
+        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
+
+        # Calculating inverse of Sb and its transpose and minus
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+	#print 'SB_inv: ',Sb_inv_N
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+        Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
         Sb_inv_N_trans = np.transpose(Sb_inv_N)
         Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
         Sw_trans = np.transpose(Sw)
@@ -706,7 +1194,9 @@ class DGPLVM(Prior):
         return np.random.rand(n)  # A WRONG implementation
 
     def __str__(self):
-        return 'DGPLVM_prior'
+        return 'DGPLVM_prior_Raq_TTT'
+
+
 
 class HalfT(Prior):
     """
diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py
index 01a1f44b..830809d6 100644
--- a/GPy/core/parameterization/transformations.py
+++ b/GPy/core/parameterization/transformations.py
@@ -62,7 +62,7 @@ class Transformation(object):
         import matplotlib.pyplot as plt
         from ...plotting.matplot_dep import base_plots
         x = np.linspace(-8,8)
-        base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
+        base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw)
         axes = plt.gca()
         axes.set_xlabel(xlabel)
         axes.set_ylabel(ylabel)
diff --git a/GPy/testing/rv_transformation_tests.py b/GPy/testing/rv_transformation_tests.py
new file mode 100644
index 00000000..e6b3e3e7
--- /dev/null
+++ b/GPy/testing/rv_transformation_tests.py
@@ -0,0 +1,101 @@
+# Written by Ilias Bilionis
+"""
+Test if hyperparameters in models are properly transformed.
+"""
+
+
+import unittest
+import numpy as np
+import scipy.stats as st
+import GPy
+
+
+class TestModel(GPy.core.Model):
+    """
+    A simple GPy model with one parameter.
+    """
+    def __init__(self):
+        GPy.core.Model.__init__(self, 'test_model')
+        theta = GPy.core.Param('theta', 1.)
+        self.link_parameter(theta)
+
+    def log_likelihood(self):
+        return 0.
+
+
+class RVTransformationTestCase(unittest.TestCase):
+
+    def _test_trans(self, trans):
+        m = TestModel()
+        prior = GPy.priors.LogGaussian(.5, 0.1)
+        m.theta.set_prior(prior)
+        m.theta.unconstrain()
+        m.theta.constrain(trans)
+        # The PDF of the transformed variables
+        p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0])
+        # To the empirical PDF of:
+        theta_s = prior.rvs(100000)
+        phi_s = trans.finv(theta_s)
+        # which is essentially a kernel density estimation
+        kde = st.gaussian_kde(phi_s)
+        # We will compare the PDF here:
+        phi = np.linspace(phi_s.min(), phi_s.max(), 100)
+        # The transformed PDF of phi should be this:
+        pdf_phi = np.array([p_phi(p) for p in phi])
+        # UNCOMMENT TO SEE GRAPHICAL COMPARISON
+        #import matplotlib.pyplot as plt
+        #fig, ax = plt.subplots()
+        #ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
+        #ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
+        #ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
+        #ax.set_xlabel(r'transformed $\theta$', fontsize=16)
+        #ax.set_ylabel('PDF', fontsize=16)
+        #plt.legend(loc='best')
+        #plt.show(block=True)
+        # END OF PLOT
+        # The following test cannot be very accurate
+        self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1)
+        # Check the gradients at a few random points
+        for i in xrange(10):
+            m.theta = theta_s[i]
+            self.assertTrue(m.checkgrad(verbose=True))
+
+    def test_Logexp(self):
+        self._test_trans(GPy.constraints.Logexp())
+        self._test_trans(GPy.constraints.Exponent())
+
+
+if __name__ == '__main__':
+    unittest.main()
+    quit()
+    m = TestModel()
+    prior = GPy.priors.LogGaussian(0., .9)
+    m.theta.set_prior(prior)
+
+    # The following should return the PDF in terms of the transformed quantities
+    p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0])
+
+    # Let's look at the transformation phi = log(exp(theta - 1))
+    trans = GPy.constraints.Exponent()
+    m.theta.constrain(trans)
+    # Plot the transformed probability density
+    phi = np.linspace(-8, 8, 100)
+    fig, ax = plt.subplots()
+    # Let's draw some samples of theta and transform them so that we see
+    # which one is right
+    theta_s = prior.rvs(10000)
+    # Transform it to the new variables
+    phi_s = trans.finv(theta_s)
+    # And draw their histogram
+    ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical')
+    # This is to be compared to the PDF of the model expressed in terms of these new
+    # variables
+    ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2)
+    ax.set_xlim(-3, 10)
+    ax.set_xlabel(r'transformed $\theta$', fontsize=16)
+    ax.set_ylabel('PDF', fontsize=16)
+    plt.legend(loc='best')
+    # Now let's test the gradients
+    m.checkgrad(verbose=True)
+    # And show the plot
+    plt.show(block=True)
diff --git a/ib_tests/test_regression.py b/ib_tests/test_regression.py
index cd9e2943..742948c1 100644
--- a/ib_tests/test_regression.py
+++ b/ib_tests/test_regression.py
@@ -34,3 +34,4 @@ if __name__ == '__main__':
         ax = fig.add_subplot(samples.shape[1], 1, i + 1)
         ax.plot(samples[:, i], linewidth=1.5)
     plt.show(block=True)
+
diff --git a/ib_tests/test_transformation_of_pdf.py b/ib_tests/test_transformation_of_pdf.py
index af152ca8..8da16b21 100644
--- a/ib_tests/test_transformation_of_pdf.py
+++ b/ib_tests/test_transformation_of_pdf.py
@@ -14,44 +14,54 @@ import sys
 import os
 # Make sure we load the GP that is here
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+print 'trying'
 import GPy
+print 'done'
 import matplotlib.pyplot as plt
 import numpy as np
 import scipy.stats as st
 import scipy.integrate as integrate
 
 
-if __name__ == '__main__':
-    p_theta = st.lognorm(.9)
+class TestModel(GPy.core.Model):
+    def __init__(self):
+        GPy.core.Model.__init__(self, 'test_model')
+        theta = GPy.core.Param('theta', 1.)
+        self.link_parameter(theta)
 
-    # Plot the PDF of theta
-    fig, ax = plt.subplots()
-    theta = np.linspace(0.0001, 8., 100)
-    ax.plot(theta, p_theta.pdf(theta), linewidth=2, label='True PDF')
-    ax.set_xlabel(r'$\theta$', fontsize=16)
-    ax.set_ylabel(r'$p(\theta)$', fontsize=16)
-    
-    # Now let's look at the transformation phi = log(exp(theta - 1))
-    t = GPy.constraints.Logexp()
-    t.plot()
+    def log_likelihood(self):
+        return 0.
+
+
+if __name__ == '__main__':
+    m = TestModel()
+    prior = GPy.priors.LogGaussian(0., .9)
+    m.theta.set_prior(prior)
+
+    # The following should return the PDF in terms of the transformed quantities
+    p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0])
+
+    # Let's look at the transformation phi = log(exp(theta - 1))
+    trans = GPy.constraints.Exponent()
+    m.theta.constrain(trans)
     # Plot the transformed probability density
     phi = np.linspace(-8, 8, 100)
     fig, ax = plt.subplots()
-    ax.plot(phi, p_theta.pdf(t.f(phi)) * t.jacobianfactor(t.f(phi)), linewidth=2,
-            label='Transformed PDF')
-    # Now find the normalization constant for the naive transformation of the
-    # PDF
-    p_phi_prop = lambda(phi): p_theta.pdf(t.f(phi))
-    c = integrate.quad(p_phi_prop, -np.inf, np.inf)[0]
-    p_phi = lambda(phi): p_phi_prop(phi) / c
-    ax.plot(phi, p_phi(phi), '--', linewidth=2, label='Naively transformed PDF')
-    # Now let's draw some samples of theta and transform them so that we see
+    # Let's draw some samples of theta and transform them so that we see
     # which one is right
-    theta_s = p_theta.rvs(100000)
-    phi_s = t.finv(theta_s)
+    theta_s = prior.rvs(10000)
+    # Transform it to the new variables
+    phi_s = trans.finv(theta_s)
+    # And draw their histogram
     ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical')
+    # This is to be compared to the PDF of the model expressed in terms of these new
+    # variables
+    ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2)
     ax.set_xlim(-3, 10)
     ax.set_xlabel(r'transformed $\theta$', fontsize=16)
     ax.set_ylabel('PDF', fontsize=16)
     plt.legend(loc='best')
+    # Now let's test the gradients
+    m.checkgrad(verbose=True)
+    # And show the plot
     plt.show(block=True)