From e42937ccaf218209ea60d483fe86d2e78e9baba0 Mon Sep 17 00:00:00 2001
From: frb-yousefi <fariba.yousefi@gmail.com>
Date: Thu, 12 Mar 2015 11:01:55 +0000
Subject: [PATCH 1/6] Descrimative BGPLVM prior with lambda added

---
 GPy/core/parameterization/priors.py | 298 ++++++++++++++++++++++++++++
 1 file changed, 298 insertions(+)

diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py
index 20a78691..93784d08 100644
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@@ -712,6 +712,304 @@ class DGPLVM(Prior):
         return 'DGPLVM_prior_Raq'
 
 
+# ******************************************
+
+class DGPLVM_Lamda(Prior):
+    """
+    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
+
+    :param sigma2: constant
+
+    .. Note:: DGPLVM for Classification paper implementation
+
+    """
+    domain = _REAL
+    # _instances = []
+    # def __new__(cls, mu, sigma): # Singleton:
+    #     if cls._instances:
+    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
+    #         for instance in cls._instances:
+    #             if instance().mu == mu and instance().sigma == sigma:
+    #                 return instance()
+    #     o = super(Prior, cls).__new__(cls, mu, sigma)
+    #     cls._instances.append(weakref.ref(o))
+    #     return cls._instances[-1]()
+
+    def __init__(self, sigma2, lbl, x_shape, lamda):
+        self.sigma2 = sigma2
+        # self.x = x
+        self.lbl = lbl
+	self.lamda = lamda
+        self.classnum = lbl.shape[1]
+        self.datanum = lbl.shape[0]
+        self.x_shape = x_shape
+        self.dim = x_shape[1]
+	#self.lamda = Param('lamda', np.diag(lamda))
+        #self.link_parameter(self.lamda)
+
+    def get_class_label(self, y):
+        for idx, v in enumerate(y):
+            if v == 1:
+                return idx
+        return -1
+
+    # This function assigns each data point to its own class
+    # and returns the dictionary which contains the class name and parameters.
+    def compute_cls(self, x):
+        cls = {}
+        # Appending each data point to its proper class
+        for j in xrange(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in cls:
+                cls[class_label] = []
+            cls[class_label].append(x[j])
+        return cls
+
+    # This function computes mean of each class. The mean is calculated through each dimension
+    def compute_Mi(self, cls):
+        M_i = np.zeros((self.classnum, self.dim))
+        for i in cls:
+            # Mean of each class
+	    class_i = cls[i]
+            M_i[i] = np.mean(class_i, axis=0)
+        return M_i
+
+    # Adding data points as tuple to the dictionary so that we can access indices
+    def compute_indices(self, x):
+        data_idx = {}
+        for j in xrange(self.datanum):
+            class_label = self.get_class_label(self.lbl[j])
+            if class_label not in data_idx:
+                data_idx[class_label] = []
+            t = (j, x[j])
+            data_idx[class_label].append(t)
+        return data_idx
+
+    # Adding indices to the list so we can access whole the indices
+    def compute_listIndices(self, data_idx):
+        lst_idx = []
+        lst_idx_all = []
+        for i in data_idx:
+            if len(lst_idx) == 0:
+                pass
+                #Do nothing, because it is the first time list is created so is empty
+            else:
+                lst_idx = []
+            # Here we put indices of each class in to the list called lst_idx_all
+            for m in xrange(len(data_idx[i])):
+                lst_idx.append(data_idx[i][m][0])
+            lst_idx_all.append(lst_idx)
+        return lst_idx_all
+
+    # This function calculates between classes variances
+    def compute_Sb(self, cls, M_i, M_0):
+        Sb = np.zeros((self.dim, self.dim))
+        for i in cls:
+            B = (M_i[i] - M_0).reshape(self.dim, 1)
+            B_trans = B.transpose()
+            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
+        return Sb
+
+    # This function calculates within classes variances
+    def compute_Sw(self, cls, M_i):
+        Sw = np.zeros((self.dim, self.dim))
+        for i in cls:
+            N_i = float(len(cls[i]))
+            W_WT = np.zeros((self.dim, self.dim))
+            for xk in cls[i]:
+                W = (xk - M_i[i])
+                W_WT += np.outer(W, W)
+            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
+        return Sw
+
+    # Calculating beta and Bi for Sb
+    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
+        import pdb
+        # pdb.set_trace()
+        B_i = np.zeros((self.classnum, self.dim))
+        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            # pdb.set_trace()
+            # Calculating Bi
+            B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
+        for k in xrange(self.datanum):
+            for i in data_idx:
+                N_i = float(len(data_idx[i]))
+                if k in lst_idx_all[i]:
+                    beta = (float(1) / N_i) - (float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+                else:
+                    beta = -(float(1) / self.datanum)
+                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
+        Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
+        return Sig_beta_B_i_all
+
+
+    # Calculating W_j s separately so we can access all the W_j s anytime
+    def compute_wj(self, data_idx, M_i):
+        W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                xj = tpl[1]
+                j = tpl[0]
+                W_i[j] = (xj - M_i[i])
+        return W_i
+
+    # Calculating alpha and Wj for Sw
+    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
+        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
+        for i in data_idx:
+            N_i = float(len(data_idx[i]))
+            for tpl in data_idx[i]:
+                k = tpl[0]
+                for j in lst_idx_all[i]:
+                    if k == j:
+                        alpha = 1 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+                    else:
+                        alpha = 0 - (float(1) / N_i)
+                        Sig_alpha_W_i[k] += (alpha * W_i[j])
+        Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
+        return Sig_alpha_W_i
+
+    # This function calculates log of our prior
+    def lnpdf(self, x):
+	#xprime = x.dot(np.diagflat(self.lambda))
+        x = x.reshape(self.x_shape)
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+        Sb = self.compute_Sb(cls, M_i, M_0)
+        Sw = self.compute_Sw(cls, M_i)
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
+        return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
+
+    # This function calculates derivative of the log of prior function
+    def lnpdf_grad(self, x):
+	#xprime = x.dot(np.diagflat(self.lambda))
+	# pdb.set_trace()
+	#print 'before:', x.shape
+        x = x.reshape(self.x_shape)
+	#print 'after:', x.shape
+        cls = self.compute_cls(x)
+        M_0 = np.mean(x, axis=0)
+        M_i = self.compute_Mi(cls)
+	# print M_i.shape
+        Sb = self.compute_Sb(cls, M_i, M_0)
+	# print Sb.shape
+        Sw = self.compute_Sw(cls, M_i)
+	# print Sw.shape
+        data_idx = self.compute_indices(x)
+        lst_idx_all = self.compute_listIndices(data_idx)
+        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
+        W_i = self.compute_wj(data_idx, M_i)
+        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
+
+        # Calculating inverse of Sb and its transpose and minus
+        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
+        #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
+	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
+        Sb_inv_N_trans = np.transpose(Sb_inv_N)
+        Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
+        Sw_trans = np.transpose(Sw)
+
+        # Calculating DJ/DXk
+	# pdb.set_trace()
+        DJ_Dxk = 2 * (
+            Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
+                Sig_alpha_W_i))
+	#print 'DJ_Dxk:', DJ_Dxk.shape
+        # Calculating derivative of the log of the prior
+        DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
+	#print 'x.shape:', x.shape 	
+	#print 'DPx_Dx.shape:', DPx_Dx.shape
+
+
+	# DPxprim_Dlamda = DPx_Dx.dot(x)
+	# #DPxprim_Dlamda = (x.T).dot(DPx_Dx.T)
+	# # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
+	# DPxprim_Dlamda = DPxprim_Dlamda.T
+	# print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape
+	# print DPxprim_Dlamda
+	# return DPxprim_Dlamda
+
+
+        DPxprim_Dx = self.lamda.dot(DPx_Dx)
+        print 'before :DPxprim_Dx:', DPxprim_Dx.shape
+        # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
+        DPxprim_Dx = DPxprim_Dx.T
+        print 'after: DPxprim_Dx:', DPxprim_Dx.shape
+        #return DPxprim_Dx
+    
+    	DPxprim_Dlamda = DPx_Dx.dot(x)
+    	#DPxprim_Dlamda = (x.T).dot(DPx_Dx.T)
+    	# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
+    	DPxprim_Dlamda = DPxprim_Dlamda.T
+    	print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape
+    	print DPxprim_Dlamda
+    	c = DPxprim_Dx.dot(DPxprim_Dlamda)
+    	print 'c.shape:', c.shape
+
+	#self.lamda.gradient = np.diag(DPxprim_Dlamda)
+    	return c
+
+
+ #    def lnpdf_grad(self, x):
+	# x = x.reshape(self.x_shape)
+ #    	cls = self.compute_cls(x)
+ #    	M_0 = np.mean(x, axis=0)
+ #    	M_i = self.compute_Mi(cls)
+ #    	Sb = self.compute_Sb(cls, M_i, M_0)
+ #    	Sw = self.compute_Sw(cls, M_i)
+ #    	data_idx = self.compute_indices(x)
+ #    	lst_idx_all = self.compute_listIndices(data_idx)
+ #    	Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
+ #    	W_i = self.compute_wj(data_idx, M_i)
+ #    	Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
+
+ #    	# Calculating inverse of Sb and its transpose and minus
+ #    	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
+ #    	Sb_inv_N_trans = np.transpose(Sb_inv_N)
+ #    	Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
+ #    	Sw_trans = np.transpose(Sw)
+
+ #    	# Calculating DJ/DXk
+ #    	DJ_Dxk = 2 * (
+	#     Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
+	#     Sig_alpha_W_i))
+ #    	print 'DJ_Dxk:', DJ_Dxk.shape
+ #    	# Calculating derivative of the log of the prior
+ #   	DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
+ #    	print 'x.shape:', x.shape 
+ #    	print 'DPx_Dx.shape:', DPx_Dx.shape
+ #    	DPxprim_Dx = self.lamda.dot(DPx_Dx)
+ #    	print 'before :DPxprim_Dx:', DPxprim_Dx.shape
+ #    	# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
+ #    	DPxprim_Dx = DPxprim_Dx.T
+ #    	print 'after: DPxprim_Dx:', DPxprim_Dx.shape
+ #    	return DPxprim_Dx
+
+
+    # def frb(self, x):
+    #     from functools import partial
+    #     from GPy.models import GradientChecker
+    #     f = partial(self.lnpdf)
+    #     df = partial(self.lnpdf_grad)
+    #     grad = GradientChecker(f, df, x, 'X')
+    #     grad.checkgrad(verbose=1)
+
+    def rvs(self, n):
+        return np.random.rand(n)  # A WRONG implementation
+
+    def __str__(self):
+        return 'DGPLVM_prior_Raq_Lamda'
+
+# ******************************************
 
 class DGPLVM_T(Prior):
     """

From e7650c8a90de31e3d4877fcbfbc217a9b1202ebf Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Mon, 20 Apr 2015 16:02:59 +0200
Subject: [PATCH 2/6] [sparse gp] memory overflow with big data, iterating over
 dimensions now

---
 GPy/core/sparse_gp.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 35644bfe..624a8f9c 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -10,6 +10,7 @@ from .parameterization.variational import VariationalPosterior, NormalPosterior
 from ..util.linalg import mdot
 
 import logging
+import itertools
 logger = logging.getLogger("sparse gp")
 
 class SparseGP(GP):
@@ -135,7 +136,13 @@ class SparseGP(GP):
                 var = var
             else:
                 Kxx = kern.Kdiag(Xnew)
-                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
+                if self.posterior.woodbury_inv.ndim == 2:
+                    var = Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0)
+                elif self.posterior.woodbury_inv.ndim == 3:
+                    var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
+                    for i in range(var.shape[1]):
+                        var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
+                var = var
             #add in the mean function
             if self.mean_function is not None:
                 mu += self.mean_function.f(Xnew)

From 440d7b64786f6beed299adde5812d66aec662124 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Mon, 20 Apr 2015 16:03:39 +0200
Subject: [PATCH 3/6] [basis funcs] linear slope identifiability higher,
 symmetry plus true linear effect

---
 GPy/kern/_src/basis_funcs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py
index b6a95354..1b300661 100644
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@@ -81,7 +81,7 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel):
     def phi(self, X):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(phi > self.stop, self.stop, phi)
-        return ((phi-self.start)/(self.stop-self.start))-.5
+        return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
         return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
     
 class ChangePointBasisFuncKernel(BasisFuncKernel):

From 17fa8a0ada056d87e36479f52af650a435301e95 Mon Sep 17 00:00:00 2001
From: frb-yousefi <fariba.yousefi@gmail.com>
Date: Mon, 20 Apr 2015 15:51:28 +0100
Subject: [PATCH 4/6] prior with Lambda

---
 GPy/core/parameterization/priors.py | 113 +++++++++-------------------
 GPy/models/__init__.py              |   1 +
 GPy/models/dpgplvm.py               |  19 +++++
 3 files changed, 55 insertions(+), 78 deletions(-)
 create mode 100644 GPy/models/dpgplvm.py

diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py
index 93784d08..4a6b93e3 100644
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@@ -714,7 +714,9 @@ class DGPLVM(Prior):
 
 # ******************************************
 
-class DGPLVM_Lamda(Prior):
+from parameterized import Parameterized
+from .. import Param
+class DGPLVM_Lamda(Prior, Parameterized):
     """
     Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
 
@@ -735,7 +737,8 @@ class DGPLVM_Lamda(Prior):
     #     cls._instances.append(weakref.ref(o))
     #     return cls._instances[-1]()
 
-    def __init__(self, sigma2, lbl, x_shape, lamda):
+    def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
+        super(DGPLVM_Lamda, self).__init__(name=name)
         self.sigma2 = sigma2
         # self.x = x
         self.lbl = lbl
@@ -744,8 +747,8 @@ class DGPLVM_Lamda(Prior):
         self.datanum = lbl.shape[0]
         self.x_shape = x_shape
         self.dim = x_shape[1]
-	#self.lamda = Param('lamda', np.diag(lamda))
-        #self.link_parameter(self.lamda)
+	self.lamda = Param('lamda', np.diag(lamda))
+        self.link_parameter(self.lamda)
 
     def get_class_label(self, y):
         for idx, v in enumerate(y):
@@ -875,8 +878,14 @@ class DGPLVM_Lamda(Prior):
 
     # This function calculates log of our prior
     def lnpdf(self, x):
-	#xprime = x.dot(np.diagflat(self.lambda))
         x = x.reshape(self.x_shape)
+	
+	#!!!!!!!!!!!!!!!!!!!!!!!!!!!
+	#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()	
+
+	xprime = x.dot(np.diagflat(self.lamda))
+	x = xprime
+	# print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
         M_i = self.compute_Mi(cls)
@@ -890,19 +899,15 @@ class DGPLVM_Lamda(Prior):
 
     # This function calculates derivative of the log of prior function
     def lnpdf_grad(self, x):
-	#xprime = x.dot(np.diagflat(self.lambda))
-	# pdb.set_trace()
-	#print 'before:', x.shape
         x = x.reshape(self.x_shape)
-	#print 'after:', x.shape
+	xprime = x.dot(np.diagflat(self.lamda))
+	x = xprime
+	# print x
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
         M_i = self.compute_Mi(cls)
-	# print M_i.shape
         Sb = self.compute_Sb(cls, M_i, M_0)
-	# print Sb.shape
         Sw = self.compute_Sw(cls, M_i)
-	# print Sw.shape
         data_idx = self.compute_indices(x)
         lst_idx_all = self.compute_listIndices(data_idx)
         Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
@@ -919,80 +924,25 @@ class DGPLVM_Lamda(Prior):
         Sw_trans = np.transpose(Sw)
 
         # Calculating DJ/DXk
-	# pdb.set_trace()
         DJ_Dxk = 2 * (
             Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
                 Sig_alpha_W_i))
-	#print 'DJ_Dxk:', DJ_Dxk.shape
         # Calculating derivative of the log of the prior
         DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
-	#print 'x.shape:', x.shape 	
-	#print 'DPx_Dx.shape:', DPx_Dx.shape
 
+        DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
 
-	# DPxprim_Dlamda = DPx_Dx.dot(x)
-	# #DPxprim_Dlamda = (x.T).dot(DPx_Dx.T)
-	# # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
-	# DPxprim_Dlamda = DPxprim_Dlamda.T
-	# print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape
-	# print DPxprim_Dlamda
-	# return DPxprim_Dlamda
-
-
-        DPxprim_Dx = self.lamda.dot(DPx_Dx)
-        print 'before :DPxprim_Dx:', DPxprim_Dx.shape
         # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
         DPxprim_Dx = DPxprim_Dx.T
-        print 'after: DPxprim_Dx:', DPxprim_Dx.shape
-        #return DPxprim_Dx
     
     	DPxprim_Dlamda = DPx_Dx.dot(x)
-    	#DPxprim_Dlamda = (x.T).dot(DPx_Dx.T)
+
     	# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
     	DPxprim_Dlamda = DPxprim_Dlamda.T
-    	print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape
-    	print DPxprim_Dlamda
-    	c = DPxprim_Dx.dot(DPxprim_Dlamda)
-    	print 'c.shape:', c.shape
 
-	#self.lamda.gradient = np.diag(DPxprim_Dlamda)
-    	return c
-
-
- #    def lnpdf_grad(self, x):
-	# x = x.reshape(self.x_shape)
- #    	cls = self.compute_cls(x)
- #    	M_0 = np.mean(x, axis=0)
- #    	M_i = self.compute_Mi(cls)
- #    	Sb = self.compute_Sb(cls, M_i, M_0)
- #    	Sw = self.compute_Sw(cls, M_i)
- #    	data_idx = self.compute_indices(x)
- #    	lst_idx_all = self.compute_listIndices(data_idx)
- #    	Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
- #    	W_i = self.compute_wj(data_idx, M_i)
- #    	Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
-
- #    	# Calculating inverse of Sb and its transpose and minus
- #    	Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
- #    	Sb_inv_N_trans = np.transpose(Sb_inv_N)
- #    	Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
- #    	Sw_trans = np.transpose(Sw)
-
- #    	# Calculating DJ/DXk
- #    	DJ_Dxk = 2 * (
-	#     Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
-	#     Sig_alpha_W_i))
- #    	print 'DJ_Dxk:', DJ_Dxk.shape
- #    	# Calculating derivative of the log of the prior
- #   	DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
- #    	print 'x.shape:', x.shape 
- #    	print 'DPx_Dx.shape:', DPx_Dx.shape
- #    	DPxprim_Dx = self.lamda.dot(DPx_Dx)
- #    	print 'before :DPxprim_Dx:', DPxprim_Dx.shape
- #    	# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
- #    	DPxprim_Dx = DPxprim_Dx.T
- #    	print 'after: DPxprim_Dx:', DPxprim_Dx.shape
- #    	return DPxprim_Dx
+	self.lamda.gradient = np.diag(DPxprim_Dlamda)
+	# print DPxprim_Dx
+    	return DPxprim_Dx
 
 
     # def frb(self, x):
@@ -1062,11 +1012,12 @@ class DGPLVM_T(Prior):
         return cls
 
     # This function computes mean of each class. The mean is calculated through each dimension
-    def compute_Mi(self, cls, vec):
+    def compute_Mi(self, cls):
         M_i = np.zeros((self.classnum, self.dim))
         for i in cls:
             # Mean of each class
-	    class_i = np.multiply(cls[i],vec)
+	    # class_i = np.multiply(cls[i],vec)
+            class_i = cls[i]
             M_i[i] = np.mean(class_i, axis=0)
         return M_i
 
@@ -1172,9 +1123,12 @@ class DGPLVM_T(Prior):
     # This function calculates log of our prior
     def lnpdf(self, x):
         x = x.reshape(self.x_shape)
+	xprim = x.dot(self.vec)
+	x = xprim
+	# print x  
         cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
-        M_i = self.compute_Mi(cls, self.vec)
+        M_i = self.compute_Mi(cls)
         Sb = self.compute_Sb(cls, M_i, M_0)
         Sw = self.compute_Sw(cls, M_i)
         # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
@@ -1186,10 +1140,13 @@ class DGPLVM_T(Prior):
 
     # This function calculates derivative of the log of prior function
     def lnpdf_grad(self, x):
-        x = x.reshape(self.x_shape)
-        cls = self.compute_cls(x)
+	x = x.reshape(self.x_shape)
+	xprim = x.dot(self.vec)
+	x = xprim 
+	# print x       
+	cls = self.compute_cls(x)
         M_0 = np.mean(x, axis=0)
-        M_i = self.compute_Mi(cls, self.vec)
+        M_i = self.compute_Mi(cls)
         Sb = self.compute_Sb(cls, M_i, M_0)
         Sw = self.compute_Sw(cls, M_i)
         data_idx = self.compute_indices(x)
diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index c6abb5de..3d1d8a9c 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -21,3 +21,4 @@ from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
 from gp_var_gauss import GPVariationalGaussianApproximation
 from one_vs_all_classification import OneVsAllClassification
 from one_vs_all_sparse_classification import OneVsAllSparseClassification
+from dpgplvm import DPBayesianGPLVM
diff --git a/GPy/models/dpgplvm.py b/GPy/models/dpgplvm.py
new file mode 100644
index 00000000..1c5dccd0
--- /dev/null
+++ b/GPy/models/dpgplvm.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2015 the GPy Austhors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from .. import kern
+from bayesian_gplvm import BayesianGPLVM
+from ..core.parameterization.variational import NormalPosterior, NormalPrior
+
+class DPBayesianGPLVM(BayesianGPLVM):
+    """
+    Bayesian Gaussian Process Latent Variable Model with Descriminative prior
+    """
+    def __init__(self, Y, input_dim, X_prior, X=None, X_variance=None, init='PCA', num_inducing=10,
+                 Z=None, kernel=None, inference_method=None, likelihood=None,
+                 name='bayesian gplvm', mpi_comm=None, normalizer=None,
+                 missing_data=False, stochastic=False, batchsize=1):
+        super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, 					num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
+	self.X.mean.set_prior(X_prior)
+        self.link_parameter(X_prior)

From d428465e08eb62077e7f470f88413fd0fb04a478 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 21 Apr 2015 11:18:31 +0200
Subject: [PATCH 5/6] [basisfuncs] updated kernel to better reflect linear
 trends and added ARD support

---
 GPy/kern/_src/basis_funcs.py | 77 +++++++++++++++++++++++++++---------
 GPy/kern/_src/periodic.py    |  2 -
 2 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py
index 1b300661..af4a7188 100644
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@@ -5,33 +5,59 @@ from ...core.parameterization.param import Param
 from ...core.parameterization.transformations import Logexp
 import numpy as np
 from ...util.caching import Cache_this
-from ...util.linalg import tdot
+from ...util.linalg import tdot, mdot
 
 class BasisFuncKernel(Kern):
-    def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'):
+    def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
         """
         Abstract superclass for kernels with explicit basis functions for use in GPy.
         
         This class does NOT automatically add an offset to the design matrix phi!
         """
         super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
+        self.ARD = ARD
+        if self.ARD:
+            phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
+            variance = variance * np.ones(phi_test.shape[1])
+        else:
+            variance = np.array(variance)
         self.variance = Param('variance', variance, Logexp())
         self.link_parameter(self.variance)
     
+    def parameters_changed(self):
+        self.alpha = np.sqrt(self.variance)
+        #self.beta = 1./self.variance
+    
+    @Cache_this(limit=3, ignore_args=())
     def phi(self, X):
-        raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi')
+        return self._phi(X)
+
+    def _phi(self, X):
+        raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
         
     def K(self, X, X2=None):
-        return self.variance * self._K(X, X2)
+        return self._K(X, X2)
 
     def Kdiag(self, X, X2=None):
-        return self.variance * np.diag(self._K(X, X2))
+        return np.diag(self._K(X, X2))
     
     def update_gradients_full(self, dL_dK, X, X2=None):
-        self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
+        if self.ARD:
+            phi1 = self.phi(X)
+            if X2 is None or X is X2:
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
+            else:
+                phi2 = self.phi(X2)
+                self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
+        else:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
         
     def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X))
+        if self.ARD:
+            phi1 = self.phi(X)
+            self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
+        else:
+            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X))
         
     def concatenate_offset(self, X):
         return np.c_[np.ones((X.shape[0], 1)), X]
@@ -52,19 +78,19 @@ class BasisFuncKernel(Kern):
                 posterior = self._highest_parent_.posterior
             except NameError:
                 raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
-        phi = self.phi(X)
-        return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi)))
+        phi_alpha = self.phi(X) * self.variance
+        return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
     
     @Cache_this(limit=3, ignore_args=())
     def _K(self, X, X2):
         if X2 is None or X is X2:
-            phi = self.phi(X)
+            phi = self.phi(X) * self.alpha
             if phi.ndim != 2:
                 phi = phi[:, None]
             return tdot(phi)
         else:
-            phi1 = self.phi(X)
-            phi2 = self.phi(X2)
+            phi1 = self.phi(X) * self.alpha
+            phi2 = self.phi(X2) * self.alpha
             if phi1.ndim != 2:
                 phi1 = phi1[:, None]
                 phi2 = phi2[:, None]
@@ -72,30 +98,43 @@ class BasisFuncKernel(Kern):
         
         
 class LinearSlopeBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'):
-        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
+        """
+        A linear segment transformation. The segments start at start, \
+        are then linear to stop and constant again. The segments are 
+        normalized, so that they have exactly as much mass above 
+        as below the origin. 
+        
+        Start and stop can be tuples or lists of starts and stops. 
+        Behaviour of start stop is as np.where(X<start) would do.
+        """
+        
         self.start = np.array(start)
         self.stop = np.array(stop)
+        super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
     
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(phi > self.stop, self.stop, phi)
         return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
         return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
     
 class ChangePointBasisFuncKernel(BasisFuncKernel):
-    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'):
-        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
+    def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
         self.changepoint = changepoint
+        super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
     
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         return self.concatenate_offset(np.where((X < self.changepoint), -1, 1))
 
 class DomainKernel(LinearSlopeBasisFuncKernel):
+    def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
+        super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
+    
     @Cache_this(limit=3, ignore_args=())
-    def phi(self, X):
+    def _phi(self, X):
         phi = np.where((X>self.start)*(X<self.stop), 1., 0.)
         return phi#((phi-self.start)/(self.stop-self.start))-.5
         return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
diff --git a/GPy/kern/_src/periodic.py b/GPy/kern/_src/periodic.py
index 23818007..014909ef 100644
--- a/GPy/kern/_src/periodic.py
+++ b/GPy/kern/_src/periodic.py
@@ -68,8 +68,6 @@ class Periodic(Kern):
         return np.diag(self.K(X))
 
 
-
-
 class PeriodicExponential(Periodic):
     """
     Kernel of the periodic subspace (up to a given frequency) of a exponential

From 9b54f831b338126ad1090ecbff43a9c664b558f0 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 21 Apr 2015 13:33:09 +0200
Subject: [PATCH 6/6] [basisfunckern] gradients for non ard adjusted

---
 GPy/kern/_src/basis_funcs.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py
index af4a7188..5225ff10 100644
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@@ -26,7 +26,7 @@ class BasisFuncKernel(Kern):
     
     def parameters_changed(self):
         self.alpha = np.sqrt(self.variance)
-        #self.beta = 1./self.variance
+        self.beta = 1./self.variance
     
     @Cache_this(limit=3, ignore_args=())
     def phi(self, X):
@@ -50,14 +50,14 @@ class BasisFuncKernel(Kern):
                 phi2 = self.phi(X2)
                 self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
         else:
-            self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2))
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
         
     def update_gradients_diag(self, dL_dKdiag, X):
         if self.ARD:
             phi1 = self.phi(X)
             self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
         else:
-            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X))
+            self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
         
     def concatenate_offset(self, X):
         return np.c_[np.ones((X.shape[0], 1)), X]
@@ -118,7 +118,6 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(phi > self.stop, self.stop, phi)
         return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
-        return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5
     
 class ChangePointBasisFuncKernel(BasisFuncKernel):
     def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
@@ -127,7 +126,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
     
     @Cache_this(limit=3, ignore_args=())
     def _phi(self, X):
-        return self.concatenate_offset(np.where((X < self.changepoint), -1, 1))
+        return np.where((X < self.changepoint), -1, 1)
 
 class DomainKernel(LinearSlopeBasisFuncKernel):
     def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
@@ -135,6 +134,5 @@ class DomainKernel(LinearSlopeBasisFuncKernel):
     
     @Cache_this(limit=3, ignore_args=())
     def _phi(self, X):
-        phi = np.where((X>self.start)*(X<self.stop), 1., 0.)
+        phi = np.where((X>self.start)*(X<self.stop), 1, 0)
         return phi#((phi-self.start)/(self.stop-self.start))-.5
-        return self.concatenate_offset(phi)  # ((phi-self.start)/(self.stop-self.start))-.5