From f5178b95ad3f112de8b594ba7dde1f45a688901a Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Wed, 29 Oct 2014 16:48:52 +0000
Subject: [PATCH] reimplement discriminative prior

---
 GPy/core/parameterization/priors.py | 336 ++++++++++++----------------
 1 file changed, 147 insertions(+), 189 deletions(-)

diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py
index 4bbef0a3..26bb397a 100644
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 from scipy.special import gammaln, digamma
-from ...util.linalg import pdinv
+from ...util.linalg import pdinv,tdot,backsub_both_sides
 from domains import _REAL, _POSITIVE
 import warnings
 import weakref
@@ -428,201 +428,96 @@ class DGPLVM(Prior):
 
     """
     domain = _REAL
-    # _instances = []
-    # def __new__(cls, mu, sigma): # Singleton:
-    #     if cls._instances:
-    #         cls._instances[:] = [instance for instance in cls._instances if instance()]
-    #         for instance in cls._instances:
-    #             if instance().mu == mu and instance().sigma == sigma:
-    #                 return instance()
-    #     o = super(Prior, cls).__new__(cls, mu, sigma)
-    #     cls._instances.append(weakref.ref(o))
-    #     return cls._instances[-1]()
 
-    def __init__(self, sigma2, lbl, x_shape):
+    def __init__(self, sigma2, label, x_shape, jit=0.):
         self.sigma2 = sigma2
-        # self.x = x
-        self.lbl = lbl
-        self.classnum = lbl.shape[1]
-        self.datanum = lbl.shape[0]
+        self.labels = np.unique(label)
+        self.cls_idx = [np.where(label==l)[0] for l in self.labels]
+        self.classnum = self.labels.shape[0]
+        self.datanum = x_shape[0]
         self.x_shape = x_shape
-        self.dim = x_shape[1]
+        self.cls_ratio = np.array([float(len(idx))/self.datanum for idx in self.cls_idx])
+        self.jit = jit
 
-    def get_class_label(self, y):
-        for idx, v in enumerate(y):
-            if v == 1:
-                return idx
-        return -1
-
-    # This function assigns each data point to its own class
-    # and returns the dictionary which contains the class name and parameters.
-    def compute_cls(self, x):
-        cls = {}
-        # Appending each data point to its proper class
-        for j in xrange(self.datanum):
-            class_label = self.get_class_label(self.lbl[j])
-            if class_label not in cls:
-                cls[class_label] = []
-            cls[class_label].append(x[j])
-        return cls
-
-    # This function computes mean of each class. The mean is calculated through each dimension
-    def compute_Mi(self, cls):
-        M_i = np.zeros((self.classnum, self.dim))
-        for i in cls:
-            # Mean of each class
-            M_i[i] = np.mean(cls[i], axis=0)
-        return M_i
-
-    # Adding data points as tuple to the dictionary so that we can access indices
-    def compute_indices(self, x):
-        data_idx = {}
-        for j in xrange(self.datanum):
-            class_label = self.get_class_label(self.lbl[j])
-            if class_label not in data_idx:
-                data_idx[class_label] = []
-            t = (j, x[j])
-            data_idx[class_label].append(t)
-        return data_idx
-
-    # Adding indices to the list so we can access whole the indices
-    def compute_listIndices(self, data_idx):
-        lst_idx = []
-        lst_idx_all = []
-        for i in data_idx:
-            if len(lst_idx) == 0:
-                pass
-                #Do nothing, because it is the first time list is created so is empty
-            else:
-                lst_idx = []
-            # Here we put indices of each class in to the list called lst_idx_all
-            for m in xrange(len(data_idx[i])):
-                lst_idx.append(data_idx[i][m][0])
-            lst_idx_all.append(lst_idx)
-        return lst_idx_all
-
-    # This function calculates between classes variances
-    def compute_Sb(self, cls, M_i, M_0):
-        Sb = np.zeros((self.dim, self.dim))
-        for i in cls:
-            B = (M_i[i] - M_0).reshape(self.dim, 1)
-            B_trans = B.transpose()
-            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
-        return Sb
-
-    # This function calculates within classes variances
-    def compute_Sw(self, cls, M_i):
-        Sw = np.zeros((self.dim, self.dim))
-        for i in cls:
-            N_i = float(len(cls[i]))
-            W_WT = np.zeros((self.dim, self.dim))
-            for xk in cls[i]:
-                W = (xk - M_i[i])
-                W_WT += np.outer(W, W)
-            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
-        return Sw
-
-    # Calculating beta and Bi for Sb
-    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
-        # import pdb
-        # pdb.set_trace()
-        B_i = np.zeros((self.classnum, self.dim))
-        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
-        for i in data_idx:
-            # pdb.set_trace()
-            # Calculating Bi
-            B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
-        for k in xrange(self.datanum):
-            for i in data_idx:
-                N_i = float(len(data_idx[i]))
-                if k in lst_idx_all[i]:
-                    beta = (float(1) / N_i) - (float(1) / self.datanum)
-                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
-                else:
-                    beta = -(float(1) / self.datanum)
-                    Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
-        Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
-        return Sig_beta_B_i_all
-
-
-    # Calculating W_j s separately so we can access all the W_j s anytime
-    def compute_wj(self, data_idx, M_i):
-        W_i = np.zeros((self.datanum, self.dim))
-        for i in data_idx:
-            N_i = float(len(data_idx[i]))
-            for tpl in data_idx[i]:
-                xj = tpl[1]
-                j = tpl[0]
-                W_i[j] = (xj - M_i[i])
-        return W_i
-
-    # Calculating alpha and Wj for Sw
-    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
-        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
-        for i in data_idx:
-            N_i = float(len(data_idx[i]))
-            for tpl in data_idx[i]:
-                k = tpl[0]
-                for j in lst_idx_all[i]:
-                    if k == j:
-                        alpha = 1 - (float(1) / N_i)
-                        Sig_alpha_W_i[k] += (alpha * W_i[j])
-                    else:
-                        alpha = 0 - (float(1) / N_i)
-                        Sig_alpha_W_i[k] += (alpha * W_i[j])
-        Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
-        return Sig_alpha_W_i
-
-    # This function calculates log of our prior
+        
+    def _compute_SbSw(self,X):
+        M_0 = X.mean(axis=0)
+        Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx])
+        
+        tmp = Ms - M_0
+        Sb = np.dot(tmp.T,self.cls_ratio[:,None]*tmp)
+        Sw = np.sum([tdot((X[idx]-Ms[i]).T) for i,idx in enumerate(self.cls_idx)],axis=0)/self.datanum
+        return Sb,Sw
+    
+#     def _compute(self,X):
+#         X = X.reshape(self.x_shape)
+#  
+#         M_0 = X.mean(axis=0)
+#         Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx])
+#         print Ms
+#          
+#         dMs = Ms - M_0
+#         dX = X.copy()
+#         for i,idx in enumerate(self.cls_idx):
+#             dX[idx] = X[idx]-Ms[i]
+#  
+#         Sb = np.dot(dMs.T,self.cls_ratio[:,None]*dMs)
+# #         Sb = np.identity(self.x_shape[1])
+# #        print Sb
+#         Sw = np.sum([tdot(dX[idx].T) for idx in self.cls_idx],axis=0)/self.datanum
+#          
+#         Swinv,Lw,_,_ = pdinv(Sw+self.jit*np.identity(self.x_shape[1]))
+#         LwinvSbLwinvT = backsub_both_sides(Lw,Sb,transpose='right')
+# #         lnpdf =  -1./(self.sigma2*np.trace(LwinvSbLwinvT))
+#         lnpdf =  np.trace(LwinvSbLwinvT)/self.sigma2
+#          
+#         SwinvSbSwinv = backsub_both_sides(Lw,LwinvSbLwinvT,transpose='left')
+#          
+#         dX = -np.dot(dX,SwinvSbSwinv)
+#         dMs = np.dot(dMs,Swinv)
+#         for i,idx in enumerate(self.cls_idx):
+#             dX[idx] += dMs[i]
+#          
+# #         dX *= 2.*lnpdf*lnpdf*self.sigma2/self.datanum
+#         dX *= 2./self.sigma2/self.datanum
+#         return lnpdf, dX
+        
+    def _compute(self,X):
+        X = X.reshape(self.x_shape)
+ 
+        M_0 = X.mean(axis=0)
+        Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx])
+         
+        dMs = Ms - M_0
+        dX = X.copy()
+        for i,idx in enumerate(self.cls_idx):
+            dX[idx] = X[idx]-Ms[i]
+ 
+        Sb = np.dot(dMs.T,self.cls_ratio[:,None]*dMs)
+        Sw = np.sum([tdot(dX[idx].T) for idx in self.cls_idx],axis=0)/self.datanum
+         
+        Sbinv,Lb,_,_ = pdinv(Sb+self.jit*np.identity(self.x_shape[1]))
+        LbinvSwLbinvT = backsub_both_sides(Lb,Sw,transpose='right')
+        lnpdf =  -np.trace(LbinvSwLbinvT)/self.sigma2
+         
+        SbinvSwSbinv = backsub_both_sides(Lb,LbinvSwLbinvT,transpose='left')
+         
+        dX = np.dot(dX,Sbinv)
+        dMs = -np.dot(dMs,SbinvSwSbinv)
+        for i,idx in enumerate(self.cls_idx):
+            dX[idx] += dMs[i]
+         
+        dX *= -2./self.sigma2/self.datanum
+         
+        return lnpdf, dX
+        
     def lnpdf(self, x):
-        x = x.reshape(self.x_shape)
-        cls = self.compute_cls(x)
-        M_0 = np.mean(x, axis=0)
-        M_i = self.compute_Mi(cls)
-        Sb = self.compute_Sb(cls, M_i, M_0)
-        Sw = self.compute_Sw(cls, M_i)
-        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
-        #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
-        return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
+        lnpdf,_ = self._compute(x)
+        return lnpdf
 
-    # This function calculates derivative of the log of prior function
     def lnpdf_grad(self, x):
-        x = x.reshape(self.x_shape)
-        cls = self.compute_cls(x)
-        M_0 = np.mean(x, axis=0)
-        M_i = self.compute_Mi(cls)
-        Sb = self.compute_Sb(cls, M_i, M_0)
-        Sw = self.compute_Sw(cls, M_i)
-        data_idx = self.compute_indices(x)
-        lst_idx_all = self.compute_listIndices(data_idx)
-        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
-        W_i = self.compute_wj(data_idx, M_i)
-        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
-
-        # Calculating inverse of Sb and its transpose and minus
-        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
-        # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
-        Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
-        Sb_inv_N_trans = np.transpose(Sb_inv_N)
-        Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
-        Sw_trans = np.transpose(Sw)
-
-        # Calculating DJ/DXk
-        DJ_Dxk = 2 * (
-            Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
-                Sig_alpha_W_i))
-        # Calculating derivative of the log of the prior
-        DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
-        return DPx_Dx.T
-
-    # def frb(self, x):
-    #     from functools import partial
-    #     from GPy.models import GradientChecker
-    #     f = partial(self.lnpdf)
-    #     df = partial(self.lnpdf_grad)
-    #     grad = GradientChecker(f, df, x, 'X')
-    #     grad.checkgrad(verbose=1)
+        _, dX = self._compute(x)
+        return dX.flat
 
     def rvs(self, n):
         return np.random.rand(n)  # A WRONG implementation
@@ -630,3 +525,66 @@ class DGPLVM(Prior):
     def __str__(self):
         return 'DGPLVM_prior'
 
+    
+class Dis_prior(Prior):
+    """
+    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
+
+    :param sigma2: constant
+
+    .. Note:: DGPLVM for Classification paper implementation
+
+    """
+    domain = _REAL
+
+    def __init__(self, sigma2, label, x_shape, jit=0.):
+        self.sigma2 = sigma2
+        self.labels = np.unique(label)
+        self.cls_idx = [np.where(label==l)[0] for l in self.labels]
+        self.classnum = self.labels.shape[0]
+        self.datanum = x_shape[0]
+        self.x_shape = x_shape
+        self.cls_ratio = np.array([float(len(idx))/self.datanum for idx in self.cls_idx])
+        self.jit = jit
+#        self.lengthscale = lengthscale
+        self.lengthscale = np.ones(x_shape[1])
+        
+    def _compute(self,X):
+        X = X.reshape(self.x_shape)/self.lengthscale
+ 
+        Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx])
+         
+        dX = X.copy()
+        for i,idx in enumerate(self.cls_idx):
+            dX[idx] = X[idx]-Ms[i]
+        
+        dMs = (Ms[None,:,:]-Ms[:,None,:])
+        dMs_c = dMs.sum(axis=0)
+        Ms_num = (self.classnum-1)*self.classnum/2
+        
+        nom = np.square(dX).sum()/self.datanum
+        denom = np.square(dMs).sum()/(2*Ms_num)
+        
+        lnpdf =  -nom/(denom*self.sigma2)
+         
+        dX *= -1./(denom*self.datanum)
+        for i,idx in enumerate(self.cls_idx):
+            dX[idx] += nom/(denom*denom*Ms_num*len(idx))*dMs_c[i]
+         
+        dX *= 2./(self.sigma2*self.lengthscale)
+         
+        return lnpdf, dX
+        
+    def lnpdf(self, x):
+        lnpdf,_ = self._compute(x)
+        return lnpdf
+
+    def lnpdf_grad(self, x):
+        _, dX = self._compute(x)
+        return dX.flat
+
+    def rvs(self, n):
+        return np.random.rand(n)  # A WRONG implementation
+
+    def __str__(self):
+        return 'DGPLVM_prior'