From f5178b95ad3f112de8b594ba7dde1f45a688901a Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Wed, 29 Oct 2014 16:48:52 +0000 Subject: [PATCH] reimplement discriminative prior --- GPy/core/parameterization/priors.py | 336 ++++++++++++---------------- 1 file changed, 147 insertions(+), 189 deletions(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 4bbef0a3..26bb397a 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -4,7 +4,7 @@ import numpy as np from scipy.special import gammaln, digamma -from ...util.linalg import pdinv +from ...util.linalg import pdinv,tdot,backsub_both_sides from domains import _REAL, _POSITIVE import warnings import weakref @@ -428,201 +428,96 @@ class DGPLVM(Prior): """ domain = _REAL - # _instances = [] - # def __new__(cls, mu, sigma): # Singleton: - # if cls._instances: - # cls._instances[:] = [instance for instance in cls._instances if instance()] - # for instance in cls._instances: - # if instance().mu == mu and instance().sigma == sigma: - # return instance() - # o = super(Prior, cls).__new__(cls, mu, sigma) - # cls._instances.append(weakref.ref(o)) - # return cls._instances[-1]() - def __init__(self, sigma2, lbl, x_shape): + def __init__(self, sigma2, label, x_shape, jit=0.): self.sigma2 = sigma2 - # self.x = x - self.lbl = lbl - self.classnum = lbl.shape[1] - self.datanum = lbl.shape[0] + self.labels = np.unique(label) + self.cls_idx = [np.where(label==l)[0] for l in self.labels] + self.classnum = self.labels.shape[0] + self.datanum = x_shape[0] self.x_shape = x_shape - self.dim = x_shape[1] + self.cls_ratio = np.array([float(len(idx))/self.datanum for idx in self.cls_idx]) + self.jit = jit - def get_class_label(self, y): - for idx, v in enumerate(y): - if v == 1: - return idx - return -1 - - # This function assigns each data point to its own class - # and returns the dictionary which contains the class name and parameters. - def compute_cls(self, x): - cls = {} - # Appending each data point to its proper class - for j in xrange(self.datanum): - class_label = self.get_class_label(self.lbl[j]) - if class_label not in cls: - cls[class_label] = [] - cls[class_label].append(x[j]) - return cls - - # This function computes mean of each class. The mean is calculated through each dimension - def compute_Mi(self, cls): - M_i = np.zeros((self.classnum, self.dim)) - for i in cls: - # Mean of each class - M_i[i] = np.mean(cls[i], axis=0) - return M_i - - # Adding data points as tuple to the dictionary so that we can access indices - def compute_indices(self, x): - data_idx = {} - for j in xrange(self.datanum): - class_label = self.get_class_label(self.lbl[j]) - if class_label not in data_idx: - data_idx[class_label] = [] - t = (j, x[j]) - data_idx[class_label].append(t) - return data_idx - - # Adding indices to the list so we can access whole the indices - def compute_listIndices(self, data_idx): - lst_idx = [] - lst_idx_all = [] - for i in data_idx: - if len(lst_idx) == 0: - pass - #Do nothing, because it is the first time list is created so is empty - else: - lst_idx = [] - # Here we put indices of each class in to the list called lst_idx_all - for m in xrange(len(data_idx[i])): - lst_idx.append(data_idx[i][m][0]) - lst_idx_all.append(lst_idx) - return lst_idx_all - - # This function calculates between classes variances - def compute_Sb(self, cls, M_i, M_0): - Sb = np.zeros((self.dim, self.dim)) - for i in cls: - B = (M_i[i] - M_0).reshape(self.dim, 1) - B_trans = B.transpose() - Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans) - return Sb - - # This function calculates within classes variances - def compute_Sw(self, cls, M_i): - Sw = np.zeros((self.dim, self.dim)) - for i in cls: - N_i = float(len(cls[i])) - W_WT = np.zeros((self.dim, self.dim)) - for xk in cls[i]: - W = (xk - M_i[i]) - W_WT += np.outer(W, W) - Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) - return Sw - - # Calculating beta and Bi for Sb - def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all): - # import pdb - # pdb.set_trace() - B_i = np.zeros((self.classnum, self.dim)) - Sig_beta_B_i_all = np.zeros((self.datanum, self.dim)) - for i in data_idx: - # pdb.set_trace() - # Calculating Bi - B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) - for k in xrange(self.datanum): - for i in data_idx: - N_i = float(len(data_idx[i])) - if k in lst_idx_all[i]: - beta = (float(1) / N_i) - (float(1) / self.datanum) - Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) - else: - beta = -(float(1) / self.datanum) - Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) - Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() - return Sig_beta_B_i_all - - - # Calculating W_j s separately so we can access all the W_j s anytime - def compute_wj(self, data_idx, M_i): - W_i = np.zeros((self.datanum, self.dim)) - for i in data_idx: - N_i = float(len(data_idx[i])) - for tpl in data_idx[i]: - xj = tpl[1] - j = tpl[0] - W_i[j] = (xj - M_i[i]) - return W_i - - # Calculating alpha and Wj for Sw - def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i): - Sig_alpha_W_i = np.zeros((self.datanum, self.dim)) - for i in data_idx: - N_i = float(len(data_idx[i])) - for tpl in data_idx[i]: - k = tpl[0] - for j in lst_idx_all[i]: - if k == j: - alpha = 1 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) - else: - alpha = 0 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) - Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) - return Sig_alpha_W_i - - # This function calculates log of our prior + + def _compute_SbSw(self,X): + M_0 = X.mean(axis=0) + Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx]) + + tmp = Ms - M_0 + Sb = np.dot(tmp.T,self.cls_ratio[:,None]*tmp) + Sw = np.sum([tdot((X[idx]-Ms[i]).T) for i,idx in enumerate(self.cls_idx)],axis=0)/self.datanum + return Sb,Sw + +# def _compute(self,X): +# X = X.reshape(self.x_shape) +# +# M_0 = X.mean(axis=0) +# Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx]) +# print Ms +# +# dMs = Ms - M_0 +# dX = X.copy() +# for i,idx in enumerate(self.cls_idx): +# dX[idx] = X[idx]-Ms[i] +# +# Sb = np.dot(dMs.T,self.cls_ratio[:,None]*dMs) +# # Sb = np.identity(self.x_shape[1]) +# # print Sb +# Sw = np.sum([tdot(dX[idx].T) for idx in self.cls_idx],axis=0)/self.datanum +# +# Swinv,Lw,_,_ = pdinv(Sw+self.jit*np.identity(self.x_shape[1])) +# LwinvSbLwinvT = backsub_both_sides(Lw,Sb,transpose='right') +# # lnpdf = -1./(self.sigma2*np.trace(LwinvSbLwinvT)) +# lnpdf = np.trace(LwinvSbLwinvT)/self.sigma2 +# +# SwinvSbSwinv = backsub_both_sides(Lw,LwinvSbLwinvT,transpose='left') +# +# dX = -np.dot(dX,SwinvSbSwinv) +# dMs = np.dot(dMs,Swinv) +# for i,idx in enumerate(self.cls_idx): +# dX[idx] += dMs[i] +# +# # dX *= 2.*lnpdf*lnpdf*self.sigma2/self.datanum +# dX *= 2./self.sigma2/self.datanum +# return lnpdf, dX + + def _compute(self,X): + X = X.reshape(self.x_shape) + + M_0 = X.mean(axis=0) + Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx]) + + dMs = Ms - M_0 + dX = X.copy() + for i,idx in enumerate(self.cls_idx): + dX[idx] = X[idx]-Ms[i] + + Sb = np.dot(dMs.T,self.cls_ratio[:,None]*dMs) + Sw = np.sum([tdot(dX[idx].T) for idx in self.cls_idx],axis=0)/self.datanum + + Sbinv,Lb,_,_ = pdinv(Sb+self.jit*np.identity(self.x_shape[1])) + LbinvSwLbinvT = backsub_both_sides(Lb,Sw,transpose='right') + lnpdf = -np.trace(LbinvSwLbinvT)/self.sigma2 + + SbinvSwSbinv = backsub_both_sides(Lb,LbinvSwLbinvT,transpose='left') + + dX = np.dot(dX,Sbinv) + dMs = -np.dot(dMs,SbinvSwSbinv) + for i,idx in enumerate(self.cls_idx): + dX[idx] += dMs[i] + + dX *= -2./self.sigma2/self.datanum + + return lnpdf, dX + def lnpdf(self, x): - x = x.reshape(self.x_shape) - cls = self.compute_cls(x) - M_0 = np.mean(x, axis=0) - M_i = self.compute_Mi(cls) - Sb = self.compute_Sb(cls, M_i, M_0) - Sw = self.compute_Sw(cls, M_i) - # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) + lnpdf,_ = self._compute(x) + return lnpdf - # This function calculates derivative of the log of prior function def lnpdf_grad(self, x): - x = x.reshape(self.x_shape) - cls = self.compute_cls(x) - M_0 = np.mean(x, axis=0) - M_i = self.compute_Mi(cls) - Sb = self.compute_Sb(cls, M_i, M_0) - Sw = self.compute_Sw(cls, M_i) - data_idx = self.compute_indices(x) - lst_idx_all = self.compute_listIndices(data_idx) - Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) - W_i = self.compute_wj(data_idx, M_i) - Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) - - # Calculating inverse of Sb and its transpose and minus - # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N_trans = np.transpose(Sb_inv_N) - Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans - Sw_trans = np.transpose(Sw) - - # Calculating DJ/DXk - DJ_Dxk = 2 * ( - Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( - Sig_alpha_W_i)) - # Calculating derivative of the log of the prior - DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) - return DPx_Dx.T - - # def frb(self, x): - # from functools import partial - # from GPy.models import GradientChecker - # f = partial(self.lnpdf) - # df = partial(self.lnpdf_grad) - # grad = GradientChecker(f, df, x, 'X') - # grad.checkgrad(verbose=1) + _, dX = self._compute(x) + return dX.flat def rvs(self, n): return np.random.rand(n) # A WRONG implementation @@ -630,3 +525,66 @@ class DGPLVM(Prior): def __str__(self): return 'DGPLVM_prior' + +class Dis_prior(Prior): + """ + Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. + + :param sigma2: constant + + .. Note:: DGPLVM for Classification paper implementation + + """ + domain = _REAL + + def __init__(self, sigma2, label, x_shape, jit=0.): + self.sigma2 = sigma2 + self.labels = np.unique(label) + self.cls_idx = [np.where(label==l)[0] for l in self.labels] + self.classnum = self.labels.shape[0] + self.datanum = x_shape[0] + self.x_shape = x_shape + self.cls_ratio = np.array([float(len(idx))/self.datanum for idx in self.cls_idx]) + self.jit = jit +# self.lengthscale = lengthscale + self.lengthscale = np.ones(x_shape[1]) + + def _compute(self,X): + X = X.reshape(self.x_shape)/self.lengthscale + + Ms = np.vstack([X[idx].mean(axis=0) for idx in self.cls_idx]) + + dX = X.copy() + for i,idx in enumerate(self.cls_idx): + dX[idx] = X[idx]-Ms[i] + + dMs = (Ms[None,:,:]-Ms[:,None,:]) + dMs_c = dMs.sum(axis=0) + Ms_num = (self.classnum-1)*self.classnum/2 + + nom = np.square(dX).sum()/self.datanum + denom = np.square(dMs).sum()/(2*Ms_num) + + lnpdf = -nom/(denom*self.sigma2) + + dX *= -1./(denom*self.datanum) + for i,idx in enumerate(self.cls_idx): + dX[idx] += nom/(denom*denom*Ms_num*len(idx))*dMs_c[i] + + dX *= 2./(self.sigma2*self.lengthscale) + + return lnpdf, dX + + def lnpdf(self, x): + lnpdf,_ = self._compute(x) + return lnpdf + + def lnpdf_grad(self, x): + _, dX = self._compute(x) + return dX.flat + + def rvs(self, n): + return np.random.rand(n) # A WRONG implementation + + def __str__(self): + return 'DGPLVM_prior'