From e42937ccaf218209ea60d483fe86d2e78e9baba0 Mon Sep 17 00:00:00 2001 From: frb-yousefi Date: Thu, 12 Mar 2015 11:01:55 +0000 Subject: [PATCH 1/6] Descrimative BGPLVM prior with lambda added --- GPy/core/parameterization/priors.py | 298 ++++++++++++++++++++++++++++ 1 file changed, 298 insertions(+) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 20a78691..93784d08 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -712,6 +712,304 @@ class DGPLVM(Prior): return 'DGPLVM_prior_Raq' +# ****************************************** + +class DGPLVM_Lamda(Prior): + """ + Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. + + :param sigma2: constant + + .. Note:: DGPLVM for Classification paper implementation + + """ + domain = _REAL + # _instances = [] + # def __new__(cls, mu, sigma): # Singleton: + # if cls._instances: + # cls._instances[:] = [instance for instance in cls._instances if instance()] + # for instance in cls._instances: + # if instance().mu == mu and instance().sigma == sigma: + # return instance() + # o = super(Prior, cls).__new__(cls, mu, sigma) + # cls._instances.append(weakref.ref(o)) + # return cls._instances[-1]() + + def __init__(self, sigma2, lbl, x_shape, lamda): + self.sigma2 = sigma2 + # self.x = x + self.lbl = lbl + self.lamda = lamda + self.classnum = lbl.shape[1] + self.datanum = lbl.shape[0] + self.x_shape = x_shape + self.dim = x_shape[1] + #self.lamda = Param('lamda', np.diag(lamda)) + #self.link_parameter(self.lamda) + + def get_class_label(self, y): + for idx, v in enumerate(y): + if v == 1: + return idx + return -1 + + # This function assigns each data point to its own class + # and returns the dictionary which contains the class name and parameters. + def compute_cls(self, x): + cls = {} + # Appending each data point to its proper class + for j in xrange(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in cls: + cls[class_label] = [] + cls[class_label].append(x[j]) + return cls + + # This function computes mean of each class. The mean is calculated through each dimension + def compute_Mi(self, cls): + M_i = np.zeros((self.classnum, self.dim)) + for i in cls: + # Mean of each class + class_i = cls[i] + M_i[i] = np.mean(class_i, axis=0) + return M_i + + # Adding data points as tuple to the dictionary so that we can access indices + def compute_indices(self, x): + data_idx = {} + for j in xrange(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in data_idx: + data_idx[class_label] = [] + t = (j, x[j]) + data_idx[class_label].append(t) + return data_idx + + # Adding indices to the list so we can access whole the indices + def compute_listIndices(self, data_idx): + lst_idx = [] + lst_idx_all = [] + for i in data_idx: + if len(lst_idx) == 0: + pass + #Do nothing, because it is the first time list is created so is empty + else: + lst_idx = [] + # Here we put indices of each class in to the list called lst_idx_all + for m in xrange(len(data_idx[i])): + lst_idx.append(data_idx[i][m][0]) + lst_idx_all.append(lst_idx) + return lst_idx_all + + # This function calculates between classes variances + def compute_Sb(self, cls, M_i, M_0): + Sb = np.zeros((self.dim, self.dim)) + for i in cls: + B = (M_i[i] - M_0).reshape(self.dim, 1) + B_trans = B.transpose() + Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans) + return Sb + + # This function calculates within classes variances + def compute_Sw(self, cls, M_i): + Sw = np.zeros((self.dim, self.dim)) + for i in cls: + N_i = float(len(cls[i])) + W_WT = np.zeros((self.dim, self.dim)) + for xk in cls[i]: + W = (xk - M_i[i]) + W_WT += np.outer(W, W) + Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + return Sw + + # Calculating beta and Bi for Sb + def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all): + import pdb + # pdb.set_trace() + B_i = np.zeros((self.classnum, self.dim)) + Sig_beta_B_i_all = np.zeros((self.datanum, self.dim)) + for i in data_idx: + # pdb.set_trace() + # Calculating Bi + B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) + for k in xrange(self.datanum): + for i in data_idx: + N_i = float(len(data_idx[i])) + if k in lst_idx_all[i]: + beta = (float(1) / N_i) - (float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + else: + beta = -(float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() + return Sig_beta_B_i_all + + + # Calculating W_j s separately so we can access all the W_j s anytime + def compute_wj(self, data_idx, M_i): + W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + xj = tpl[1] + j = tpl[0] + W_i[j] = (xj - M_i[i]) + return W_i + + # Calculating alpha and Wj for Sw + def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i): + Sig_alpha_W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + k = tpl[0] + for j in lst_idx_all[i]: + if k == j: + alpha = 1 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + else: + alpha = 0 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + return Sig_alpha_W_i + + # This function calculates log of our prior + def lnpdf(self, x): + #xprime = x.dot(np.diagflat(self.lambda)) + x = x.reshape(self.x_shape) + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) + + # This function calculates derivative of the log of prior function + def lnpdf_grad(self, x): + #xprime = x.dot(np.diagflat(self.lambda)) + # pdb.set_trace() + #print 'before:', x.shape + x = x.reshape(self.x_shape) + #print 'after:', x.shape + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + # print M_i.shape + Sb = self.compute_Sb(cls, M_i, M_0) + # print Sb.shape + Sw = self.compute_Sw(cls, M_i) + # print Sw.shape + data_idx = self.compute_indices(x) + lst_idx_all = self.compute_listIndices(data_idx) + Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) + W_i = self.compute_wj(data_idx, M_i) + Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) + + # Calculating inverse of Sb and its transpose and minus + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + Sb_inv_N_trans = np.transpose(Sb_inv_N) + Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans + Sw_trans = np.transpose(Sw) + + # Calculating DJ/DXk + # pdb.set_trace() + DJ_Dxk = 2 * ( + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( + Sig_alpha_W_i)) + #print 'DJ_Dxk:', DJ_Dxk.shape + # Calculating derivative of the log of the prior + DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + #print 'x.shape:', x.shape + #print 'DPx_Dx.shape:', DPx_Dx.shape + + + # DPxprim_Dlamda = DPx_Dx.dot(x) + # #DPxprim_Dlamda = (x.T).dot(DPx_Dx.T) + # # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + # DPxprim_Dlamda = DPxprim_Dlamda.T + # print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape + # print DPxprim_Dlamda + # return DPxprim_Dlamda + + + DPxprim_Dx = self.lamda.dot(DPx_Dx) + print 'before :DPxprim_Dx:', DPxprim_Dx.shape + # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + DPxprim_Dx = DPxprim_Dx.T + print 'after: DPxprim_Dx:', DPxprim_Dx.shape + #return DPxprim_Dx + + DPxprim_Dlamda = DPx_Dx.dot(x) + #DPxprim_Dlamda = (x.T).dot(DPx_Dx.T) + # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + DPxprim_Dlamda = DPxprim_Dlamda.T + print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape + print DPxprim_Dlamda + c = DPxprim_Dx.dot(DPxprim_Dlamda) + print 'c.shape:', c.shape + + #self.lamda.gradient = np.diag(DPxprim_Dlamda) + return c + + + # def lnpdf_grad(self, x): + # x = x.reshape(self.x_shape) + # cls = self.compute_cls(x) + # M_0 = np.mean(x, axis=0) + # M_i = self.compute_Mi(cls) + # Sb = self.compute_Sb(cls, M_i, M_0) + # Sw = self.compute_Sw(cls, M_i) + # data_idx = self.compute_indices(x) + # lst_idx_all = self.compute_listIndices(data_idx) + # Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) + # W_i = self.compute_wj(data_idx, M_i) + # Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) + + # # Calculating inverse of Sb and its transpose and minus + # Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + # Sb_inv_N_trans = np.transpose(Sb_inv_N) + # Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans + # Sw_trans = np.transpose(Sw) + + # # Calculating DJ/DXk + # DJ_Dxk = 2 * ( + # Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( + # Sig_alpha_W_i)) + # print 'DJ_Dxk:', DJ_Dxk.shape + # # Calculating derivative of the log of the prior + # DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + # print 'x.shape:', x.shape + # print 'DPx_Dx.shape:', DPx_Dx.shape + # DPxprim_Dx = self.lamda.dot(DPx_Dx) + # print 'before :DPxprim_Dx:', DPxprim_Dx.shape + # # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + # DPxprim_Dx = DPxprim_Dx.T + # print 'after: DPxprim_Dx:', DPxprim_Dx.shape + # return DPxprim_Dx + + + # def frb(self, x): + # from functools import partial + # from GPy.models import GradientChecker + # f = partial(self.lnpdf) + # df = partial(self.lnpdf_grad) + # grad = GradientChecker(f, df, x, 'X') + # grad.checkgrad(verbose=1) + + def rvs(self, n): + return np.random.rand(n) # A WRONG implementation + + def __str__(self): + return 'DGPLVM_prior_Raq_Lamda' + +# ****************************************** class DGPLVM_T(Prior): """ From e7650c8a90de31e3d4877fcbfbc217a9b1202ebf Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Mon, 20 Apr 2015 16:02:59 +0200 Subject: [PATCH 2/6] [sparse gp] memory overflow with big data, iterating over dimensions now --- GPy/core/sparse_gp.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 35644bfe..624a8f9c 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -10,6 +10,7 @@ from .parameterization.variational import VariationalPosterior, NormalPosterior from ..util.linalg import mdot import logging +import itertools logger = logging.getLogger("sparse gp") class SparseGP(GP): @@ -135,7 +136,13 @@ class SparseGP(GP): var = var else: Kxx = kern.Kdiag(Xnew) - var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T + if self.posterior.woodbury_inv.ndim == 2: + var = Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0) + elif self.posterior.woodbury_inv.ndim == 3: + var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2])) + for i in range(var.shape[1]): + var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0))) + var = var #add in the mean function if self.mean_function is not None: mu += self.mean_function.f(Xnew) From 440d7b64786f6beed299adde5812d66aec662124 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Mon, 20 Apr 2015 16:03:39 +0200 Subject: [PATCH 3/6] [basis funcs] linear slope identifiability higher, symmetry plus true linear effect --- GPy/kern/_src/basis_funcs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py index b6a95354..1b300661 100644 --- a/GPy/kern/_src/basis_funcs.py +++ b/GPy/kern/_src/basis_funcs.py @@ -81,7 +81,7 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel): def phi(self, X): phi = np.where(X < self.start, self.start, X) phi = np.where(phi > self.stop, self.stop, phi) - return ((phi-self.start)/(self.stop-self.start))-.5 + return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1. return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5 class ChangePointBasisFuncKernel(BasisFuncKernel): From 17fa8a0ada056d87e36479f52af650a435301e95 Mon Sep 17 00:00:00 2001 From: frb-yousefi Date: Mon, 20 Apr 2015 15:51:28 +0100 Subject: [PATCH 4/6] prior with Lambda --- GPy/core/parameterization/priors.py | 113 +++++++++------------------- GPy/models/__init__.py | 1 + GPy/models/dpgplvm.py | 19 +++++ 3 files changed, 55 insertions(+), 78 deletions(-) create mode 100644 GPy/models/dpgplvm.py diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 93784d08..4a6b93e3 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -714,7 +714,9 @@ class DGPLVM(Prior): # ****************************************** -class DGPLVM_Lamda(Prior): +from parameterized import Parameterized +from .. import Param +class DGPLVM_Lamda(Prior, Parameterized): """ Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. @@ -735,7 +737,8 @@ class DGPLVM_Lamda(Prior): # cls._instances.append(weakref.ref(o)) # return cls._instances[-1]() - def __init__(self, sigma2, lbl, x_shape, lamda): + def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'): + super(DGPLVM_Lamda, self).__init__(name=name) self.sigma2 = sigma2 # self.x = x self.lbl = lbl @@ -744,8 +747,8 @@ class DGPLVM_Lamda(Prior): self.datanum = lbl.shape[0] self.x_shape = x_shape self.dim = x_shape[1] - #self.lamda = Param('lamda', np.diag(lamda)) - #self.link_parameter(self.lamda) + self.lamda = Param('lamda', np.diag(lamda)) + self.link_parameter(self.lamda) def get_class_label(self, y): for idx, v in enumerate(y): @@ -875,8 +878,14 @@ class DGPLVM_Lamda(Prior): # This function calculates log of our prior def lnpdf(self, x): - #xprime = x.dot(np.diagflat(self.lambda)) x = x.reshape(self.x_shape) + + #!!!!!!!!!!!!!!!!!!!!!!!!!!! + #self.lamda.values[:] = self.lamda.values/self.lamda.values.sum() + + xprime = x.dot(np.diagflat(self.lamda)) + x = xprime + # print x cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) M_i = self.compute_Mi(cls) @@ -890,19 +899,15 @@ class DGPLVM_Lamda(Prior): # This function calculates derivative of the log of prior function def lnpdf_grad(self, x): - #xprime = x.dot(np.diagflat(self.lambda)) - # pdb.set_trace() - #print 'before:', x.shape x = x.reshape(self.x_shape) - #print 'after:', x.shape + xprime = x.dot(np.diagflat(self.lamda)) + x = xprime + # print x cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) M_i = self.compute_Mi(cls) - # print M_i.shape Sb = self.compute_Sb(cls, M_i, M_0) - # print Sb.shape Sw = self.compute_Sw(cls, M_i) - # print Sw.shape data_idx = self.compute_indices(x) lst_idx_all = self.compute_listIndices(data_idx) Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) @@ -919,80 +924,25 @@ class DGPLVM_Lamda(Prior): Sw_trans = np.transpose(Sw) # Calculating DJ/DXk - # pdb.set_trace() DJ_Dxk = 2 * ( Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( Sig_alpha_W_i)) - #print 'DJ_Dxk:', DJ_Dxk.shape # Calculating derivative of the log of the prior DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) - #print 'x.shape:', x.shape - #print 'DPx_Dx.shape:', DPx_Dx.shape + DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx) - # DPxprim_Dlamda = DPx_Dx.dot(x) - # #DPxprim_Dlamda = (x.T).dot(DPx_Dx.T) - # # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) - # DPxprim_Dlamda = DPxprim_Dlamda.T - # print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape - # print DPxprim_Dlamda - # return DPxprim_Dlamda - - - DPxprim_Dx = self.lamda.dot(DPx_Dx) - print 'before :DPxprim_Dx:', DPxprim_Dx.shape # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) DPxprim_Dx = DPxprim_Dx.T - print 'after: DPxprim_Dx:', DPxprim_Dx.shape - #return DPxprim_Dx DPxprim_Dlamda = DPx_Dx.dot(x) - #DPxprim_Dlamda = (x.T).dot(DPx_Dx.T) + # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) DPxprim_Dlamda = DPxprim_Dlamda.T - print 'DPxprim_Dlamda:', DPxprim_Dlamda.shape - print DPxprim_Dlamda - c = DPxprim_Dx.dot(DPxprim_Dlamda) - print 'c.shape:', c.shape - #self.lamda.gradient = np.diag(DPxprim_Dlamda) - return c - - - # def lnpdf_grad(self, x): - # x = x.reshape(self.x_shape) - # cls = self.compute_cls(x) - # M_0 = np.mean(x, axis=0) - # M_i = self.compute_Mi(cls) - # Sb = self.compute_Sb(cls, M_i, M_0) - # Sw = self.compute_Sw(cls, M_i) - # data_idx = self.compute_indices(x) - # lst_idx_all = self.compute_listIndices(data_idx) - # Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) - # W_i = self.compute_wj(data_idx, M_i) - # Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) - - # # Calculating inverse of Sb and its transpose and minus - # Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] - # Sb_inv_N_trans = np.transpose(Sb_inv_N) - # Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans - # Sw_trans = np.transpose(Sw) - - # # Calculating DJ/DXk - # DJ_Dxk = 2 * ( - # Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( - # Sig_alpha_W_i)) - # print 'DJ_Dxk:', DJ_Dxk.shape - # # Calculating derivative of the log of the prior - # DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) - # print 'x.shape:', x.shape - # print 'DPx_Dx.shape:', DPx_Dx.shape - # DPxprim_Dx = self.lamda.dot(DPx_Dx) - # print 'before :DPxprim_Dx:', DPxprim_Dx.shape - # # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) - # DPxprim_Dx = DPxprim_Dx.T - # print 'after: DPxprim_Dx:', DPxprim_Dx.shape - # return DPxprim_Dx + self.lamda.gradient = np.diag(DPxprim_Dlamda) + # print DPxprim_Dx + return DPxprim_Dx # def frb(self, x): @@ -1062,11 +1012,12 @@ class DGPLVM_T(Prior): return cls # This function computes mean of each class. The mean is calculated through each dimension - def compute_Mi(self, cls, vec): + def compute_Mi(self, cls): M_i = np.zeros((self.classnum, self.dim)) for i in cls: # Mean of each class - class_i = np.multiply(cls[i],vec) + # class_i = np.multiply(cls[i],vec) + class_i = cls[i] M_i[i] = np.mean(class_i, axis=0) return M_i @@ -1172,9 +1123,12 @@ class DGPLVM_T(Prior): # This function calculates log of our prior def lnpdf(self, x): x = x.reshape(self.x_shape) + xprim = x.dot(self.vec) + x = xprim + # print x cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) - M_i = self.compute_Mi(cls, self.vec) + M_i = self.compute_Mi(cls) Sb = self.compute_Sb(cls, M_i, M_0) Sw = self.compute_Sw(cls, M_i) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) @@ -1186,10 +1140,13 @@ class DGPLVM_T(Prior): # This function calculates derivative of the log of prior function def lnpdf_grad(self, x): - x = x.reshape(self.x_shape) - cls = self.compute_cls(x) + x = x.reshape(self.x_shape) + xprim = x.dot(self.vec) + x = xprim + # print x + cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) - M_i = self.compute_Mi(cls, self.vec) + M_i = self.compute_Mi(cls) Sb = self.compute_Sb(cls, M_i, M_0) Sw = self.compute_Sw(cls, M_i) data_idx = self.compute_indices(x) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index c6abb5de..3d1d8a9c 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -21,3 +21,4 @@ from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression from gp_var_gauss import GPVariationalGaussianApproximation from one_vs_all_classification import OneVsAllClassification from one_vs_all_sparse_classification import OneVsAllSparseClassification +from dpgplvm import DPBayesianGPLVM diff --git a/GPy/models/dpgplvm.py b/GPy/models/dpgplvm.py new file mode 100644 index 00000000..1c5dccd0 --- /dev/null +++ b/GPy/models/dpgplvm.py @@ -0,0 +1,19 @@ +# Copyright (c) 2015 the GPy Austhors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from .. import kern +from bayesian_gplvm import BayesianGPLVM +from ..core.parameterization.variational import NormalPosterior, NormalPrior + +class DPBayesianGPLVM(BayesianGPLVM): + """ + Bayesian Gaussian Process Latent Variable Model with Descriminative prior + """ + def __init__(self, Y, input_dim, X_prior, X=None, X_variance=None, init='PCA', num_inducing=10, + Z=None, kernel=None, inference_method=None, likelihood=None, + name='bayesian gplvm', mpi_comm=None, normalizer=None, + missing_data=False, stochastic=False, batchsize=1): + super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm') + self.X.mean.set_prior(X_prior) + self.link_parameter(X_prior) From d428465e08eb62077e7f470f88413fd0fb04a478 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 21 Apr 2015 11:18:31 +0200 Subject: [PATCH 5/6] [basisfuncs] updated kernel to better reflect linear trends and added ARD support --- GPy/kern/_src/basis_funcs.py | 77 +++++++++++++++++++++++++++--------- GPy/kern/_src/periodic.py | 2 - 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py index 1b300661..af4a7188 100644 --- a/GPy/kern/_src/basis_funcs.py +++ b/GPy/kern/_src/basis_funcs.py @@ -5,33 +5,59 @@ from ...core.parameterization.param import Param from ...core.parameterization.transformations import Logexp import numpy as np from ...util.caching import Cache_this -from ...util.linalg import tdot +from ...util.linalg import tdot, mdot class BasisFuncKernel(Kern): - def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'): + def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'): """ Abstract superclass for kernels with explicit basis functions for use in GPy. This class does NOT automatically add an offset to the design matrix phi! """ super(BasisFuncKernel, self).__init__(input_dim, active_dims, name) + self.ARD = ARD + if self.ARD: + phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim))) + variance = variance * np.ones(phi_test.shape[1]) + else: + variance = np.array(variance) self.variance = Param('variance', variance, Logexp()) self.link_parameter(self.variance) + def parameters_changed(self): + self.alpha = np.sqrt(self.variance) + #self.beta = 1./self.variance + + @Cache_this(limit=3, ignore_args=()) def phi(self, X): - raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi') + return self._phi(X) + + def _phi(self, X): + raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi') def K(self, X, X2=None): - return self.variance * self._K(X, X2) + return self._K(X, X2) def Kdiag(self, X, X2=None): - return self.variance * np.diag(self._K(X, X2)) + return np.diag(self._K(X, X2)) def update_gradients_full(self, dL_dK, X, X2=None): - self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) + if self.ARD: + phi1 = self.phi(X) + if X2 is None or X is X2: + self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1) + else: + phi2 = self.phi(X2) + self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2) + else: + self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) def update_gradients_diag(self, dL_dKdiag, X): - self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X)) + if self.ARD: + phi1 = self.phi(X) + self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1) + else: + self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) def concatenate_offset(self, X): return np.c_[np.ones((X.shape[0], 1)), X] @@ -52,19 +78,19 @@ class BasisFuncKernel(Kern): posterior = self._highest_parent_.posterior except NameError: raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference") - phi = self.phi(X) - return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi))) + phi_alpha = self.phi(X) * self.variance + return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha)) @Cache_this(limit=3, ignore_args=()) def _K(self, X, X2): if X2 is None or X is X2: - phi = self.phi(X) + phi = self.phi(X) * self.alpha if phi.ndim != 2: phi = phi[:, None] return tdot(phi) else: - phi1 = self.phi(X) - phi2 = self.phi(X2) + phi1 = self.phi(X) * self.alpha + phi2 = self.phi(X2) * self.alpha if phi1.ndim != 2: phi1 = phi1[:, None] phi2 = phi2[:, None] @@ -72,30 +98,43 @@ class BasisFuncKernel(Kern): class LinearSlopeBasisFuncKernel(BasisFuncKernel): - def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'): - super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name) + def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'): + """ + A linear segment transformation. The segments start at start, \ + are then linear to stop and constant again. The segments are + normalized, so that they have exactly as much mass above + as below the origin. + + Start and stop can be tuples or lists of starts and stops. + Behaviour of start stop is as np.where(X self.stop, self.stop, phi) return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1. return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5 class ChangePointBasisFuncKernel(BasisFuncKernel): - def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'): - super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name) + def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'): self.changepoint = changepoint + super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) @Cache_this(limit=3, ignore_args=()) - def phi(self, X): + def _phi(self, X): return self.concatenate_offset(np.where((X < self.changepoint), -1, 1)) class DomainKernel(LinearSlopeBasisFuncKernel): + def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'): + super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name) + @Cache_this(limit=3, ignore_args=()) - def phi(self, X): + def _phi(self, X): phi = np.where((X>self.start)*(X Date: Tue, 21 Apr 2015 13:33:09 +0200 Subject: [PATCH 6/6] [basisfunckern] gradients for non ard adjusted --- GPy/kern/_src/basis_funcs.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py index af4a7188..5225ff10 100644 --- a/GPy/kern/_src/basis_funcs.py +++ b/GPy/kern/_src/basis_funcs.py @@ -26,7 +26,7 @@ class BasisFuncKernel(Kern): def parameters_changed(self): self.alpha = np.sqrt(self.variance) - #self.beta = 1./self.variance + self.beta = 1./self.variance @Cache_this(limit=3, ignore_args=()) def phi(self, X): @@ -50,14 +50,14 @@ class BasisFuncKernel(Kern): phi2 = self.phi(X2) self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2) else: - self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) + self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta def update_gradients_diag(self, dL_dKdiag, X): if self.ARD: phi1 = self.phi(X) self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1) else: - self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) + self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta def concatenate_offset(self, X): return np.c_[np.ones((X.shape[0], 1)), X] @@ -118,7 +118,6 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel): phi = np.where(X < self.start, self.start, X) phi = np.where(phi > self.stop, self.stop, phi) return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1. - return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5 class ChangePointBasisFuncKernel(BasisFuncKernel): def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'): @@ -127,7 +126,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel): @Cache_this(limit=3, ignore_args=()) def _phi(self, X): - return self.concatenate_offset(np.where((X < self.changepoint), -1, 1)) + return np.where((X < self.changepoint), -1, 1) class DomainKernel(LinearSlopeBasisFuncKernel): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'): @@ -135,6 +134,5 @@ class DomainKernel(LinearSlopeBasisFuncKernel): @Cache_this(limit=3, ignore_args=()) def _phi(self, X): - phi = np.where((X>self.start)*(Xself.start)*(X