diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index c4dfbc2a..3550a8b5 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -13,14 +13,15 @@ import weakref class Prior(object): domain = None _instance = None + def __new__(cls, *args, **kwargs): if not cls._instance or cls._instance.__class__ is not cls: - newfunc = super(Prior, cls).__new__ - if newfunc is object.__new__: - cls._instance = newfunc(cls) - else: - cls._instance = newfunc(cls, *args, **kwargs) - return cls._instance + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + cls._instance = newfunc(cls) + else: + cls._instance = newfunc(cls, *args, **kwargs) + return cls._instance def pdf(self, x): return np.exp(self.lnpdf(x)) @@ -47,6 +48,7 @@ class Gaussian(Prior): .. Note:: Bishop 2006 notation is used throughout the code """ + domain = _REAL _instances = [] @@ -82,6 +84,7 @@ class Gaussian(Prior): def rvs(self, n): return np.random.randn(n) * self.sigma + self.mu + # def __getstate__(self): # return self.mu, self.sigma # @@ -91,6 +94,7 @@ class Gaussian(Prior): # self.sigma2 = np.square(self.sigma) # self.constant = -0.5 * np.log(2 * np.pi * self.sigma2) + class Uniform(Prior): _instances = [] @@ -132,6 +136,7 @@ class Uniform(Prior): def rvs(self, n): return np.random.uniform(self.lower, self.upper, size=n) + # def __getstate__(self): # return self.lower, self.upper # @@ -139,6 +144,7 @@ class Uniform(Prior): # self.lower = state[0] # self.upper = state[1] + class LogGaussian(Gaussian): """ Implementation of the univariate *log*-Gaussian probability function, coupled with random variables. @@ -149,6 +155,7 @@ class LogGaussian(Gaussian): .. Note:: Bishop 2006 notation is used throughout the code """ + domain = _POSITIVE _instances = [] @@ -160,7 +167,7 @@ class LogGaussian(Gaussian): return instance() newfunc = super(Prior, cls).__new__ if newfunc is object.__new__: - o = newfunc(cls) + o = newfunc(cls) else: o = newfunc(cls, mu, sigma) cls._instances.append(weakref.ref(o)) @@ -176,10 +183,14 @@ class LogGaussian(Gaussian): return "lnN({:.2g}, {:.2g})".format(self.mu, self.sigma) def lnpdf(self, x): - return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x) + return ( + self.constant + - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 + - np.log(x) + ) def lnpdf_grad(self, x): - return -((np.log(x) - self.mu) / self.sigma2 + 1.) / x + return -((np.log(x) - self.mu) / self.sigma2 + 1.0) / x def rvs(self, n): return np.exp(np.random.randn(int(n)) * self.sigma + self.mu) @@ -195,16 +206,15 @@ class MultivariateGaussian(Prior): .. Note:: Bishop 2006 notation is used throughout the code """ + domain = _REAL _instances = [] def __new__(cls, mu=0, var=1): # Singleton: if cls._instances: - cls._instances[:] = [instance for instance in cls._instances if - instance()] + cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: - if np.all(instance().mu == mu) and np.all( - instance().var == var): + if np.all(instance().mu == mu) and np.all(instance().var == var): return instance() newfunc = super(Prior, cls).__new__ if newfunc is object.__new__: @@ -217,16 +227,17 @@ class MultivariateGaussian(Prior): def __init__(self, mu, var): self.mu = np.array(mu).flatten() self.var = np.array(var) - assert len(self.var.shape) == 2, 'Covariance must be a matrix' - assert self.var.shape[0] == self.var.shape[1], \ - 'Covariance must be a square matrix' + assert len(self.var.shape) == 2, "Covariance must be a matrix" + assert ( + self.var.shape[0] == self.var.shape[1] + ), "Covariance must be a square matrix" assert self.var.shape[0] == self.mu.size self.input_dim = self.mu.size self.inv, _, self.hld, _ = pdinv(self.var) self.constant = -0.5 * (self.input_dim * np.log(2 * np.pi) + self.hld) def __str__(self): - return 'MultiN(' + str(self.mu) + ', ' + str(np.diag(self.var)) + ')' + return "MultiN(" + str(self.mu) + ", " + str(np.diag(self.var)) + ")" def summary(self): raise NotImplementedError @@ -243,7 +254,7 @@ class MultivariateGaussian(Prior): def lnpdf_grad(self, x): x = np.array(x).flatten() d = x - self.mu - return - np.dot(self.inv, d) + return -np.dot(self.inv, d) def rvs(self, n): return np.random.multivariate_normal(self.mu, self.var, n) @@ -262,14 +273,16 @@ class MultivariateGaussian(Prior): def __setstate__(self, state): self.mu = np.array(state[0]).flatten() self.var = state[1] - assert len(self.var.shape) == 2, 'Covariance must be a matrix' - assert self.var.shape[0] == self.var.shape[1], \ - 'Covariance must be a square matrix' + assert len(self.var.shape) == 2, "Covariance must be a matrix" + assert ( + self.var.shape[0] == self.var.shape[1] + ), "Covariance must be a square matrix" assert self.var.shape[0] == self.mu.size self.input_dim = self.mu.size self.inv, _, self.hld, _ = pdinv(self.var) self.constant = -0.5 * (self.input_dim * np.log(2 * np.pi) + self.hld) + def gamma_from_EV(E, V): warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning) return Gamma.from_EV(E, V) @@ -285,10 +298,11 @@ class Gamma(Prior): .. Note:: Bishop 2006 notation is used throughout the code """ + domain = _POSITIVE _instances = [] - def __new__(cls, a=1, b=.5): # Singleton: + def __new__(cls, a=1, b=0.5): # Singleton: if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: @@ -319,24 +333,29 @@ class Gamma(Prior): return "Ga({:.2g}, {:.2g})".format(self.a, self.b) def summary(self): - ret = {"E[x]": self.a / self.b, \ - "E[ln x]": digamma(self.a) - np.log(self.b), \ - "var[x]": self.a / self.b / self.b, \ - "Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a} + ret = { + "E[x]": self.a / self.b, + "E[ln x]": digamma(self.a) - np.log(self.b), + "var[x]": self.a / self.b / self.b, + "Entropy": gammaln(self.a) + - (self.a - 1.0) * digamma(self.a) + - np.log(self.b) + + self.a, + } if self.a > 1: - ret['Mode'] = (self.a - 1.) / self.b + ret["Mode"] = (self.a - 1.0) / self.b else: - ret['mode'] = np.nan + ret["mode"] = np.nan return ret def lnpdf(self, x): return self.constant + (self.a - 1) * np.log(x) - self.b * x def lnpdf_grad(self, x): - return (self.a - 1.) / x - self.b + return (self.a - 1.0) / x - self.b def rvs(self, n): - return np.random.gamma(scale=1. / self.b, shape=self.a, size=n) + return np.random.gamma(scale=1.0 / self.b, shape=self.a, size=n) @staticmethod def from_EV(E, V): @@ -359,6 +378,7 @@ class Gamma(Prior): self._b = state[1] self.constant = -gammaln(self.a) + self.a * np.log(self.b) + class InverseGamma(Gamma): """ Implementation of the inverse-Gamma probability function, coupled with random variables. @@ -369,6 +389,7 @@ class InverseGamma(Gamma): .. Note:: Bishop 2006 notation is used throughout the code """ + domain = _POSITIVE _instances = [] @@ -386,10 +407,11 @@ class InverseGamma(Gamma): return self.constant - (self.a + 1) * np.log(x) - self.b / x def lnpdf_grad(self, x): - return -(self.a + 1.) / x + self.b / x ** 2 + return -(self.a + 1.0) / x + self.b / x**2 def rvs(self, n): - return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n) + return 1.0 / np.random.gamma(scale=1.0 / self.b, shape=self.a, size=n) + class DGPLVM_KFDA(Prior): """ @@ -403,6 +425,7 @@ class DGPLVM_KFDA(Prior): .. Note:: Surpassing Human-Level Face paper dgplvm implementation """ + domain = _REAL # _instances = [] # def __new__(cls, lambdaa, sigma2): # Singleton: @@ -459,8 +482,8 @@ class DGPLVM_KFDA(Prior): lst_ni = [] lst_ni1 = [] lst_ni2 = [] - f1 = (np.where(self.lbl[:, 0] == 1)[0]) - f2 = (np.where(self.lbl[:, 1] == 1)[0]) + f1 = np.where(self.lbl[:, 0] == 1)[0] + f2 = np.where(self.lbl[:, 1] == 1)[0] for idx in f1: lst_ni1.append(idx) for idx in f2: @@ -474,11 +497,11 @@ class DGPLVM_KFDA(Prior): count = 0 for N_i in lst_ni: if N_i == lst_ni[0]: - a[count:count + N_i] = (float(1) / N_i) * a[count] + a[count : count + N_i] = (float(1) / N_i) * a[count] count += N_i else: if N_i == lst_ni[1]: - a[count: count + N_i] = -(float(1) / N_i) * a[count] + a[count : count + N_i] = -(float(1) / N_i) * a[count] count += N_i return a @@ -486,8 +509,12 @@ class DGPLVM_KFDA(Prior): A = np.zeros((self.datanum, self.datanum)) idx = 0 for N_i in lst_ni: - B = float(1) / np.sqrt(N_i) * (np.eye(N_i) - ((float(1) / N_i) * np.ones((N_i, N_i)))) - A[idx:idx + N_i, idx:idx + N_i] = B + B = ( + float(1) + / np.sqrt(N_i) + * (np.eye(N_i) - ((float(1) / N_i) * np.ones((N_i, N_i)))) + ) + A[idx : idx + N_i, idx : idx + N_i] = B idx += N_i return A @@ -498,9 +525,11 @@ class DGPLVM_KFDA(Prior): a_trans = np.transpose(self.a) paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A) inv_part = pdinv(paran)[0] - J = a_trans.dot(K).dot(self.a) - a_trans.dot(K).dot(self.A).dot(inv_part).dot(self.A).dot(K).dot(self.a) - J_star = (1. / self.lambdaa) * J - return (-1. / self.sigma2) * J_star + J = a_trans.dot(K).dot(self.a) - a_trans.dot(K).dot(self.A).dot(inv_part).dot( + self.A + ).dot(K).dot(self.a) + J_star = (1.0 / self.lambdaa) * J + return (-1.0 / self.sigma2) * J_star # Here gradient function def lnpdf_grad(self, x): @@ -511,15 +540,15 @@ class DGPLVM_KFDA(Prior): b = self.A.dot(inv_part).dot(self.A).dot(K).dot(self.a) a_Minus_b = self.a - b a_b_trans = np.transpose(a_Minus_b) - DJ_star_DK = (1. / self.lambdaa) * (a_Minus_b.dot(a_b_trans)) + DJ_star_DK = (1.0 / self.lambdaa) * (a_Minus_b.dot(a_b_trans)) DJ_star_DX = self.kern.gradients_X(DJ_star_DK, x) - return (-1. / self.sigma2) * DJ_star_DX + return (-1.0 / self.sigma2) * DJ_star_DX def rvs(self, n): return np.random.rand(n) # A WRONG implementation def __str__(self): - return 'DGPLVM_prior' + return "DGPLVM_prior" def __getstate___(self): return self.lbl, self.lambdaa, self.sigma2, self.kern, self.x_shape @@ -547,6 +576,7 @@ class DGPLVM(Prior): .. Note:: DGPLVM for Classification paper implementation """ + domain = _REAL def __new__(cls, sigma2, lbl, x_shape): @@ -606,7 +636,7 @@ class DGPLVM(Prior): for i in data_idx: if len(lst_idx) == 0: pass - #Do nothing, because it is the first time list is created so is empty + # Do nothing, because it is the first time list is created so is empty else: lst_idx = [] # Here we put indices of each class in to the list called lst_idx_all @@ -631,9 +661,9 @@ class DGPLVM(Prior): N_i = float(len(cls[i])) W_WT = np.zeros((self.dim, self.dim)) for xk in cls[i]: - W = (xk - M_i[i]) + W = xk - M_i[i] W_WT += np.outer(W, W) - Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + Sw += (N_i / self.datanum) * ((1.0 / N_i) * W_WT) return Sw # Calculating beta and Bi for Sb @@ -658,7 +688,6 @@ class DGPLVM(Prior): Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() return Sig_beta_B_i_all - # Calculating W_j s separately so we can access all the W_j s anytime def compute_wj(self, data_idx, M_i): W_i = np.zeros((self.datanum, self.dim)) @@ -667,7 +696,7 @@ class DGPLVM(Prior): for tpl in data_idx[i]: xj = tpl[1] j = tpl[0] - W_i[j] = (xj - M_i[i]) + W_i[j] = xj - M_i[i] return W_i # Calculating alpha and Wj for Sw @@ -680,11 +709,11 @@ class DGPLVM(Prior): for j in lst_idx_all[i]: if k == j: alpha = 1 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i[k] += alpha * W_i[j] else: alpha = 0 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) - Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + Sig_alpha_W_i[k] += alpha * W_i[j] + Sig_alpha_W_i = (1.0 / self.datanum) * np.transpose(Sig_alpha_W_i) return Sig_alpha_W_i # This function calculates log of our prior @@ -696,9 +725,9 @@ class DGPLVM(Prior): Sb = self.compute_Sb(cls, M_i, M_0) Sw = self.compute_Sw(cls, M_i) # sb_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + # Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * 0.1)[0] return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) # This function calculates derivative of the log of prior function @@ -717,19 +746,20 @@ class DGPLVM(Prior): # Calculating inverse of Sb and its transpose and minus # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + # Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * 0.1)[0] Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sw_trans = np.transpose(Sw) # Calculating DJ/DXk DJ_Dxk = 2 * ( - Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( - Sig_alpha_W_i)) + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + + Sb_inv_N_trans.dot(Sig_alpha_W_i) + ) # Calculating derivative of the log of the prior - DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + DPx_Dx = (-1 / self.sigma2) * DJ_Dxk return DPx_Dx.T # def frb(self, x): @@ -744,7 +774,7 @@ class DGPLVM(Prior): return np.random.rand(n) # A WRONG implementation def __str__(self): - return 'DGPLVM_prior_Raq' + return "DGPLVM_prior_Raq" # ****************************************** @@ -752,6 +782,7 @@ class DGPLVM(Prior): from . import Parameterized from . import Param + class DGPLVM_Lamda(Prior, Parameterized): """ Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. @@ -761,6 +792,7 @@ class DGPLVM_Lamda(Prior, Parameterized): .. Note:: DGPLVM for Classification paper implementation """ + domain = _REAL # _instances = [] # def __new__(cls, mu, sigma): # Singleton: @@ -773,7 +805,7 @@ class DGPLVM_Lamda(Prior, Parameterized): # cls._instances.append(weakref.ref(o)) # return cls._instances[-1]() - def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'): + def __init__(self, sigma2, lbl, x_shape, lamda, name="DP_prior"): super(DGPLVM_Lamda, self).__init__(name=name) self.sigma2 = sigma2 # self.x = x @@ -783,7 +815,7 @@ class DGPLVM_Lamda(Prior, Parameterized): self.datanum = lbl.shape[0] self.x_shape = x_shape self.dim = x_shape[1] - self.lamda = Param('lamda', np.diag(lamda)) + self.lamda = Param("lamda", np.diag(lamda)) self.link_parameter(self.lamda) def get_class_label(self, y): @@ -831,7 +863,7 @@ class DGPLVM_Lamda(Prior, Parameterized): for i in data_idx: if len(lst_idx) == 0: pass - #Do nothing, because it is the first time list is created so is empty + # Do nothing, because it is the first time list is created so is empty else: lst_idx = [] # Here we put indices of each class in to the list called lst_idx_all @@ -856,9 +888,9 @@ class DGPLVM_Lamda(Prior, Parameterized): N_i = float(len(cls[i])) W_WT = np.zeros((self.dim, self.dim)) for xk in cls[i]: - W = (xk - M_i[i]) + W = xk - M_i[i] W_WT += np.outer(W, W) - Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + Sw += (N_i / self.datanum) * ((1.0 / N_i) * W_WT) return Sw # Calculating beta and Bi for Sb @@ -883,7 +915,6 @@ class DGPLVM_Lamda(Prior, Parameterized): Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() return Sig_beta_B_i_all - # Calculating W_j s separately so we can access all the W_j s anytime def compute_wj(self, data_idx, M_i): W_i = np.zeros((self.datanum, self.dim)) @@ -892,7 +923,7 @@ class DGPLVM_Lamda(Prior, Parameterized): for tpl in data_idx[i]: xj = tpl[1] j = tpl[0] - W_i[j] = (xj - M_i[i]) + W_i[j] = xj - M_i[i] return W_i # Calculating alpha and Wj for Sw @@ -905,11 +936,11 @@ class DGPLVM_Lamda(Prior, Parameterized): for j in lst_idx_all[i]: if k == j: alpha = 1 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i[k] += alpha * W_i[j] else: alpha = 0 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) - Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + Sig_alpha_W_i[k] += alpha * W_i[j] + Sig_alpha_W_i = (1.0 / self.datanum) * np.transpose(Sig_alpha_W_i) return Sig_alpha_W_i # This function calculates log of our prior @@ -917,7 +948,7 @@ class DGPLVM_Lamda(Prior, Parameterized): x = x.reshape(self.x_shape) #!!!!!!!!!!!!!!!!!!!!!!!!!!! - #self.lamda.values[:] = self.lamda.values/self.lamda.values.sum() + # self.lamda.values[:] = self.lamda.values/self.lamda.values.sum() xprime = x.dot(np.diagflat(self.lamda)) x = xprime @@ -928,9 +959,9 @@ class DGPLVM_Lamda(Prior, Parameterized): Sb = self.compute_Sb(cls, M_i, M_0) Sw = self.compute_Sw(cls, M_i) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] - Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] + # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + # Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * 0.9)[0] return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) # This function calculates derivative of the log of prior function @@ -952,19 +983,20 @@ class DGPLVM_Lamda(Prior, Parameterized): # Calculating inverse of Sb and its transpose and minus # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] - Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] + # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + # Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * 0.9)[0] Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sw_trans = np.transpose(Sw) # Calculating DJ/DXk DJ_Dxk = 2 * ( - Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( - Sig_alpha_W_i)) + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + + Sb_inv_N_trans.dot(Sig_alpha_W_i) + ) # Calculating derivative of the log of the prior - DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + DPx_Dx = (-1 / self.sigma2) * DJ_Dxk DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx) @@ -980,7 +1012,6 @@ class DGPLVM_Lamda(Prior, Parameterized): # print DPxprim_Dx return DPxprim_Dx - # def frb(self, x): # from functools import partial # from GPy.models import GradientChecker @@ -993,10 +1024,12 @@ class DGPLVM_Lamda(Prior, Parameterized): return np.random.rand(n) # A WRONG implementation def __str__(self): - return 'DGPLVM_prior_Raq_Lamda' + return "DGPLVM_prior_Raq_Lamda" + # ****************************************** + class DGPLVM_T(Prior): """ Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. @@ -1006,6 +1039,7 @@ class DGPLVM_T(Prior): .. Note:: DGPLVM for Classification paper implementation """ + domain = _REAL # _instances = [] # def __new__(cls, mu, sigma): # Singleton: @@ -1028,7 +1062,6 @@ class DGPLVM_T(Prior): self.dim = x_shape[1] self.vec = vec - def get_class_label(self, y): for idx, v in enumerate(y): if v == 1: @@ -1075,7 +1108,7 @@ class DGPLVM_T(Prior): for i in data_idx: if len(lst_idx) == 0: pass - #Do nothing, because it is the first time list is created so is empty + # Do nothing, because it is the first time list is created so is empty else: lst_idx = [] # Here we put indices of each class in to the list called lst_idx_all @@ -1100,9 +1133,9 @@ class DGPLVM_T(Prior): N_i = float(len(cls[i])) W_WT = np.zeros((self.dim, self.dim)) for xk in cls[i]: - W = (xk - M_i[i]) + W = xk - M_i[i] W_WT += np.outer(W, W) - Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + Sw += (N_i / self.datanum) * ((1.0 / N_i) * W_WT) return Sw # Calculating beta and Bi for Sb @@ -1127,7 +1160,6 @@ class DGPLVM_T(Prior): Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() return Sig_beta_B_i_all - # Calculating W_j s separately so we can access all the W_j s anytime def compute_wj(self, data_idx, M_i): W_i = np.zeros((self.datanum, self.dim)) @@ -1136,7 +1168,7 @@ class DGPLVM_T(Prior): for tpl in data_idx[i]: xj = tpl[1] j = tpl[0] - W_i[j] = (xj - M_i[i]) + W_i[j] = xj - M_i[i] return W_i # Calculating alpha and Wj for Sw @@ -1149,11 +1181,11 @@ class DGPLVM_T(Prior): for j in lst_idx_all[i]: if k == j: alpha = 1 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i[k] += alpha * W_i[j] else: alpha = 0 - (float(1) / N_i) - Sig_alpha_W_i[k] += (alpha * W_i[j]) - Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + Sig_alpha_W_i[k] += alpha * W_i[j] + Sig_alpha_W_i = (1.0 / self.datanum) * np.transpose(Sig_alpha_W_i) return Sig_alpha_W_i # This function calculates log of our prior @@ -1168,10 +1200,10 @@ class DGPLVM_T(Prior): Sb = self.compute_Sb(cls, M_i, M_0) Sw = self.compute_Sw(cls, M_i) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - #print 'SB_inv: ', Sb_inv_N - #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] + # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + # print 'SB_inv: ', Sb_inv_N + # Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * 0.1)[0] return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) # This function calculates derivative of the log of prior function @@ -1193,20 +1225,21 @@ class DGPLVM_T(Prior): # Calculating inverse of Sb and its transpose and minus # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - #print 'SB_inv: ',Sb_inv_N - #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] + # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + # print 'SB_inv: ',Sb_inv_N + # Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * 0.1)[0] Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sw_trans = np.transpose(Sw) # Calculating DJ/DXk DJ_Dxk = 2 * ( - Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( - Sig_alpha_W_i)) + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + + Sb_inv_N_trans.dot(Sig_alpha_W_i) + ) # Calculating derivative of the log of the prior - DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + DPx_Dx = (-1 / self.sigma2) * DJ_Dxk return DPx_Dx.T # def frb(self, x): @@ -1221,9 +1254,7 @@ class DGPLVM_T(Prior): return np.random.rand(n) # A WRONG implementation def __str__(self): - return 'DGPLVM_prior_Raq_TTT' - - + return "DGPLVM_prior_Raq_TTT" class HalfT(Prior): @@ -1234,6 +1265,7 @@ class HalfT(Prior): :param nu: degrees of freedom """ + domain = _POSITIVE _instances = [] @@ -1250,13 +1282,22 @@ class HalfT(Prior): def __init__(self, A, nu): self.A = float(A) self.nu = float(nu) - self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu) + self.constant = ( + gammaln(0.5 * (self.nu + 1.0)) + - gammaln(0.5 * self.nu) + - 0.5 * np.log(np.pi * self.A * self.nu) + ) def __str__(self): return "hT({:.2g}, {:.2g})".format(self.A, self.nu) def lnpdf(self, theta): - return (theta > 0) * (self.constant - .5*(self.nu + 1) * np.log(1. + (1./self.nu) * (theta/self.A)**2)) + return (theta > 0) * ( + self.constant + - 0.5 + * (self.nu + 1) + * np.log(1.0 + (1.0 / self.nu) * (theta / self.A) ** 2) + ) # theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) # lnpdfs = np.zeros_like(theta) @@ -1268,7 +1309,7 @@ class HalfT(Prior): # lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5) # - gammaln(v * 0.5) # - 0.5*np.log(sigma2 * v * np.pi) - # - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2)) + # - 0.5*(v + 1)*np.log(1 + (1/float(v))*((theta[above_zero][0]**2)/sigma2)) # ) # return lnpdfs @@ -1278,12 +1319,18 @@ class HalfT(Prior): above_zero = theta > 1e-6 v = self.nu sigma2 = self.A - grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2) + grad[above_zero] = ( + -0.5 + * (v + 1) + * (2 * theta[above_zero]) + / (v * sigma2 + theta[above_zero][0] ** 2) + ) return grad def rvs(self, n): # return np.random.randn(n) * self.sigma + self.mu from scipy.stats import t + # [np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)]) ret = t.rvs(self.nu, loc=0, scale=self.A, size=n) ret[ret < 0] = 0 @@ -1298,6 +1345,7 @@ class Exponential(Prior): :param l: shape parameter """ + domain = _POSITIVE _instances = [] @@ -1318,22 +1366,25 @@ class Exponential(Prior): return "Exp({:.2g})".format(self.l) def summary(self): - ret = {"E[x]": 1. / self.l, - "E[ln x]": np.nan, - "var[x]": 1. / self.l**2, - "Entropy": 1. - np.log(self.l), - "Mode": 0.} + ret = { + "E[x]": 1.0 / self.l, + "E[ln x]": np.nan, + "var[x]": 1.0 / self.l**2, + "Entropy": 1.0 - np.log(self.l), + "Mode": 0.0, + } return ret def lnpdf(self, x): return np.log(self.l) - self.l * x def lnpdf_grad(self, x): - return - self.l + return -self.l def rvs(self, n): return np.random.exponential(scale=self.l, size=n) + class StudentT(Prior): """ Implementation of the student t probability function, coupled with random variables. @@ -1345,6 +1396,7 @@ class StudentT(Prior): .. Note:: Bishop 2006 notation is used throughout the code """ + domain = _REAL _instances = [] @@ -1352,7 +1404,11 @@ class StudentT(Prior): if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: - if instance().mu == mu and instance().sigma == sigma and instance().nu == nu: + if ( + instance().mu == mu + and instance().sigma == sigma + and instance().nu == nu + ): return instance() newfunc = super(Prior, cls).__new__ if newfunc is object.__new__: @@ -1373,13 +1429,18 @@ class StudentT(Prior): def lnpdf(self, x): from scipy.stats import t - return t.logpdf(x,self.nu,self.mu,self.sigma) + + return t.logpdf(x, self.nu, self.mu, self.sigma) def lnpdf_grad(self, x): - return -(self.nu + 1.)*(x - self.mu)/( self.nu*self.sigma2 + np.square(x - self.mu) ) + return ( + -(self.nu + 1.0) + * (x - self.mu) + / (self.nu * self.sigma2 + np.square(x - self.mu)) + ) def rvs(self, n): from scipy.stats import t + ret = t.rvs(self.nu, loc=self.mu, scale=self.sigma, size=n) return ret - diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index e8de3c40..6c97a5d8 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -12,6 +12,7 @@ from ..core.parameterization import Param from paramz.transformations import Logexp from scipy.special import psi as digamma + class StudentT(Likelihood): """ Student T likelihood @@ -22,17 +23,18 @@ class StudentT(Likelihood): p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} """ - def __init__(self,gp_link=None, deg_free=5, sigma2=2): + + def __init__(self, gp_link=None, deg_free=5, sigma2=2): if gp_link is None: gp_link = link_functions.Identity() - super(StudentT, self).__init__(gp_link, name='Student_T') + super(StudentT, self).__init__(gp_link, name="Student_T") # sigma2 is not a noise parameter, it is a squared scale. - self.sigma2 = Param('t_scale2', float(sigma2), Logexp()) - self.v = Param('deg_free', float(deg_free), Logexp()) + self.sigma2 = Param("t_scale2", float(sigma2), Logexp()) + self.v = Param("deg_free", float(deg_free), Logexp()) self.link_parameter(self.sigma2) self.link_parameter(self.v) - #self.v.constrain_fixed() + # self.v.constrain_fixed() self.log_concave = False @@ -61,11 +63,14 @@ class StudentT(Likelihood): """ assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f - #Careful gamma(big_number) is infinity! - objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) - / (np.sqrt(self.v * np.pi * self.sigma2))) - * ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1))) - ) + # Careful gamma(big_number) is infinity! + objective = ( + np.exp(gammaln((self.v + 1) * 0.5) - gammaln(self.v * 0.5)) + / (np.sqrt(self.v * np.pi * self.sigma2)) + ) * ( + (1 + (1.0 / float(self.v)) * ((e**2) / float(self.sigma2))) + ** (-0.5 * (self.v + 1)) + ) return np.prod(objective) def logpdf_link(self, inv_link_f, y, Y_metadata=None): @@ -85,15 +90,16 @@ class StudentT(Likelihood): """ e = y - inv_link_f - #FIXME: - #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! - #But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly - #print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) - objective = (+ gammaln((self.v + 1) * 0.5) - - gammaln(self.v * 0.5) - - 0.5*np.log(self.sigma2 * self.v * np.pi) - - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) - ) + # FIXME: + # Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! + # But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly + # print - 0.5*(self.v + 1)*np.log(1 + (1/(self.v))*((e**2)/self.sigma2)) + objective = ( + +gammaln((self.v + 1) * 0.5) + - gammaln(self.v * 0.5) + - 0.5 * np.log(self.sigma2 * self.v * np.pi) + - 0.5 * (self.v + 1) * np.log(1 + (1 / (self.v)) * ((e**2) / self.sigma2)) + ) return objective def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): @@ -138,7 +144,9 @@ class StudentT(Likelihood): (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ e = y - inv_link_f - hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) + hess = ((self.v + 1) * (e**2 - self.v * self.sigma2)) / ( + (self.sigma2 * self.v + e**2) ** 2 + ) return hess def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): @@ -157,9 +165,9 @@ class StudentT(Likelihood): :rtype: Nx1 array """ e = y - inv_link_f - d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / - ((e**2 + self.sigma2*self.v)**3) - ) + d3lik_dlink3 = -( + 2 * (self.v + 1) * (-e) * (e**2 - 3 * self.v * self.sigma2) + ) / ((e**2 + self.sigma2 * self.v) ** 3) return d3lik_dlink3 def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None): @@ -179,7 +187,11 @@ class StudentT(Likelihood): """ e = y - inv_link_f e2 = np.square(e) - dlogpdf_dvar = self.v*(e2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e2)) + dlogpdf_dvar = ( + self.v + * (e2 - self.sigma2) + / (2 * self.sigma2 * (self.sigma2 * self.v + e2)) + ) return dlogpdf_dvar def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): @@ -198,7 +210,9 @@ class StudentT(Likelihood): :rtype: Nx1 array """ e = y - inv_link_f - dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) + dlogpdf_dlink_dvar = (self.v * (self.v + 1) * (-e)) / ( + (self.sigma2 * self.v + e**2) ** 2 + ) return dlogpdf_dlink_dvar def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None): @@ -217,9 +231,9 @@ class StudentT(Likelihood): :rtype: Nx1 array """ e = y - inv_link_f - d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) - / ((self.sigma2*self.v + (e**2))**3) - ) + d2logpdf_dlink2_dvar = ( + self.v * (self.v + 1) * (self.sigma2 * self.v - 3 * (e**2)) + ) / ((self.sigma2 * self.v + (e**2)) ** 3) return d2logpdf_dlink2_dvar def dlogpdf_link_dv(self, inv_link_f, y, Y_metadata=None): @@ -227,9 +241,11 @@ class StudentT(Likelihood): e2 = np.square(e) df = float(self.v[:]) s2 = float(self.sigma2[:]) - dlogpdf_dv = 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df) - dlogpdf_dv += 0.5*(df+1)*e2/(df*(e2 + s2*df)) - dlogpdf_dv -= 0.5*np.log1p(e2/(s2*df)) + dlogpdf_dv = ( + 0.5 * digamma(0.5 * (df + 1)) - 0.5 * digamma(0.5 * df) - 1.0 / (2 * df) + ) + dlogpdf_dv += 0.5 * (df + 1) * e2 / (df * (e2 + s2 * df)) + dlogpdf_dv -= 0.5 * np.log1p(e2 / (s2 * df)) return dlogpdf_dv def dlogpdf_dlink_dv(self, inv_link_f, y, Y_metadata=None): @@ -237,7 +253,7 @@ class StudentT(Likelihood): e2 = np.square(e) df = float(self.v[:]) s2 = float(self.sigma2[:]) - dlogpdf_df_dv = e*(e2 - self.sigma2)/(e2 + s2*df)**2 + dlogpdf_df_dv = e * (e2 - self.sigma2) / (e2 + s2 * df) ** 2 return dlogpdf_df_dv def d2logpdf_dlink2_dv(self, inv_link_f, y, Y_metadata=None): @@ -245,8 +261,10 @@ class StudentT(Likelihood): e2 = np.square(e) df = float(self.v[:]) s2 = float(self.sigma2[:]) - e2_s2v = e**2 + s2*df - d2logpdf_df2_dv = (-s2*(df+1) + e2 - s2*df)/e2_s2v**2 - 2*s2*(df+1)*(e2 - s2*df)/e2_s2v**3 + e2_s2v = e**2 + s2 * df + d2logpdf_df2_dv = (-s2 * (df + 1) + e2 - s2 * df) / e2_s2v**2 - 2 * s2 * ( + df + 1 + ) * (e2 - s2 * df) / e2_s2v**3 return d2logpdf_df2_dv def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): @@ -266,19 +284,23 @@ class StudentT(Likelihood): def predictive_mean(self, mu, sigma, Y_metadata=None): # The comment here confuses mean and median. - return self.gp_link.transf(mu) # only true if link is monotonic, which it is. + return self.gp_link.transf(mu) # only true if link is monotonic, which it is. - def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): - if self.deg_free<=2.: - return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2. + def predictive_variance(self, mu, variance, predictive_mean=None, Y_metadata=None): + if self.deg_free <= 2.0: + return ( + np.empty(mu.shape) * np.nan + ) # does not exist for degrees of freedom <= 2. else: - return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata) + return super(StudentT, self).predictive_variance( + mu, variance, predictive_mean, Y_metadata + ) def conditional_mean(self, gp): return self.gp_link.transf(gp) def conditional_variance(self, gp): - return self.deg_free/(self.deg_free - 2.) + return self.deg_free / (self.deg_free - 2.0) def samples(self, gp, Y_metadata=None): """ @@ -288,11 +310,10 @@ class StudentT(Likelihood): """ orig_shape = gp.shape gp = gp.flatten() - #FIXME: Very slow as we are computing a new random variable per input! - #Can't get it to sample all at the same time - #student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp]) - dfs = np.ones_like(gp)*self.v - scales = np.ones_like(gp)*np.sqrt(self.sigma2) - student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp), - scale=scales) + # FIXME: Very slow as we are computing a new random variable per input! + # Can't get it to sample all at the same time + # student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp]) + dfs = np.ones_like(gp) * self.v + scales = np.ones_like(gp) * np.sqrt(self.sigma2) + student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp), scale=scales) return student_t_samples.reshape(orig_shape) diff --git a/benchmarks/regression/evaluation.py b/benchmarks/regression/evaluation.py index c57bce7e..7de8d5ae 100644 --- a/benchmarks/regression/evaluation.py +++ b/benchmarks/regression/evaluation.py @@ -4,18 +4,19 @@ import abc import numpy as np + class Evaluation(object): __metaclass__ = abc.ABCMeta - + @abc.abstractmethod def evaluate(self, gt, pred): """Compute a scalar for access the performance""" return None + class RMSE(Evaluation): "Rooted Mean Square Error" - name = 'RMSE' - + name = "RMSE" + def evaluate(self, gt, pred): - return np.sqrt(np.square(gt-pred).astype(np.float).mean()) - + return np.sqrt(np.square(gt - pred).astype(float).mean())