diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 84b6357e..83c83dfd 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -5,7 +5,7 @@ import numpy as np from scipy.special import gammaln, digamma from ...util.linalg import pdinv -from domains import _REAL, _POSITIVE +from .domains import _REAL, _POSITIVE import warnings import weakref @@ -15,8 +15,12 @@ class Prior(object): _instance = None def __new__(cls, *args, **kwargs): if not cls._instance or cls._instance.__class__ is not cls: - cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs) - return cls._instance + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + cls._instance = newfunc(cls) + else: + cls._instance = newfunc(cls, *args, **kwargs) + return cls._instance def pdf(self, x): return np.exp(self.lnpdf(x)) @@ -52,7 +56,11 @@ class Gaussian(Prior): for instance in cls._instances: if instance().mu == mu and instance().sigma == sigma: return instance() - o = super(Prior, cls).__new__(cls, mu, sigma) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, mu, sigma) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() @@ -140,7 +148,11 @@ class LogGaussian(Gaussian): for instance in cls._instances: if instance().mu == mu and instance().sigma == sigma: return instance() - o = super(Prior, cls).__new__(cls, mu, sigma) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, mu, sigma) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() @@ -258,7 +270,11 @@ class Gamma(Prior): for instance in cls._instances: if instance().a == a and instance().b == b: return instance() - o = super(Prior, cls).__new__(cls, a, b) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, a, b) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() @@ -398,7 +414,7 @@ class DGPLVM_KFDA(Prior): def compute_cls(self, x): cls = {} # Appending each data point to its proper class - for j in xrange(self.datanum): + for j in range(self.datanum): class_label = self.get_class_label(self.lbl[j]) if class_label not in cls: cls[class_label] = [] @@ -504,6 +520,219 @@ class DGPLVM(Prior): .. Note:: DGPLVM for Classification paper implementation + """ + domain = _REAL + + def __new__(cls, sigma2, lbl, x_shape): + return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape) + + def __init__(self, sigma2, lbl, x_shape): + self.sigma2 = sigma2 + # self.x = x + self.lbl = lbl + self.classnum = lbl.shape[1] + self.datanum = lbl.shape[0] + self.x_shape = x_shape + self.dim = x_shape[1] + + def get_class_label(self, y): + for idx, v in enumerate(y): + if v == 1: + return idx + return -1 + + # This function assigns each data point to its own class + # and returns the dictionary which contains the class name and parameters. + def compute_cls(self, x): + cls = {} + # Appending each data point to its proper class + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in cls: + cls[class_label] = [] + cls[class_label].append(x[j]) + return cls + + # This function computes mean of each class. The mean is calculated through each dimension + def compute_Mi(self, cls): + M_i = np.zeros((self.classnum, self.dim)) + for i in cls: + # Mean of each class + class_i = cls[i] + M_i[i] = np.mean(class_i, axis=0) + return M_i + + # Adding data points as tuple to the dictionary so that we can access indices + def compute_indices(self, x): + data_idx = {} + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in data_idx: + data_idx[class_label] = [] + t = (j, x[j]) + data_idx[class_label].append(t) + return data_idx + + # Adding indices to the list so we can access whole the indices + def compute_listIndices(self, data_idx): + lst_idx = [] + lst_idx_all = [] + for i in data_idx: + if len(lst_idx) == 0: + pass + #Do nothing, because it is the first time list is created so is empty + else: + lst_idx = [] + # Here we put indices of each class in to the list called lst_idx_all + for m in range(len(data_idx[i])): + lst_idx.append(data_idx[i][m][0]) + lst_idx_all.append(lst_idx) + return lst_idx_all + + # This function calculates between classes variances + def compute_Sb(self, cls, M_i, M_0): + Sb = np.zeros((self.dim, self.dim)) + for i in cls: + B = (M_i[i] - M_0).reshape(self.dim, 1) + B_trans = B.transpose() + Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans) + return Sb + + # This function calculates within classes variances + def compute_Sw(self, cls, M_i): + Sw = np.zeros((self.dim, self.dim)) + for i in cls: + N_i = float(len(cls[i])) + W_WT = np.zeros((self.dim, self.dim)) + for xk in cls[i]: + W = (xk - M_i[i]) + W_WT += np.outer(W, W) + Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + return Sw + + # Calculating beta and Bi for Sb + def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all): + # import pdb + # pdb.set_trace() + B_i = np.zeros((self.classnum, self.dim)) + Sig_beta_B_i_all = np.zeros((self.datanum, self.dim)) + for i in data_idx: + # pdb.set_trace() + # Calculating Bi + B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) + for k in range(self.datanum): + for i in data_idx: + N_i = float(len(data_idx[i])) + if k in lst_idx_all[i]: + beta = (float(1) / N_i) - (float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + else: + beta = -(float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() + return Sig_beta_B_i_all + + + # Calculating W_j s separately so we can access all the W_j s anytime + def compute_wj(self, data_idx, M_i): + W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + xj = tpl[1] + j = tpl[0] + W_i[j] = (xj - M_i[i]) + return W_i + + # Calculating alpha and Wj for Sw + def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i): + Sig_alpha_W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + k = tpl[0] + for j in lst_idx_all[i]: + if k == j: + alpha = 1 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + else: + alpha = 0 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + return Sig_alpha_W_i + + # This function calculates log of our prior + def lnpdf(self, x): + x = x.reshape(self.x_shape) + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) + + # This function calculates derivative of the log of prior function + def lnpdf_grad(self, x): + x = x.reshape(self.x_shape) + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + data_idx = self.compute_indices(x) + lst_idx_all = self.compute_listIndices(data_idx) + Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) + W_i = self.compute_wj(data_idx, M_i) + Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) + + # Calculating inverse of Sb and its transpose and minus + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + Sb_inv_N_trans = np.transpose(Sb_inv_N) + Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans + Sw_trans = np.transpose(Sw) + + # Calculating DJ/DXk + DJ_Dxk = 2 * ( + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( + Sig_alpha_W_i)) + # Calculating derivative of the log of the prior + DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + return DPx_Dx.T + + # def frb(self, x): + # from functools import partial + # from GPy.models import GradientChecker + # f = partial(self.lnpdf) + # df = partial(self.lnpdf_grad) + # grad = GradientChecker(f, df, x, 'X') + # grad.checkgrad(verbose=1) + + def rvs(self, n): + return np.random.rand(n) # A WRONG implementation + + def __str__(self): + return 'DGPLVM_prior_Raq' + + +# ****************************************** + +from .. import Parameterized +from .. import Param +class DGPLVM_Lamda(Prior, Parameterized): + """ + Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. + + :param sigma2: constant + + .. Note:: DGPLVM for Classification paper implementation + """ domain = _REAL # _instances = [] @@ -517,14 +746,18 @@ class DGPLVM(Prior): # cls._instances.append(weakref.ref(o)) # return cls._instances[-1]() - def __init__(self, sigma2, lbl, x_shape): + def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'): + super(DGPLVM_Lamda, self).__init__(name=name) self.sigma2 = sigma2 # self.x = x self.lbl = lbl + self.lamda = lamda self.classnum = lbl.shape[1] self.datanum = lbl.shape[0] self.x_shape = x_shape self.dim = x_shape[1] + self.lamda = Param('lamda', np.diag(lamda)) + self.link_parameter(self.lamda) def get_class_label(self, y): for idx, v in enumerate(y): @@ -549,7 +782,8 @@ class DGPLVM(Prior): M_i = np.zeros((self.classnum, self.dim)) for i in cls: # Mean of each class - M_i[i] = np.mean(cls[i], axis=0) + class_i = cls[i] + M_i[i] = np.mean(class_i, axis=0) return M_i # Adding data points as tuple to the dictionary so that we can access indices @@ -654,6 +888,13 @@ class DGPLVM(Prior): # This function calculates log of our prior def lnpdf(self, x): x = x.reshape(self.x_shape) + + #!!!!!!!!!!!!!!!!!!!!!!!!!!! + #self.lamda.values[:] = self.lamda.values/self.lamda.values.sum() + + xprime = x.dot(np.diagflat(self.lamda)) + x = xprime + # print x cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) M_i = self.compute_Mi(cls) @@ -661,12 +902,16 @@ class DGPLVM(Prior): Sw = self.compute_Sw(cls, M_i) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) # This function calculates derivative of the log of prior function def lnpdf_grad(self, x): x = x.reshape(self.x_shape) + xprime = x.dot(np.diagflat(self.lamda)) + x = xprime + # print x cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) M_i = self.compute_Mi(cls) @@ -680,8 +925,251 @@ class DGPLVM(Prior): # Calculating inverse of Sb and its transpose and minus # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] + Sb_inv_N_trans = np.transpose(Sb_inv_N) + Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans + Sw_trans = np.transpose(Sw) + + # Calculating DJ/DXk + DJ_Dxk = 2 * ( + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( + Sig_alpha_W_i)) + # Calculating derivative of the log of the prior + DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + + DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx) + + # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + DPxprim_Dx = DPxprim_Dx.T + + DPxprim_Dlamda = DPx_Dx.dot(x) + + # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + DPxprim_Dlamda = DPxprim_Dlamda.T + + self.lamda.gradient = np.diag(DPxprim_Dlamda) + # print DPxprim_Dx + return DPxprim_Dx + + + # def frb(self, x): + # from functools import partial + # from GPy.models import GradientChecker + # f = partial(self.lnpdf) + # df = partial(self.lnpdf_grad) + # grad = GradientChecker(f, df, x, 'X') + # grad.checkgrad(verbose=1) + + def rvs(self, n): + return np.random.rand(n) # A WRONG implementation + + def __str__(self): + return 'DGPLVM_prior_Raq_Lamda' + +# ****************************************** + +class DGPLVM_T(Prior): + """ + Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. + + :param sigma2: constant + + .. Note:: DGPLVM for Classification paper implementation + + """ + domain = _REAL + # _instances = [] + # def __new__(cls, mu, sigma): # Singleton: + # if cls._instances: + # cls._instances[:] = [instance for instance in cls._instances if instance()] + # for instance in cls._instances: + # if instance().mu == mu and instance().sigma == sigma: + # return instance() + # o = super(Prior, cls).__new__(cls, mu, sigma) + # cls._instances.append(weakref.ref(o)) + # return cls._instances[-1]() + + def __init__(self, sigma2, lbl, x_shape, vec): + self.sigma2 = sigma2 + # self.x = x + self.lbl = lbl + self.classnum = lbl.shape[1] + self.datanum = lbl.shape[0] + self.x_shape = x_shape + self.dim = x_shape[1] + self.vec = vec + + + def get_class_label(self, y): + for idx, v in enumerate(y): + if v == 1: + return idx + return -1 + + # This function assigns each data point to its own class + # and returns the dictionary which contains the class name and parameters. + def compute_cls(self, x): + cls = {} + # Appending each data point to its proper class + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in cls: + cls[class_label] = [] + cls[class_label].append(x[j]) + return cls + + # This function computes mean of each class. The mean is calculated through each dimension + def compute_Mi(self, cls): + M_i = np.zeros((self.classnum, self.dim)) + for i in cls: + # Mean of each class + # class_i = np.multiply(cls[i],vec) + class_i = cls[i] + M_i[i] = np.mean(class_i, axis=0) + return M_i + + # Adding data points as tuple to the dictionary so that we can access indices + def compute_indices(self, x): + data_idx = {} + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in data_idx: + data_idx[class_label] = [] + t = (j, x[j]) + data_idx[class_label].append(t) + return data_idx + + # Adding indices to the list so we can access whole the indices + def compute_listIndices(self, data_idx): + lst_idx = [] + lst_idx_all = [] + for i in data_idx: + if len(lst_idx) == 0: + pass + #Do nothing, because it is the first time list is created so is empty + else: + lst_idx = [] + # Here we put indices of each class in to the list called lst_idx_all + for m in range(len(data_idx[i])): + lst_idx.append(data_idx[i][m][0]) + lst_idx_all.append(lst_idx) + return lst_idx_all + + # This function calculates between classes variances + def compute_Sb(self, cls, M_i, M_0): + Sb = np.zeros((self.dim, self.dim)) + for i in cls: + B = (M_i[i] - M_0).reshape(self.dim, 1) + B_trans = B.transpose() + Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans) + return Sb + + # This function calculates within classes variances + def compute_Sw(self, cls, M_i): + Sw = np.zeros((self.dim, self.dim)) + for i in cls: + N_i = float(len(cls[i])) + W_WT = np.zeros((self.dim, self.dim)) + for xk in cls[i]: + W = (xk - M_i[i]) + W_WT += np.outer(W, W) + Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + return Sw + + # Calculating beta and Bi for Sb + def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all): + # import pdb + # pdb.set_trace() + B_i = np.zeros((self.classnum, self.dim)) + Sig_beta_B_i_all = np.zeros((self.datanum, self.dim)) + for i in data_idx: + # pdb.set_trace() + # Calculating Bi + B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) + for k in range(self.datanum): + for i in data_idx: + N_i = float(len(data_idx[i])) + if k in lst_idx_all[i]: + beta = (float(1) / N_i) - (float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + else: + beta = -(float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() + return Sig_beta_B_i_all + + + # Calculating W_j s separately so we can access all the W_j s anytime + def compute_wj(self, data_idx, M_i): + W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + xj = tpl[1] + j = tpl[0] + W_i[j] = (xj - M_i[i]) + return W_i + + # Calculating alpha and Wj for Sw + def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i): + Sig_alpha_W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + k = tpl[0] + for j in lst_idx_all[i]: + if k == j: + alpha = 1 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + else: + alpha = 0 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + return Sig_alpha_W_i + + # This function calculates log of our prior + def lnpdf(self, x): + x = x.reshape(self.x_shape) + xprim = x.dot(self.vec) + x = xprim + # print x + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #print 'SB_inv: ', Sb_inv_N + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] + return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) + + # This function calculates derivative of the log of prior function + def lnpdf_grad(self, x): + x = x.reshape(self.x_shape) + xprim = x.dot(self.vec) + x = xprim + # print x + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + data_idx = self.compute_indices(x) + lst_idx_all = self.compute_listIndices(data_idx) + Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) + W_i = self.compute_wj(data_idx, M_i) + Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) + + # Calculating inverse of Sb and its transpose and minus + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #print 'SB_inv: ',Sb_inv_N + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sw_trans = np.transpose(Sw) @@ -706,7 +1194,9 @@ class DGPLVM(Prior): return np.random.rand(n) # A WRONG implementation def __str__(self): - return 'DGPLVM_prior' + return 'DGPLVM_prior_Raq_TTT' + + class HalfT(Prior): """ diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py index 01a1f44b..830809d6 100644 --- a/GPy/core/parameterization/transformations.py +++ b/GPy/core/parameterization/transformations.py @@ -62,7 +62,7 @@ class Transformation(object): import matplotlib.pyplot as plt from ...plotting.matplot_dep import base_plots x = np.linspace(-8,8) - base_plots.meanplot(x, self.f(x),axes=axes*args,**kw) + base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw) axes = plt.gca() axes.set_xlabel(xlabel) axes.set_ylabel(ylabel) diff --git a/GPy/testing/rv_transformation_tests.py b/GPy/testing/rv_transformation_tests.py new file mode 100644 index 00000000..e6b3e3e7 --- /dev/null +++ b/GPy/testing/rv_transformation_tests.py @@ -0,0 +1,101 @@ +# Written by Ilias Bilionis +""" +Test if hyperparameters in models are properly transformed. +""" + + +import unittest +import numpy as np +import scipy.stats as st +import GPy + + +class TestModel(GPy.core.Model): + """ + A simple GPy model with one parameter. + """ + def __init__(self): + GPy.core.Model.__init__(self, 'test_model') + theta = GPy.core.Param('theta', 1.) + self.link_parameter(theta) + + def log_likelihood(self): + return 0. + + +class RVTransformationTestCase(unittest.TestCase): + + def _test_trans(self, trans): + m = TestModel() + prior = GPy.priors.LogGaussian(.5, 0.1) + m.theta.set_prior(prior) + m.theta.unconstrain() + m.theta.constrain(trans) + # The PDF of the transformed variables + p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0]) + # To the empirical PDF of: + theta_s = prior.rvs(100000) + phi_s = trans.finv(theta_s) + # which is essentially a kernel density estimation + kde = st.gaussian_kde(phi_s) + # We will compare the PDF here: + phi = np.linspace(phi_s.min(), phi_s.max(), 100) + # The transformed PDF of phi should be this: + pdf_phi = np.array([p_phi(p) for p in phi]) + # UNCOMMENT TO SEE GRAPHICAL COMPARISON + #import matplotlib.pyplot as plt + #fig, ax = plt.subplots() + #ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram') + #ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation') + #ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF') + #ax.set_xlabel(r'transformed $\theta$', fontsize=16) + #ax.set_ylabel('PDF', fontsize=16) + #plt.legend(loc='best') + #plt.show(block=True) + # END OF PLOT + # The following test cannot be very accurate + self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1) + # Check the gradients at a few random points + for i in xrange(10): + m.theta = theta_s[i] + self.assertTrue(m.checkgrad(verbose=True)) + + def test_Logexp(self): + self._test_trans(GPy.constraints.Logexp()) + self._test_trans(GPy.constraints.Exponent()) + + +if __name__ == '__main__': + unittest.main() + quit() + m = TestModel() + prior = GPy.priors.LogGaussian(0., .9) + m.theta.set_prior(prior) + + # The following should return the PDF in terms of the transformed quantities + p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0]) + + # Let's look at the transformation phi = log(exp(theta - 1)) + trans = GPy.constraints.Exponent() + m.theta.constrain(trans) + # Plot the transformed probability density + phi = np.linspace(-8, 8, 100) + fig, ax = plt.subplots() + # Let's draw some samples of theta and transform them so that we see + # which one is right + theta_s = prior.rvs(10000) + # Transform it to the new variables + phi_s = trans.finv(theta_s) + # And draw their histogram + ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical') + # This is to be compared to the PDF of the model expressed in terms of these new + # variables + ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2) + ax.set_xlim(-3, 10) + ax.set_xlabel(r'transformed $\theta$', fontsize=16) + ax.set_ylabel('PDF', fontsize=16) + plt.legend(loc='best') + # Now let's test the gradients + m.checkgrad(verbose=True) + # And show the plot + plt.show(block=True) diff --git a/ib_tests/test_regression.py b/ib_tests/test_regression.py index cd9e2943..742948c1 100644 --- a/ib_tests/test_regression.py +++ b/ib_tests/test_regression.py @@ -34,3 +34,4 @@ if __name__ == '__main__': ax = fig.add_subplot(samples.shape[1], 1, i + 1) ax.plot(samples[:, i], linewidth=1.5) plt.show(block=True) + diff --git a/ib_tests/test_transformation_of_pdf.py b/ib_tests/test_transformation_of_pdf.py index af152ca8..8da16b21 100644 --- a/ib_tests/test_transformation_of_pdf.py +++ b/ib_tests/test_transformation_of_pdf.py @@ -14,44 +14,54 @@ import sys import os # Make sure we load the GP that is here sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +print 'trying' import GPy +print 'done' import matplotlib.pyplot as plt import numpy as np import scipy.stats as st import scipy.integrate as integrate -if __name__ == '__main__': - p_theta = st.lognorm(.9) +class TestModel(GPy.core.Model): + def __init__(self): + GPy.core.Model.__init__(self, 'test_model') + theta = GPy.core.Param('theta', 1.) + self.link_parameter(theta) - # Plot the PDF of theta - fig, ax = plt.subplots() - theta = np.linspace(0.0001, 8., 100) - ax.plot(theta, p_theta.pdf(theta), linewidth=2, label='True PDF') - ax.set_xlabel(r'$\theta$', fontsize=16) - ax.set_ylabel(r'$p(\theta)$', fontsize=16) - - # Now let's look at the transformation phi = log(exp(theta - 1)) - t = GPy.constraints.Logexp() - t.plot() + def log_likelihood(self): + return 0. + + +if __name__ == '__main__': + m = TestModel() + prior = GPy.priors.LogGaussian(0., .9) + m.theta.set_prior(prior) + + # The following should return the PDF in terms of the transformed quantities + p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0]) + + # Let's look at the transformation phi = log(exp(theta - 1)) + trans = GPy.constraints.Exponent() + m.theta.constrain(trans) # Plot the transformed probability density phi = np.linspace(-8, 8, 100) fig, ax = plt.subplots() - ax.plot(phi, p_theta.pdf(t.f(phi)) * t.jacobianfactor(t.f(phi)), linewidth=2, - label='Transformed PDF') - # Now find the normalization constant for the naive transformation of the - # PDF - p_phi_prop = lambda(phi): p_theta.pdf(t.f(phi)) - c = integrate.quad(p_phi_prop, -np.inf, np.inf)[0] - p_phi = lambda(phi): p_phi_prop(phi) / c - ax.plot(phi, p_phi(phi), '--', linewidth=2, label='Naively transformed PDF') - # Now let's draw some samples of theta and transform them so that we see + # Let's draw some samples of theta and transform them so that we see # which one is right - theta_s = p_theta.rvs(100000) - phi_s = t.finv(theta_s) + theta_s = prior.rvs(10000) + # Transform it to the new variables + phi_s = trans.finv(theta_s) + # And draw their histogram ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical') + # This is to be compared to the PDF of the model expressed in terms of these new + # variables + ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2) ax.set_xlim(-3, 10) ax.set_xlabel(r'transformed $\theta$', fontsize=16) ax.set_ylabel('PDF', fontsize=16) plt.legend(loc='best') + # Now let's test the gradients + m.checkgrad(verbose=True) + # And show the plot plt.show(block=True)