diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 6add95b0..48c05a8d 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -13,11 +13,12 @@ Observable Pattern for patameterization """ -from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED +from .transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED import numpy as np import re import logging -from updateable import Updateable +from .updateable import Updateable +from functools import reduce class HierarchyError(Exception): """ @@ -36,7 +37,7 @@ def adjust_name_for_printing(name): name = name.replace("/", "_l_").replace("@", '_at_') name = name.replace("(", "_of_").replace(")", "") if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None: - raise NameError, "name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name) + raise NameError("name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name)) return name return '' @@ -65,13 +66,13 @@ class Parentable(object): Gets called, when the parent changed, so we can adjust our inner attributes according to the new parent. """ - raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent" + raise NotImplementedError("shouldnt happen, Parentable objects need to be able to change their parent") def _disconnect_parent(self, *args, **kw): """ Disconnect this object from its parent """ - raise NotImplementedError, "Abstract superclass" + raise NotImplementedError("Abstract superclass") @property def _highest_parent_(self): @@ -109,7 +110,10 @@ class Pickleable(object): it properly. :param protocol: pickling protocol to use, python-pickle for details. """ - import cPickle as pickle + try: #Py2 + import cPickle as pickle + except ImportError: #Py3 + import pickle if isinstance(f, str): with open(f, 'wb') as f: pickle.dump(self, f, protocol) @@ -138,9 +142,9 @@ class Pickleable(object): which = self which.traverse_parents(parents.append) # collect parents for p in parents: - if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied - if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient - if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent + if not id(p) in memo :memo[id(p)] = None # set all parents to be None, so they will not be copied + if not id(self.gradient) in memo:memo[id(self.gradient)] = None # reset the gradient + if not id(self._fixes_) in memo :memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent copy = copy.deepcopy(self, memo) # and start the copy copy._parent_index_ = None copy._trigger_params_changed() @@ -163,14 +167,16 @@ class Pickleable(object): '_Cacher_wrap__cachers', # never pickle cachers ] dc = dict() - for k,v in self.__dict__.iteritems(): + #py3 fix + #for k,v in self.__dict__.iteritems(): + for k,v in self.__dict__.items(): if k not in ignore_list: dc[k] = v return dc def __setstate__(self, state): self.__dict__.update(state) - from lists_and_dicts import ObserverList + from .lists_and_dicts import ObserverList self.observers = ObserverList() self._setup_observers() self._optimizer_copy_transformed = False @@ -214,7 +220,7 @@ class Gradcheckable(Pickleable, Parentable): Perform the checkgrad on the model. TODO: this can be done more efficiently, when doing it inside here """ - raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!" + raise HierarchyError("This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!") class Nameable(Gradcheckable): """ @@ -268,7 +274,7 @@ class Indexable(Nameable, Updateable): def __init__(self, name, default_constraint=None, *a, **kw): super(Indexable, self).__init__(name=name, *a, **kw) self._default_constraint_ = default_constraint - from index_operations import ParameterIndexOperations + from .index_operations import ParameterIndexOperations self.constraints = ParameterIndexOperations() self.priors = ParameterIndexOperations() if self._default_constraint_ is not None: @@ -310,7 +316,7 @@ class Indexable(Nameable, Updateable): that is an int array, containing the indexes for the flattened param inside this parameterized logic. """ - from param import ParamConcatenation + from .param import ParamConcatenation if isinstance(param, ParamConcatenation): return np.hstack((self._raveled_index_for(p) for p in param.params)) return param._raveled_index() + self._offset_for(param) @@ -407,7 +413,7 @@ class Indexable(Nameable, Updateable): repriorized = self.unset_priors() self._add_to_index_operations(self.priors, repriorized, prior, warning) - from domains import _REAL, _POSITIVE, _NEGATIVE + from .domains import _REAL, _POSITIVE, _NEGATIVE if prior.domain is _POSITIVE: self.constrain_positive(warning) elif prior.domain is _NEGATIVE: @@ -424,19 +430,38 @@ class Indexable(Nameable, Updateable): def log_prior(self): """evaluate the prior""" - if self.priors.size > 0: - x = self.param_array - return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0) - return 0. + if self.priors.size == 0: + return 0. + x = self.param_array + #evaluate the prior log densities + log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0) + + #account for the transformation by evaluating the log Jacobian (where things are transformed) + log_j = 0. + priored_indexes = np.hstack([i for p, i in self.priors.items()]) + for c,j in self.constraints.items(): + if not isinstance(c, Transformation):continue + for jj in j: + if jj in priored_indexes: + log_j += c.log_jacobian(x[jj]) + return log_p + log_j def _log_prior_gradients(self): """evaluate the gradients of the priors""" - if self.priors.size > 0: - x = self.param_array - ret = np.zeros(x.size) - [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] - return ret - return 0. + if self.priors.size == 0: + return 0. + x = self.param_array + ret = np.zeros(x.size) + #compute derivate of prior density + [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()] + #add in jacobian derivatives if transformed + priored_indexes = np.hstack([i for p, i in self.priors.items()]) + for c,j in self.constraints.items(): + if not isinstance(c, Transformation):continue + for jj in j: + if jj in priored_indexes: + ret[jj] += c.log_jacobian_grad(x[jj]) + return ret #=========================================================================== # Tie parameters together @@ -471,7 +496,7 @@ class Indexable(Nameable, Updateable): self.param_array[...] = transform.initialize(self.param_array) reconstrained = self.unconstrain() added = self._add_to_index_operations(self.constraints, reconstrained, transform, warning) - self.notify_observers(self, None if trigger_parent else -np.inf) + self.trigger_update(trigger_parent) return added def unconstrain(self, *transforms): @@ -536,7 +561,7 @@ class Indexable(Nameable, Updateable): update the constraints and priors view, so that constraining is automized for the parent. """ - from index_operations import ParameterIndexOperationsView + from .index_operations import ParameterIndexOperationsView #if getattr(self, "_in_init_"): #import ipdb;ipdb.set_trace() #self.constraints.update(param.constraints, start) @@ -558,7 +583,7 @@ class Indexable(Nameable, Updateable): """ if warning and reconstrained.size > 0: # TODO: figure out which parameters have changed and only print those - print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name) + print("WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name)) index = self._raveled_index() which.add(what, index) return index @@ -571,7 +596,7 @@ class Indexable(Nameable, Updateable): if len(transforms) == 0: transforms = which.properties() removed = np.empty((0,), dtype=int) - for t in transforms: + for t in list(transforms): unconstrained = which.remove(t, self._raveled_index()) removed = np.union1d(removed, unconstrained) if t is __fixed__: @@ -612,7 +637,9 @@ class OptimizationHandlable(Indexable): if not self._optimizer_copy_transformed: self._optimizer_copy_.flat = self.param_array.flat - [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.items() if c != __fixed__] if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()): fixes = np.ones(self.size).astype(bool) fixes[self.constraints[__fixed__]] = FIXED @@ -641,21 +668,25 @@ class OptimizationHandlable(Indexable): if f is None: self.param_array.flat = p [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) - for c, ind in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #for c, ind in self.constraints.iteritems() if c != __fixed__] + for c, ind in self.constraints.items() if c != __fixed__] else: self.param_array.flat[f] = p [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]])) - for c, ind in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #for c, ind in self.constraints.iteritems() if c != __fixed__] + for c, ind in self.constraints.items() if c != __fixed__] #self._highest_parent_.tie.propagate_val() self._optimizer_copy_transformed = False self.trigger_update() def _get_params_transformed(self): - raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" + raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!") # def _set_params_transformed(self, p): - raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" + raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!") def _trigger_params_changed(self, trigger_parent=True): """ @@ -680,17 +711,32 @@ class OptimizationHandlable(Indexable): constraint to it. """ self._highest_parent_.tie.collate_gradient() - [np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + [np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g + def _transform_gradients_non_natural(self, g): + """ + Transform the gradients by multiplying the gradient factor for each + constraint to it. + """ + self._highest_parent_.tie.collate_gradient() + #py3 fix + #[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + [np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__] + if self._has_fixes(): return g[self._fixes_] + return g + + @property def num_params(self): """ Return the number of parameters of this parameter_handle. Param objects will always return 0. """ - raise NotImplemented, "Abstract, please implement in respective classes" + raise NotImplemented("Abstract, please implement in respective classes") def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): """ @@ -739,7 +785,9 @@ class OptimizationHandlable(Indexable): self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...) # now draw from prior where possible x = self.param_array.copy() - [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] + #Py3 fix + #[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] + [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.items() if not p is None] unfixlist = np.ones((self.size,),dtype=np.bool) unfixlist[self.constraints[__fixed__]] = False self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist] @@ -798,7 +846,7 @@ class Parameterizable(OptimizationHandlable): A parameterisable class. This class provides the parameters list (ArrayList) and standard parameter handling, - such as {add|remove}_parameter(), traverse hierarchy and param_array, gradient_array + such as {link|unlink}_parameter(), traverse hierarchy and param_array, gradient_array and the empty parameters_changed(). This class is abstract and should not be instantiated. @@ -936,7 +984,7 @@ class Parameterizable(OptimizationHandlable): self._add_parameter_name(param, ignore_added_names) # and makes sure to not delete programmatically added parameters for other in self.parameters[::-1]: - if other is not param and other.name.startswith(param.name): + if other is not param and other.name == param.name: warn_and_retry(param, _name_digit.match(other.name)) return if pname not in dir(self): @@ -1031,6 +1079,9 @@ class Parameterizable(OptimizationHandlable): p = param_to_array(p) d = f.create_dataset(n,p.shape,dtype=p.dtype) d[:] = p + if hasattr(self, 'param_array'): + d = f.create_dataset('param_array',self.param_array.shape, dtype=self.param_array.dtype) + d[:] = self.param_array f.close() except: raise 'Fails to write the parameters into a HDF5 file!' diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 84b6357e..83c83dfd 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -5,7 +5,7 @@ import numpy as np from scipy.special import gammaln, digamma from ...util.linalg import pdinv -from domains import _REAL, _POSITIVE +from .domains import _REAL, _POSITIVE import warnings import weakref @@ -15,8 +15,12 @@ class Prior(object): _instance = None def __new__(cls, *args, **kwargs): if not cls._instance or cls._instance.__class__ is not cls: - cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs) - return cls._instance + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + cls._instance = newfunc(cls) + else: + cls._instance = newfunc(cls, *args, **kwargs) + return cls._instance def pdf(self, x): return np.exp(self.lnpdf(x)) @@ -52,7 +56,11 @@ class Gaussian(Prior): for instance in cls._instances: if instance().mu == mu and instance().sigma == sigma: return instance() - o = super(Prior, cls).__new__(cls, mu, sigma) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, mu, sigma) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() @@ -140,7 +148,11 @@ class LogGaussian(Gaussian): for instance in cls._instances: if instance().mu == mu and instance().sigma == sigma: return instance() - o = super(Prior, cls).__new__(cls, mu, sigma) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, mu, sigma) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() @@ -258,7 +270,11 @@ class Gamma(Prior): for instance in cls._instances: if instance().a == a and instance().b == b: return instance() - o = super(Prior, cls).__new__(cls, a, b) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, a, b) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() @@ -398,7 +414,7 @@ class DGPLVM_KFDA(Prior): def compute_cls(self, x): cls = {} # Appending each data point to its proper class - for j in xrange(self.datanum): + for j in range(self.datanum): class_label = self.get_class_label(self.lbl[j]) if class_label not in cls: cls[class_label] = [] @@ -504,6 +520,219 @@ class DGPLVM(Prior): .. Note:: DGPLVM for Classification paper implementation + """ + domain = _REAL + + def __new__(cls, sigma2, lbl, x_shape): + return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape) + + def __init__(self, sigma2, lbl, x_shape): + self.sigma2 = sigma2 + # self.x = x + self.lbl = lbl + self.classnum = lbl.shape[1] + self.datanum = lbl.shape[0] + self.x_shape = x_shape + self.dim = x_shape[1] + + def get_class_label(self, y): + for idx, v in enumerate(y): + if v == 1: + return idx + return -1 + + # This function assigns each data point to its own class + # and returns the dictionary which contains the class name and parameters. + def compute_cls(self, x): + cls = {} + # Appending each data point to its proper class + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in cls: + cls[class_label] = [] + cls[class_label].append(x[j]) + return cls + + # This function computes mean of each class. The mean is calculated through each dimension + def compute_Mi(self, cls): + M_i = np.zeros((self.classnum, self.dim)) + for i in cls: + # Mean of each class + class_i = cls[i] + M_i[i] = np.mean(class_i, axis=0) + return M_i + + # Adding data points as tuple to the dictionary so that we can access indices + def compute_indices(self, x): + data_idx = {} + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in data_idx: + data_idx[class_label] = [] + t = (j, x[j]) + data_idx[class_label].append(t) + return data_idx + + # Adding indices to the list so we can access whole the indices + def compute_listIndices(self, data_idx): + lst_idx = [] + lst_idx_all = [] + for i in data_idx: + if len(lst_idx) == 0: + pass + #Do nothing, because it is the first time list is created so is empty + else: + lst_idx = [] + # Here we put indices of each class in to the list called lst_idx_all + for m in range(len(data_idx[i])): + lst_idx.append(data_idx[i][m][0]) + lst_idx_all.append(lst_idx) + return lst_idx_all + + # This function calculates between classes variances + def compute_Sb(self, cls, M_i, M_0): + Sb = np.zeros((self.dim, self.dim)) + for i in cls: + B = (M_i[i] - M_0).reshape(self.dim, 1) + B_trans = B.transpose() + Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans) + return Sb + + # This function calculates within classes variances + def compute_Sw(self, cls, M_i): + Sw = np.zeros((self.dim, self.dim)) + for i in cls: + N_i = float(len(cls[i])) + W_WT = np.zeros((self.dim, self.dim)) + for xk in cls[i]: + W = (xk - M_i[i]) + W_WT += np.outer(W, W) + Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + return Sw + + # Calculating beta and Bi for Sb + def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all): + # import pdb + # pdb.set_trace() + B_i = np.zeros((self.classnum, self.dim)) + Sig_beta_B_i_all = np.zeros((self.datanum, self.dim)) + for i in data_idx: + # pdb.set_trace() + # Calculating Bi + B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) + for k in range(self.datanum): + for i in data_idx: + N_i = float(len(data_idx[i])) + if k in lst_idx_all[i]: + beta = (float(1) / N_i) - (float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + else: + beta = -(float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() + return Sig_beta_B_i_all + + + # Calculating W_j s separately so we can access all the W_j s anytime + def compute_wj(self, data_idx, M_i): + W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + xj = tpl[1] + j = tpl[0] + W_i[j] = (xj - M_i[i]) + return W_i + + # Calculating alpha and Wj for Sw + def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i): + Sig_alpha_W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + k = tpl[0] + for j in lst_idx_all[i]: + if k == j: + alpha = 1 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + else: + alpha = 0 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + return Sig_alpha_W_i + + # This function calculates log of our prior + def lnpdf(self, x): + x = x.reshape(self.x_shape) + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) + + # This function calculates derivative of the log of prior function + def lnpdf_grad(self, x): + x = x.reshape(self.x_shape) + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + data_idx = self.compute_indices(x) + lst_idx_all = self.compute_listIndices(data_idx) + Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) + W_i = self.compute_wj(data_idx, M_i) + Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) + + # Calculating inverse of Sb and its transpose and minus + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + Sb_inv_N_trans = np.transpose(Sb_inv_N) + Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans + Sw_trans = np.transpose(Sw) + + # Calculating DJ/DXk + DJ_Dxk = 2 * ( + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( + Sig_alpha_W_i)) + # Calculating derivative of the log of the prior + DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + return DPx_Dx.T + + # def frb(self, x): + # from functools import partial + # from GPy.models import GradientChecker + # f = partial(self.lnpdf) + # df = partial(self.lnpdf_grad) + # grad = GradientChecker(f, df, x, 'X') + # grad.checkgrad(verbose=1) + + def rvs(self, n): + return np.random.rand(n) # A WRONG implementation + + def __str__(self): + return 'DGPLVM_prior_Raq' + + +# ****************************************** + +from .. import Parameterized +from .. import Param +class DGPLVM_Lamda(Prior, Parameterized): + """ + Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. + + :param sigma2: constant + + .. Note:: DGPLVM for Classification paper implementation + """ domain = _REAL # _instances = [] @@ -517,14 +746,18 @@ class DGPLVM(Prior): # cls._instances.append(weakref.ref(o)) # return cls._instances[-1]() - def __init__(self, sigma2, lbl, x_shape): + def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'): + super(DGPLVM_Lamda, self).__init__(name=name) self.sigma2 = sigma2 # self.x = x self.lbl = lbl + self.lamda = lamda self.classnum = lbl.shape[1] self.datanum = lbl.shape[0] self.x_shape = x_shape self.dim = x_shape[1] + self.lamda = Param('lamda', np.diag(lamda)) + self.link_parameter(self.lamda) def get_class_label(self, y): for idx, v in enumerate(y): @@ -549,7 +782,8 @@ class DGPLVM(Prior): M_i = np.zeros((self.classnum, self.dim)) for i in cls: # Mean of each class - M_i[i] = np.mean(cls[i], axis=0) + class_i = cls[i] + M_i[i] = np.mean(class_i, axis=0) return M_i # Adding data points as tuple to the dictionary so that we can access indices @@ -654,6 +888,13 @@ class DGPLVM(Prior): # This function calculates log of our prior def lnpdf(self, x): x = x.reshape(self.x_shape) + + #!!!!!!!!!!!!!!!!!!!!!!!!!!! + #self.lamda.values[:] = self.lamda.values/self.lamda.values.sum() + + xprime = x.dot(np.diagflat(self.lamda)) + x = xprime + # print x cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) M_i = self.compute_Mi(cls) @@ -661,12 +902,16 @@ class DGPLVM(Prior): Sw = self.compute_Sw(cls, M_i) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) # This function calculates derivative of the log of prior function def lnpdf_grad(self, x): x = x.reshape(self.x_shape) + xprime = x.dot(np.diagflat(self.lamda)) + x = xprime + # print x cls = self.compute_cls(x) M_0 = np.mean(x, axis=0) M_i = self.compute_Mi(cls) @@ -680,8 +925,251 @@ class DGPLVM(Prior): # Calculating inverse of Sb and its transpose and minus # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) - # Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) - Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] + Sb_inv_N_trans = np.transpose(Sb_inv_N) + Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans + Sw_trans = np.transpose(Sw) + + # Calculating DJ/DXk + DJ_Dxk = 2 * ( + Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot( + Sig_alpha_W_i)) + # Calculating derivative of the log of the prior + DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk) + + DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx) + + # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + DPxprim_Dx = DPxprim_Dx.T + + DPxprim_Dlamda = DPx_Dx.dot(x) + + # Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!) + DPxprim_Dlamda = DPxprim_Dlamda.T + + self.lamda.gradient = np.diag(DPxprim_Dlamda) + # print DPxprim_Dx + return DPxprim_Dx + + + # def frb(self, x): + # from functools import partial + # from GPy.models import GradientChecker + # f = partial(self.lnpdf) + # df = partial(self.lnpdf_grad) + # grad = GradientChecker(f, df, x, 'X') + # grad.checkgrad(verbose=1) + + def rvs(self, n): + return np.random.rand(n) # A WRONG implementation + + def __str__(self): + return 'DGPLVM_prior_Raq_Lamda' + +# ****************************************** + +class DGPLVM_T(Prior): + """ + Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. + + :param sigma2: constant + + .. Note:: DGPLVM for Classification paper implementation + + """ + domain = _REAL + # _instances = [] + # def __new__(cls, mu, sigma): # Singleton: + # if cls._instances: + # cls._instances[:] = [instance for instance in cls._instances if instance()] + # for instance in cls._instances: + # if instance().mu == mu and instance().sigma == sigma: + # return instance() + # o = super(Prior, cls).__new__(cls, mu, sigma) + # cls._instances.append(weakref.ref(o)) + # return cls._instances[-1]() + + def __init__(self, sigma2, lbl, x_shape, vec): + self.sigma2 = sigma2 + # self.x = x + self.lbl = lbl + self.classnum = lbl.shape[1] + self.datanum = lbl.shape[0] + self.x_shape = x_shape + self.dim = x_shape[1] + self.vec = vec + + + def get_class_label(self, y): + for idx, v in enumerate(y): + if v == 1: + return idx + return -1 + + # This function assigns each data point to its own class + # and returns the dictionary which contains the class name and parameters. + def compute_cls(self, x): + cls = {} + # Appending each data point to its proper class + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in cls: + cls[class_label] = [] + cls[class_label].append(x[j]) + return cls + + # This function computes mean of each class. The mean is calculated through each dimension + def compute_Mi(self, cls): + M_i = np.zeros((self.classnum, self.dim)) + for i in cls: + # Mean of each class + # class_i = np.multiply(cls[i],vec) + class_i = cls[i] + M_i[i] = np.mean(class_i, axis=0) + return M_i + + # Adding data points as tuple to the dictionary so that we can access indices + def compute_indices(self, x): + data_idx = {} + for j in range(self.datanum): + class_label = self.get_class_label(self.lbl[j]) + if class_label not in data_idx: + data_idx[class_label] = [] + t = (j, x[j]) + data_idx[class_label].append(t) + return data_idx + + # Adding indices to the list so we can access whole the indices + def compute_listIndices(self, data_idx): + lst_idx = [] + lst_idx_all = [] + for i in data_idx: + if len(lst_idx) == 0: + pass + #Do nothing, because it is the first time list is created so is empty + else: + lst_idx = [] + # Here we put indices of each class in to the list called lst_idx_all + for m in range(len(data_idx[i])): + lst_idx.append(data_idx[i][m][0]) + lst_idx_all.append(lst_idx) + return lst_idx_all + + # This function calculates between classes variances + def compute_Sb(self, cls, M_i, M_0): + Sb = np.zeros((self.dim, self.dim)) + for i in cls: + B = (M_i[i] - M_0).reshape(self.dim, 1) + B_trans = B.transpose() + Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans) + return Sb + + # This function calculates within classes variances + def compute_Sw(self, cls, M_i): + Sw = np.zeros((self.dim, self.dim)) + for i in cls: + N_i = float(len(cls[i])) + W_WT = np.zeros((self.dim, self.dim)) + for xk in cls[i]: + W = (xk - M_i[i]) + W_WT += np.outer(W, W) + Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT) + return Sw + + # Calculating beta and Bi for Sb + def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all): + # import pdb + # pdb.set_trace() + B_i = np.zeros((self.classnum, self.dim)) + Sig_beta_B_i_all = np.zeros((self.datanum, self.dim)) + for i in data_idx: + # pdb.set_trace() + # Calculating Bi + B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) + for k in range(self.datanum): + for i in data_idx: + N_i = float(len(data_idx[i])) + if k in lst_idx_all[i]: + beta = (float(1) / N_i) - (float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + else: + beta = -(float(1) / self.datanum) + Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i]) + Sig_beta_B_i_all = Sig_beta_B_i_all.transpose() + return Sig_beta_B_i_all + + + # Calculating W_j s separately so we can access all the W_j s anytime + def compute_wj(self, data_idx, M_i): + W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + xj = tpl[1] + j = tpl[0] + W_i[j] = (xj - M_i[i]) + return W_i + + # Calculating alpha and Wj for Sw + def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i): + Sig_alpha_W_i = np.zeros((self.datanum, self.dim)) + for i in data_idx: + N_i = float(len(data_idx[i])) + for tpl in data_idx[i]: + k = tpl[0] + for j in lst_idx_all[i]: + if k == j: + alpha = 1 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + else: + alpha = 0 - (float(1) / N_i) + Sig_alpha_W_i[k] += (alpha * W_i[j]) + Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i) + return Sig_alpha_W_i + + # This function calculates log of our prior + def lnpdf(self, x): + x = x.reshape(self.x_shape) + xprim = x.dot(self.vec) + x = xprim + # print x + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #print 'SB_inv: ', Sb_inv_N + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] + return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) + + # This function calculates derivative of the log of prior function + def lnpdf_grad(self, x): + x = x.reshape(self.x_shape) + xprim = x.dot(self.vec) + x = xprim + # print x + cls = self.compute_cls(x) + M_0 = np.mean(x, axis=0) + M_i = self.compute_Mi(cls) + Sb = self.compute_Sb(cls, M_i, M_0) + Sw = self.compute_Sw(cls, M_i) + data_idx = self.compute_indices(x) + lst_idx_all = self.compute_listIndices(data_idx) + Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all) + W_i = self.compute_wj(data_idx, M_i) + Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i) + + # Calculating inverse of Sb and its transpose and minus + # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) + #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) + #print 'SB_inv: ',Sb_inv_N + #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] + Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sw_trans = np.transpose(Sw) @@ -706,7 +1194,9 @@ class DGPLVM(Prior): return np.random.rand(n) # A WRONG implementation def __str__(self): - return 'DGPLVM_prior' + return 'DGPLVM_prior_Raq_TTT' + + class HalfT(Prior): """ diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py index 235b9d1c..830809d6 100644 --- a/GPy/core/parameterization/transformations.py +++ b/GPy/core/parameterization/transformations.py @@ -3,7 +3,7 @@ import numpy as np -from domains import _POSITIVE,_NEGATIVE, _BOUNDED +from .domains import _POSITIVE,_NEGATIVE, _BOUNDED import weakref import sys @@ -31,6 +31,16 @@ class Transformation(object): raise NotImplementedError def finv(self, model_param): raise NotImplementedError + def log_jacobian(self, model_param): + """ + compute the log of the jacobian of f, evaluated at f(x)= model_param + """ + raise NotImplementedError + def log_jacobian_grad(self, model_param): + """ + compute the drivative of the log of the jacobian of f, evaluated at f(x)= model_param + """ + raise NotImplementedError def gradfactor(self, model_param, dL_dmodel_param): """ df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param, @@ -42,6 +52,8 @@ class Transformation(object): \frac{\frac{\partial L}{\partial f}\left(\left.\partial f(x)}{\partial x}\right|_{x=f^{-1}(f)\right)} """ raise NotImplementedError + def gradfactor_non_natural(self, model_param, dL_dmodel_param): + return self.gradfactor(model_param, dL_dmodel_param) def initialize(self, f): """ produce a sensible initial value for f(x)""" raise NotImplementedError @@ -50,7 +62,7 @@ class Transformation(object): import matplotlib.pyplot as plt from ...plotting.matplot_dep import base_plots x = np.linspace(-8,8) - base_plots.meanplot(x, self.f(x),axes=axes*args,**kw) + base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw) axes = plt.gca() axes.set_xlabel(xlabel) axes.set_ylabel(ylabel) @@ -70,15 +82,41 @@ class Logexp(Transformation): return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f))) def initialize(self, f): if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") return np.abs(f) + def log_jacobian(self, model_param): + return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param + def log_jacobian_grad(self, model_param): + return 1./(np.exp(model_param)-1.) + def __str__(self): + return '+ve' + +class Exponent(Transformation): + domain = _POSITIVE + def f(self, x): + return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val)) + def finv(self, x): + return np.log(x) + def gradfactor(self, f, df): + return np.einsum('i,i->i', df, f) + def initialize(self, f): + if np.any(f < 0.): + print("Warning: changing parameters to satisfy constraints") + return np.abs(f) + def log_jacobian(self, model_param): + return np.log(model_param) + def log_jacobian_grad(self, model_param): + return 1./model_param def __str__(self): return '+ve' + class NormalTheta(Transformation): + "Do not use, not officially supported!" _instances = [] - def __new__(cls, mu_indices, var_indices): + def __new__(cls, mu_indices=None, var_indices=None): + "Do not use, not officially supported!" if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: @@ -98,6 +136,7 @@ class NormalTheta(Transformation): # that the values are ok # Before: theta[self.var_indices] = np.abs(-.5/theta[self.var_indices]) + #theta[self.var_indices] = np.exp(-.5/theta[self.var_indices]) theta[self.mu_indices] *= theta[self.var_indices] return theta # which is now {mu, var} @@ -106,6 +145,7 @@ class NormalTheta(Transformation): varp = muvar[self.var_indices] muvar[self.mu_indices] /= varp muvar[self.var_indices] = -.5/varp + #muvar[self.var_indices] = -.5/np.log(varp) return muvar # which is now {theta1, theta2} @@ -124,7 +164,7 @@ class NormalTheta(Transformation): def initialize(self, f): if np.any(f[self.var_indices] < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") f[self.var_indices] = np.abs(f[self.var_indices]) return f @@ -139,9 +179,10 @@ class NormalTheta(Transformation): self.var_indices = state[1] class NormalNaturalAntti(NormalTheta): + "Do not use, not officially supported!" _instances = [] - _logexp = Logexp() - def __new__(cls, mu_indices, var_indices): + def __new__(cls, mu_indices=None, var_indices=None): + "Do not use, not officially supported!" if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: @@ -170,7 +211,7 @@ class NormalNaturalAntti(NormalTheta): def initialize(self, f): if np.any(f[self.var_indices] < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") f[self.var_indices] = np.abs(f[self.var_indices]) return f @@ -178,8 +219,10 @@ class NormalNaturalAntti(NormalTheta): return "natantti" class NormalEta(Transformation): + "Do not use, not officially supported!" _instances = [] - def __new__(cls, mu_indices, var_indices): + def __new__(cls, mu_indices=None, var_indices=None): + "Do not use, not officially supported!" if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: @@ -211,7 +254,7 @@ class NormalEta(Transformation): def initialize(self, f): if np.any(f[self.var_indices] < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") f[self.var_indices] = np.abs(f[self.var_indices]) return f @@ -219,8 +262,10 @@ class NormalEta(Transformation): return "eta" class NormalNaturalThroughTheta(NormalTheta): + "Do not use, not officially supported!" _instances = [] - def __new__(cls, mu_indices, var_indices): + def __new__(cls, mu_indices=None, var_indices=None): + "Do not use, not officially supported!" if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: @@ -250,13 +295,28 @@ class NormalNaturalThroughTheta(NormalTheta): #======================================================================= return dmuvar # which is now the gradient multiplicator + def gradfactor_non_natural(self, muvar, dmuvar): + mu = muvar[self.mu_indices] + var = muvar[self.var_indices] + #======================================================================= + # theta gradients + # This works and the gradient checks! + dmuvar[self.mu_indices] *= var + dmuvar[self.var_indices] *= 2*(var)**2 + dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu + #======================================================================= + + return dmuvar # which is now the gradient multiplicator for {theta1, theta2} + def __str__(self): return "natgrad" class NormalNaturalWhooot(NormalTheta): + "Do not use, not officially supported!" _instances = [] - def __new__(cls, mu_indices, var_indices): + def __new__(cls, mu_indices=None, var_indices=None): + "Do not use, not officially supported!" if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: @@ -290,8 +350,10 @@ class NormalNaturalWhooot(NormalTheta): return "natgrad" class NormalNaturalThroughEta(NormalEta): + "Do not use, not officially supported!" _instances = [] - def __new__(cls, mu_indices, var_indices): + def __new__(cls, mu_indices=None, var_indices=None): + "Do not use, not officially supported!" if cls._instances: cls._instances[:] = [instance for instance in cls._instances if instance()] for instance in cls._instances: @@ -332,7 +394,7 @@ class LogexpNeg(Transformation): return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f))) def initialize(self, f): if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") return np.abs(f) def __str__(self): return '+ve' @@ -384,27 +446,11 @@ class LogexpClipped(Logexp): return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf) def initialize(self, f): if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") return np.abs(f) def __str__(self): return '+ve_c' -class Exponent(Transformation): - # TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code. - domain = _POSITIVE - def f(self, x): - return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val)) - def finv(self, x): - return np.log(x) - def gradfactor(self, f, df): - return np.einsum('i,i->i', df, f) - def initialize(self, f): - if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" - return np.abs(f) - def __str__(self): - return '+ve' - class NegativeExponent(Exponent): domain = _NEGATIVE def f(self, x): @@ -440,7 +486,11 @@ class Logistic(Transformation): for instance in cls._instances: if instance().lower == lower and instance().upper == upper: return instance() - o = super(Transformation, cls).__new__(cls, lower, upper, *args, **kwargs) + newfunc = super(Transformation, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, lower, upper, *args, **kwargs) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() def __init__(self, lower, upper): @@ -458,7 +508,7 @@ class Logistic(Transformation): return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference) def initialize(self, f): if np.any(np.logical_or(f < self.lower, f > self.upper)): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") #return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f) #FIXME: Max, zeros_like right? return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f) diff --git a/GPy/inference/mcmc/__init__.py b/GPy/inference/mcmc/__init__.py index 956448d4..b30b6ff0 100644 --- a/GPy/inference/mcmc/__init__.py +++ b/GPy/inference/mcmc/__init__.py @@ -1 +1,2 @@ from hmc import HMC +from samplers import * diff --git a/GPy/inference/mcmc/samplers.py b/GPy/inference/mcmc/samplers.py index 444d99d7..c2367359 100644 --- a/GPy/inference/mcmc/samplers.py +++ b/GPy/inference/mcmc/samplers.py @@ -4,23 +4,18 @@ import numpy as np from scipy import linalg, optimize -import Tango import sys -import re -import numdifftools as ndt -import pdb -import cPickle class Metropolis_Hastings: def __init__(self,model,cov=None): """Metropolis Hastings, with tunings according to Gelman et al. """ self.model = model - current = self.model._get_params_transformed() + current = self.model.optimizer_array self.D = current.size self.chains = [] if cov is None: - self.cov = model.Laplace_covariance() + self.cov = np.eye(self.D) else: self.cov = cov self.scale = 2.4/np.sqrt(self.D) @@ -31,20 +26,20 @@ class Metropolis_Hastings: if start is None: self.model.randomize() else: - self.model._set_params_transformed(start) + self.model.optimizer_array = start - - - def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400): - current = self.model._get_params_transformed() - fcurrent = self.model.log_likelihood() + self.model.log_prior() + def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400): + current = self.model.optimizer_array + fcurrent = self.model.log_likelihood() + self.model.log_prior() + \ + self.model._log_det_jacobian() accepted = np.zeros(Ntotal,dtype=np.bool) for it in range(Ntotal): print "sample %d of %d\r"%(it,Ntotal), sys.stdout.flush() prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale) - self.model._set_params_transformed(prop) - fprop = self.model.log_likelihood() + self.model.log_prior() + self.model.optimizer_array = prop + fprop = self.model.log_likelihood() + self.model.log_prior() + \ + self.model._log_det_jacobian() if fprop>fcurrent:#sample accepted, going 'uphill' accepted[it] = True @@ -72,10 +67,11 @@ class Metropolis_Hastings: def predict(self,function,args): """Make a prediction for the function, to which we will pass the additional arguments""" - param = self.model._get_params() + param = self.model.param_array fs = [] for p in self.chain: - self.model._set_params(p) + self.model.param_array = p fs.append(function(*args)) - self.model._set_params(param)# reset model to starting state + # reset model to starting state + self.model.param_array = param return fs diff --git a/GPy/testing/rv_transformation_tests.py b/GPy/testing/rv_transformation_tests.py new file mode 100644 index 00000000..e6b3e3e7 --- /dev/null +++ b/GPy/testing/rv_transformation_tests.py @@ -0,0 +1,101 @@ +# Written by Ilias Bilionis +""" +Test if hyperparameters in models are properly transformed. +""" + + +import unittest +import numpy as np +import scipy.stats as st +import GPy + + +class TestModel(GPy.core.Model): + """ + A simple GPy model with one parameter. + """ + def __init__(self): + GPy.core.Model.__init__(self, 'test_model') + theta = GPy.core.Param('theta', 1.) + self.link_parameter(theta) + + def log_likelihood(self): + return 0. + + +class RVTransformationTestCase(unittest.TestCase): + + def _test_trans(self, trans): + m = TestModel() + prior = GPy.priors.LogGaussian(.5, 0.1) + m.theta.set_prior(prior) + m.theta.unconstrain() + m.theta.constrain(trans) + # The PDF of the transformed variables + p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0]) + # To the empirical PDF of: + theta_s = prior.rvs(100000) + phi_s = trans.finv(theta_s) + # which is essentially a kernel density estimation + kde = st.gaussian_kde(phi_s) + # We will compare the PDF here: + phi = np.linspace(phi_s.min(), phi_s.max(), 100) + # The transformed PDF of phi should be this: + pdf_phi = np.array([p_phi(p) for p in phi]) + # UNCOMMENT TO SEE GRAPHICAL COMPARISON + #import matplotlib.pyplot as plt + #fig, ax = plt.subplots() + #ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram') + #ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation') + #ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF') + #ax.set_xlabel(r'transformed $\theta$', fontsize=16) + #ax.set_ylabel('PDF', fontsize=16) + #plt.legend(loc='best') + #plt.show(block=True) + # END OF PLOT + # The following test cannot be very accurate + self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1) + # Check the gradients at a few random points + for i in xrange(10): + m.theta = theta_s[i] + self.assertTrue(m.checkgrad(verbose=True)) + + def test_Logexp(self): + self._test_trans(GPy.constraints.Logexp()) + self._test_trans(GPy.constraints.Exponent()) + + +if __name__ == '__main__': + unittest.main() + quit() + m = TestModel() + prior = GPy.priors.LogGaussian(0., .9) + m.theta.set_prior(prior) + + # The following should return the PDF in terms of the transformed quantities + p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0]) + + # Let's look at the transformation phi = log(exp(theta - 1)) + trans = GPy.constraints.Exponent() + m.theta.constrain(trans) + # Plot the transformed probability density + phi = np.linspace(-8, 8, 100) + fig, ax = plt.subplots() + # Let's draw some samples of theta and transform them so that we see + # which one is right + theta_s = prior.rvs(10000) + # Transform it to the new variables + phi_s = trans.finv(theta_s) + # And draw their histogram + ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical') + # This is to be compared to the PDF of the model expressed in terms of these new + # variables + ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2) + ax.set_xlim(-3, 10) + ax.set_xlabel(r'transformed $\theta$', fontsize=16) + ax.set_ylabel('PDF', fontsize=16) + plt.legend(loc='best') + # Now let's test the gradients + m.checkgrad(verbose=True) + # And show the plot + plt.show(block=True)