Merge pull request #211 from PredictiveScienceLab/master

Fixes the PDF transformation bug by handpicking James Hensman's code from the devel branch
This commit is contained in:
Zhenwen Dai 2015-08-11 09:42:14 +01:00
commit 61b671e58e
6 changed files with 792 additions and 103 deletions

View file

@ -13,11 +13,12 @@ Observable Pattern for patameterization
"""
from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
from .transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np
import re
import logging
from updateable import Updateable
from .updateable import Updateable
from functools import reduce
class HierarchyError(Exception):
"""
@ -36,7 +37,7 @@ def adjust_name_for_printing(name):
name = name.replace("/", "_l_").replace("@", '_at_')
name = name.replace("(", "_of_").replace(")", "")
if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None:
raise NameError, "name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name)
raise NameError("name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name))
return name
return ''
@ -65,13 +66,13 @@ class Parentable(object):
Gets called, when the parent changed, so we can adjust our
inner attributes according to the new parent.
"""
raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent"
raise NotImplementedError("shouldnt happen, Parentable objects need to be able to change their parent")
def _disconnect_parent(self, *args, **kw):
"""
Disconnect this object from its parent
"""
raise NotImplementedError, "Abstract superclass"
raise NotImplementedError("Abstract superclass")
@property
def _highest_parent_(self):
@ -109,7 +110,10 @@ class Pickleable(object):
it properly.
:param protocol: pickling protocol to use, python-pickle for details.
"""
try: #Py2
import cPickle as pickle
except ImportError: #Py3
import pickle
if isinstance(f, str):
with open(f, 'wb') as f:
pickle.dump(self, f, protocol)
@ -138,9 +142,9 @@ class Pickleable(object):
which = self
which.traverse_parents(parents.append) # collect parents
for p in parents:
if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied
if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient
if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
if not id(p) in memo :memo[id(p)] = None # set all parents to be None, so they will not be copied
if not id(self.gradient) in memo:memo[id(self.gradient)] = None # reset the gradient
if not id(self._fixes_) in memo :memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
copy = copy.deepcopy(self, memo) # and start the copy
copy._parent_index_ = None
copy._trigger_params_changed()
@ -163,14 +167,16 @@ class Pickleable(object):
'_Cacher_wrap__cachers', # never pickle cachers
]
dc = dict()
for k,v in self.__dict__.iteritems():
#py3 fix
#for k,v in self.__dict__.iteritems():
for k,v in self.__dict__.items():
if k not in ignore_list:
dc[k] = v
return dc
def __setstate__(self, state):
self.__dict__.update(state)
from lists_and_dicts import ObserverList
from .lists_and_dicts import ObserverList
self.observers = ObserverList()
self._setup_observers()
self._optimizer_copy_transformed = False
@ -214,7 +220,7 @@ class Gradcheckable(Pickleable, Parentable):
Perform the checkgrad on the model.
TODO: this can be done more efficiently, when doing it inside here
"""
raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!"
raise HierarchyError("This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!")
class Nameable(Gradcheckable):
"""
@ -268,7 +274,7 @@ class Indexable(Nameable, Updateable):
def __init__(self, name, default_constraint=None, *a, **kw):
super(Indexable, self).__init__(name=name, *a, **kw)
self._default_constraint_ = default_constraint
from index_operations import ParameterIndexOperations
from .index_operations import ParameterIndexOperations
self.constraints = ParameterIndexOperations()
self.priors = ParameterIndexOperations()
if self._default_constraint_ is not None:
@ -310,7 +316,7 @@ class Indexable(Nameable, Updateable):
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
from param import ParamConcatenation
from .param import ParamConcatenation
if isinstance(param, ParamConcatenation):
return np.hstack((self._raveled_index_for(p) for p in param.params))
return param._raveled_index() + self._offset_for(param)
@ -407,7 +413,7 @@ class Indexable(Nameable, Updateable):
repriorized = self.unset_priors()
self._add_to_index_operations(self.priors, repriorized, prior, warning)
from domains import _REAL, _POSITIVE, _NEGATIVE
from .domains import _REAL, _POSITIVE, _NEGATIVE
if prior.domain is _POSITIVE:
self.constrain_positive(warning)
elif prior.domain is _NEGATIVE:
@ -424,19 +430,38 @@ class Indexable(Nameable, Updateable):
def log_prior(self):
"""evaluate the prior"""
if self.priors.size > 0:
x = self.param_array
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
if self.priors.size == 0:
return 0.
x = self.param_array
#evaluate the prior log densities
log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
#account for the transformation by evaluating the log Jacobian (where things are transformed)
log_j = 0.
priored_indexes = np.hstack([i for p, i in self.priors.items()])
for c,j in self.constraints.items():
if not isinstance(c, Transformation):continue
for jj in j:
if jj in priored_indexes:
log_j += c.log_jacobian(x[jj])
return log_p + log_j
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
if self.priors.size > 0:
if self.priors.size == 0:
return 0.
x = self.param_array
ret = np.zeros(x.size)
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
#compute derivate of prior density
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
#add in jacobian derivatives if transformed
priored_indexes = np.hstack([i for p, i in self.priors.items()])
for c,j in self.constraints.items():
if not isinstance(c, Transformation):continue
for jj in j:
if jj in priored_indexes:
ret[jj] += c.log_jacobian_grad(x[jj])
return ret
return 0.
#===========================================================================
# Tie parameters together
@ -471,7 +496,7 @@ class Indexable(Nameable, Updateable):
self.param_array[...] = transform.initialize(self.param_array)
reconstrained = self.unconstrain()
added = self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
self.notify_observers(self, None if trigger_parent else -np.inf)
self.trigger_update(trigger_parent)
return added
def unconstrain(self, *transforms):
@ -536,7 +561,7 @@ class Indexable(Nameable, Updateable):
update the constraints and priors view, so that
constraining is automized for the parent.
"""
from index_operations import ParameterIndexOperationsView
from .index_operations import ParameterIndexOperationsView
#if getattr(self, "_in_init_"):
#import ipdb;ipdb.set_trace()
#self.constraints.update(param.constraints, start)
@ -558,7 +583,7 @@ class Indexable(Nameable, Updateable):
"""
if warning and reconstrained.size > 0:
# TODO: figure out which parameters have changed and only print those
print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name)
print("WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name))
index = self._raveled_index()
which.add(what, index)
return index
@ -571,7 +596,7 @@ class Indexable(Nameable, Updateable):
if len(transforms) == 0:
transforms = which.properties()
removed = np.empty((0,), dtype=int)
for t in transforms:
for t in list(transforms):
unconstrained = which.remove(t, self._raveled_index())
removed = np.union1d(removed, unconstrained)
if t is __fixed__:
@ -612,7 +637,9 @@ class OptimizationHandlable(Indexable):
if not self._optimizer_copy_transformed:
self._optimizer_copy_.flat = self.param_array.flat
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
#py3 fix
#[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.items() if c != __fixed__]
if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
@ -641,21 +668,25 @@ class OptimizationHandlable(Indexable):
if f is None:
self.param_array.flat = p
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
#py3 fix
#for c, ind in self.constraints.iteritems() if c != __fixed__]
for c, ind in self.constraints.items() if c != __fixed__]
else:
self.param_array.flat[f] = p
[np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
#py3 fix
#for c, ind in self.constraints.iteritems() if c != __fixed__]
for c, ind in self.constraints.items() if c != __fixed__]
#self._highest_parent_.tie.propagate_val()
self._optimizer_copy_transformed = False
self.trigger_update()
def _get_params_transformed(self):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!")
#
def _set_params_transformed(self, p):
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!")
def _trigger_params_changed(self, trigger_parent=True):
"""
@ -680,17 +711,32 @@ class OptimizationHandlable(Indexable):
constraint to it.
"""
self._highest_parent_.tie.collate_gradient()
[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
#py3 fix
#[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
def _transform_gradients_non_natural(self, g):
"""
Transform the gradients by multiplying the gradient factor for each
constraint to it.
"""
self._highest_parent_.tie.collate_gradient()
#py3 fix
#[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
@property
def num_params(self):
"""
Return the number of parameters of this parameter_handle.
Param objects will always return 0.
"""
raise NotImplemented, "Abstract, please implement in respective classes"
raise NotImplemented("Abstract, please implement in respective classes")
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
"""
@ -739,7 +785,9 @@ class OptimizationHandlable(Indexable):
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
# now draw from prior where possible
x = self.param_array.copy()
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
#Py3 fix
#[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.items() if not p is None]
unfixlist = np.ones((self.size,),dtype=np.bool)
unfixlist[self.constraints[__fixed__]] = False
self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist]
@ -798,7 +846,7 @@ class Parameterizable(OptimizationHandlable):
A parameterisable class.
This class provides the parameters list (ArrayList) and standard parameter handling,
such as {add|remove}_parameter(), traverse hierarchy and param_array, gradient_array
such as {link|unlink}_parameter(), traverse hierarchy and param_array, gradient_array
and the empty parameters_changed().
This class is abstract and should not be instantiated.
@ -936,7 +984,7 @@ class Parameterizable(OptimizationHandlable):
self._add_parameter_name(param, ignore_added_names)
# and makes sure to not delete programmatically added parameters
for other in self.parameters[::-1]:
if other is not param and other.name.startswith(param.name):
if other is not param and other.name == param.name:
warn_and_retry(param, _name_digit.match(other.name))
return
if pname not in dir(self):
@ -1031,6 +1079,9 @@ class Parameterizable(OptimizationHandlable):
p = param_to_array(p)
d = f.create_dataset(n,p.shape,dtype=p.dtype)
d[:] = p
if hasattr(self, 'param_array'):
d = f.create_dataset('param_array',self.param_array.shape, dtype=self.param_array.dtype)
d[:] = self.param_array
f.close()
except:
raise 'Fails to write the parameters into a HDF5 file!'

View file

@ -5,7 +5,7 @@
import numpy as np
from scipy.special import gammaln, digamma
from ...util.linalg import pdinv
from domains import _REAL, _POSITIVE
from .domains import _REAL, _POSITIVE
import warnings
import weakref
@ -15,7 +15,11 @@ class Prior(object):
_instance = None
def __new__(cls, *args, **kwargs):
if not cls._instance or cls._instance.__class__ is not cls:
cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs)
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
cls._instance = newfunc(cls)
else:
cls._instance = newfunc(cls, *args, **kwargs)
return cls._instance
def pdf(self, x):
@ -52,7 +56,11 @@ class Gaussian(Prior):
for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma:
return instance()
o = super(Prior, cls).__new__(cls, mu, sigma)
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
@ -140,7 +148,11 @@ class LogGaussian(Gaussian):
for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma:
return instance()
o = super(Prior, cls).__new__(cls, mu, sigma)
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
@ -258,7 +270,11 @@ class Gamma(Prior):
for instance in cls._instances:
if instance().a == a and instance().b == b:
return instance()
o = super(Prior, cls).__new__(cls, a, b)
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, a, b)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
@ -398,7 +414,7 @@ class DGPLVM_KFDA(Prior):
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in xrange(self.datanum):
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
@ -504,6 +520,219 @@ class DGPLVM(Prior):
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
def __new__(cls, sigma2, lbl, x_shape):
return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
# Adding indices to the list so we can access whole the indices
def compute_listIndices(self, data_idx):
lst_idx = []
lst_idx_all = []
for i in data_idx:
if len(lst_idx) == 0:
pass
#Do nothing, because it is the first time list is created so is empty
else:
lst_idx = []
# Here we put indices of each class in to the list called lst_idx_all
for m in range(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
# This function calculates between classes variances
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
# This function calculates within classes variances
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in range(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
return DPx_Dx.T
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior_Raq'
# ******************************************
from .. import Parameterized
from .. import Param
class DGPLVM_Lamda(Prior, Parameterized):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
# _instances = []
@ -517,14 +746,18 @@ class DGPLVM(Prior):
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape):
def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
super(DGPLVM_Lamda, self).__init__(name=name)
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.lamda = lamda
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.lamda = Param('lamda', np.diag(lamda))
self.link_parameter(self.lamda)
def get_class_label(self, y):
for idx, v in enumerate(y):
@ -549,7 +782,8 @@ class DGPLVM(Prior):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
M_i[i] = np.mean(cls[i], axis=0)
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
@ -654,6 +888,13 @@ class DGPLVM(Prior):
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
@ -661,12 +902,16 @@ class DGPLVM(Prior):
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
@ -680,8 +925,251 @@ class DGPLVM(Prior):
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
# Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
DPxprim_Dx = DPxprim_Dx.T
DPxprim_Dlamda = DPx_Dx.dot(x)
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
DPxprim_Dlamda = DPxprim_Dlamda.T
self.lamda.gradient = np.diag(DPxprim_Dlamda)
# print DPxprim_Dx
return DPxprim_Dx
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior_Raq_Lamda'
# ******************************************
class DGPLVM_T(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape, vec):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.vec = vec
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
# class_i = np.multiply(cls[i],vec)
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
# Adding indices to the list so we can access whole the indices
def compute_listIndices(self, data_idx):
lst_idx = []
lst_idx_all = []
for i in data_idx:
if len(lst_idx) == 0:
pass
#Do nothing, because it is the first time list is created so is empty
else:
lst_idx = []
# Here we put indices of each class in to the list called lst_idx_all
for m in range(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
# This function calculates between classes variances
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
# This function calculates within classes variances
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in range(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#print 'SB_inv: ', Sb_inv_N
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#print 'SB_inv: ',Sb_inv_N
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
@ -706,7 +1194,9 @@ class DGPLVM(Prior):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior'
return 'DGPLVM_prior_Raq_TTT'
class HalfT(Prior):
"""

View file

@ -3,7 +3,7 @@
import numpy as np
from domains import _POSITIVE,_NEGATIVE, _BOUNDED
from .domains import _POSITIVE,_NEGATIVE, _BOUNDED
import weakref
import sys
@ -31,6 +31,16 @@ class Transformation(object):
raise NotImplementedError
def finv(self, model_param):
raise NotImplementedError
def log_jacobian(self, model_param):
"""
compute the log of the jacobian of f, evaluated at f(x)= model_param
"""
raise NotImplementedError
def log_jacobian_grad(self, model_param):
"""
compute the drivative of the log of the jacobian of f, evaluated at f(x)= model_param
"""
raise NotImplementedError
def gradfactor(self, model_param, dL_dmodel_param):
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
@ -42,6 +52,8 @@ class Transformation(object):
\frac{\frac{\partial L}{\partial f}\left(\left.\partial f(x)}{\partial x}\right|_{x=f^{-1}(f)\right)}
"""
raise NotImplementedError
def gradfactor_non_natural(self, model_param, dL_dmodel_param):
return self.gradfactor(model_param, dL_dmodel_param)
def initialize(self, f):
""" produce a sensible initial value for f(x)"""
raise NotImplementedError
@ -50,7 +62,7 @@ class Transformation(object):
import matplotlib.pyplot as plt
from ...plotting.matplot_dep import base_plots
x = np.linspace(-8,8)
base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw)
axes = plt.gca()
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)
@ -70,15 +82,41 @@ class Logexp(Transformation):
return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f)))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def log_jacobian(self, model_param):
return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param
def log_jacobian_grad(self, model_param):
return 1./(np.exp(model_param)-1.)
def __str__(self):
return '+ve'
class Exponent(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def log_jacobian(self, model_param):
return np.log(model_param)
def log_jacobian_grad(self, model_param):
return 1./model_param
def __str__(self):
return '+ve'
class NormalTheta(Transformation):
"Do not use, not officially supported!"
_instances = []
def __new__(cls, mu_indices, var_indices):
def __new__(cls, mu_indices=None, var_indices=None):
"Do not use, not officially supported!"
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -98,6 +136,7 @@ class NormalTheta(Transformation):
# that the values are ok
# Before:
theta[self.var_indices] = np.abs(-.5/theta[self.var_indices])
#theta[self.var_indices] = np.exp(-.5/theta[self.var_indices])
theta[self.mu_indices] *= theta[self.var_indices]
return theta # which is now {mu, var}
@ -106,6 +145,7 @@ class NormalTheta(Transformation):
varp = muvar[self.var_indices]
muvar[self.mu_indices] /= varp
muvar[self.var_indices] = -.5/varp
#muvar[self.var_indices] = -.5/np.log(varp)
return muvar # which is now {theta1, theta2}
@ -124,7 +164,7 @@ class NormalTheta(Transformation):
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
print("Warning: changing parameters to satisfy constraints")
f[self.var_indices] = np.abs(f[self.var_indices])
return f
@ -139,9 +179,10 @@ class NormalTheta(Transformation):
self.var_indices = state[1]
class NormalNaturalAntti(NormalTheta):
"Do not use, not officially supported!"
_instances = []
_logexp = Logexp()
def __new__(cls, mu_indices, var_indices):
def __new__(cls, mu_indices=None, var_indices=None):
"Do not use, not officially supported!"
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -170,7 +211,7 @@ class NormalNaturalAntti(NormalTheta):
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
print("Warning: changing parameters to satisfy constraints")
f[self.var_indices] = np.abs(f[self.var_indices])
return f
@ -178,8 +219,10 @@ class NormalNaturalAntti(NormalTheta):
return "natantti"
class NormalEta(Transformation):
"Do not use, not officially supported!"
_instances = []
def __new__(cls, mu_indices, var_indices):
def __new__(cls, mu_indices=None, var_indices=None):
"Do not use, not officially supported!"
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -211,7 +254,7 @@ class NormalEta(Transformation):
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
print("Warning: changing parameters to satisfy constraints")
f[self.var_indices] = np.abs(f[self.var_indices])
return f
@ -219,8 +262,10 @@ class NormalEta(Transformation):
return "eta"
class NormalNaturalThroughTheta(NormalTheta):
"Do not use, not officially supported!"
_instances = []
def __new__(cls, mu_indices, var_indices):
def __new__(cls, mu_indices=None, var_indices=None):
"Do not use, not officially supported!"
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -250,13 +295,28 @@ class NormalNaturalThroughTheta(NormalTheta):
#=======================================================================
return dmuvar # which is now the gradient multiplicator
def gradfactor_non_natural(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# theta gradients
# This works and the gradient checks!
dmuvar[self.mu_indices] *= var
dmuvar[self.var_indices] *= 2*(var)**2
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
#=======================================================================
return dmuvar # which is now the gradient multiplicator for {theta1, theta2}
def __str__(self):
return "natgrad"
class NormalNaturalWhooot(NormalTheta):
"Do not use, not officially supported!"
_instances = []
def __new__(cls, mu_indices, var_indices):
def __new__(cls, mu_indices=None, var_indices=None):
"Do not use, not officially supported!"
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -290,8 +350,10 @@ class NormalNaturalWhooot(NormalTheta):
return "natgrad"
class NormalNaturalThroughEta(NormalEta):
"Do not use, not officially supported!"
_instances = []
def __new__(cls, mu_indices, var_indices):
def __new__(cls, mu_indices=None, var_indices=None):
"Do not use, not officially supported!"
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
@ -332,7 +394,7 @@ class LogexpNeg(Transformation):
return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def __str__(self):
return '+ve'
@ -384,27 +446,11 @@ class LogexpClipped(Logexp):
return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def __str__(self):
return '+ve_c'
class Exponent(Transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
domain = _POSITIVE
def f(self, x):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve'
class NegativeExponent(Exponent):
domain = _NEGATIVE
def f(self, x):
@ -440,7 +486,11 @@ class Logistic(Transformation):
for instance in cls._instances:
if instance().lower == lower and instance().upper == upper:
return instance()
o = super(Transformation, cls).__new__(cls, lower, upper, *args, **kwargs)
newfunc = super(Transformation, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, lower, upper, *args, **kwargs)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, lower, upper):
@ -458,7 +508,7 @@ class Logistic(Transformation):
return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints"
print("Warning: changing parameters to satisfy constraints")
#return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
#FIXME: Max, zeros_like right?
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)

View file

@ -1 +1,2 @@
from hmc import HMC
from samplers import *

View file

@ -4,23 +4,18 @@
import numpy as np
from scipy import linalg, optimize
import Tango
import sys
import re
import numdifftools as ndt
import pdb
import cPickle
class Metropolis_Hastings:
def __init__(self,model,cov=None):
"""Metropolis Hastings, with tunings according to Gelman et al. """
self.model = model
current = self.model._get_params_transformed()
current = self.model.optimizer_array
self.D = current.size
self.chains = []
if cov is None:
self.cov = model.Laplace_covariance()
self.cov = np.eye(self.D)
else:
self.cov = cov
self.scale = 2.4/np.sqrt(self.D)
@ -31,20 +26,20 @@ class Metropolis_Hastings:
if start is None:
self.model.randomize()
else:
self.model._set_params_transformed(start)
self.model.optimizer_array = start
def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400):
current = self.model._get_params_transformed()
fcurrent = self.model.log_likelihood() + self.model.log_prior()
def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
current = self.model.optimizer_array
fcurrent = self.model.log_likelihood() + self.model.log_prior() + \
self.model._log_det_jacobian()
accepted = np.zeros(Ntotal,dtype=np.bool)
for it in range(Ntotal):
print "sample %d of %d\r"%(it,Ntotal),
sys.stdout.flush()
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
self.model._set_params_transformed(prop)
fprop = self.model.log_likelihood() + self.model.log_prior()
self.model.optimizer_array = prop
fprop = self.model.log_likelihood() + self.model.log_prior() + \
self.model._log_det_jacobian()
if fprop>fcurrent:#sample accepted, going 'uphill'
accepted[it] = True
@ -72,10 +67,11 @@ class Metropolis_Hastings:
def predict(self,function,args):
"""Make a prediction for the function, to which we will pass the additional arguments"""
param = self.model._get_params()
param = self.model.param_array
fs = []
for p in self.chain:
self.model._set_params(p)
self.model.param_array = p
fs.append(function(*args))
self.model._set_params(param)# reset model to starting state
# reset model to starting state
self.model.param_array = param
return fs

View file

@ -0,0 +1,101 @@
# Written by Ilias Bilionis
"""
Test if hyperparameters in models are properly transformed.
"""
import unittest
import numpy as np
import scipy.stats as st
import GPy
class TestModel(GPy.core.Model):
"""
A simple GPy model with one parameter.
"""
def __init__(self):
GPy.core.Model.__init__(self, 'test_model')
theta = GPy.core.Param('theta', 1.)
self.link_parameter(theta)
def log_likelihood(self):
return 0.
class RVTransformationTestCase(unittest.TestCase):
def _test_trans(self, trans):
m = TestModel()
prior = GPy.priors.LogGaussian(.5, 0.1)
m.theta.set_prior(prior)
m.theta.unconstrain()
m.theta.constrain(trans)
# The PDF of the transformed variables
p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0])
# To the empirical PDF of:
theta_s = prior.rvs(100000)
phi_s = trans.finv(theta_s)
# which is essentially a kernel density estimation
kde = st.gaussian_kde(phi_s)
# We will compare the PDF here:
phi = np.linspace(phi_s.min(), phi_s.max(), 100)
# The transformed PDF of phi should be this:
pdf_phi = np.array([p_phi(p) for p in phi])
# UNCOMMENT TO SEE GRAPHICAL COMPARISON
#import matplotlib.pyplot as plt
#fig, ax = plt.subplots()
#ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
#ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
#ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
#ax.set_xlabel(r'transformed $\theta$', fontsize=16)
#ax.set_ylabel('PDF', fontsize=16)
#plt.legend(loc='best')
#plt.show(block=True)
# END OF PLOT
# The following test cannot be very accurate
self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1)
# Check the gradients at a few random points
for i in xrange(10):
m.theta = theta_s[i]
self.assertTrue(m.checkgrad(verbose=True))
def test_Logexp(self):
self._test_trans(GPy.constraints.Logexp())
self._test_trans(GPy.constraints.Exponent())
if __name__ == '__main__':
unittest.main()
quit()
m = TestModel()
prior = GPy.priors.LogGaussian(0., .9)
m.theta.set_prior(prior)
# The following should return the PDF in terms of the transformed quantities
p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0])
# Let's look at the transformation phi = log(exp(theta - 1))
trans = GPy.constraints.Exponent()
m.theta.constrain(trans)
# Plot the transformed probability density
phi = np.linspace(-8, 8, 100)
fig, ax = plt.subplots()
# Let's draw some samples of theta and transform them so that we see
# which one is right
theta_s = prior.rvs(10000)
# Transform it to the new variables
phi_s = trans.finv(theta_s)
# And draw their histogram
ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical')
# This is to be compared to the PDF of the model expressed in terms of these new
# variables
ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2)
ax.set_xlim(-3, 10)
ax.set_xlabel(r'transformed $\theta$', fontsize=16)
ax.set_ylabel('PDF', fontsize=16)
plt.legend(loc='best')
# Now let's test the gradients
m.checkgrad(verbose=True)
# And show the plot
plt.show(block=True)