mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
Merge pull request #211 from PredictiveScienceLab/master
Fixes the PDF transformation bug by handpicking James Hensman's code from the devel branch
This commit is contained in:
commit
61b671e58e
6 changed files with 792 additions and 103 deletions
|
|
@ -13,11 +13,12 @@ Observable Pattern for patameterization
|
|||
|
||||
"""
|
||||
|
||||
from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
||||
from .transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
||||
import numpy as np
|
||||
import re
|
||||
import logging
|
||||
from updateable import Updateable
|
||||
from .updateable import Updateable
|
||||
from functools import reduce
|
||||
|
||||
class HierarchyError(Exception):
|
||||
"""
|
||||
|
|
@ -36,7 +37,7 @@ def adjust_name_for_printing(name):
|
|||
name = name.replace("/", "_l_").replace("@", '_at_')
|
||||
name = name.replace("(", "_of_").replace(")", "")
|
||||
if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None:
|
||||
raise NameError, "name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name)
|
||||
raise NameError("name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name))
|
||||
return name
|
||||
return ''
|
||||
|
||||
|
|
@ -65,13 +66,13 @@ class Parentable(object):
|
|||
Gets called, when the parent changed, so we can adjust our
|
||||
inner attributes according to the new parent.
|
||||
"""
|
||||
raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent"
|
||||
raise NotImplementedError("shouldnt happen, Parentable objects need to be able to change their parent")
|
||||
|
||||
def _disconnect_parent(self, *args, **kw):
|
||||
"""
|
||||
Disconnect this object from its parent
|
||||
"""
|
||||
raise NotImplementedError, "Abstract superclass"
|
||||
raise NotImplementedError("Abstract superclass")
|
||||
|
||||
@property
|
||||
def _highest_parent_(self):
|
||||
|
|
@ -109,7 +110,10 @@ class Pickleable(object):
|
|||
it properly.
|
||||
:param protocol: pickling protocol to use, python-pickle for details.
|
||||
"""
|
||||
import cPickle as pickle
|
||||
try: #Py2
|
||||
import cPickle as pickle
|
||||
except ImportError: #Py3
|
||||
import pickle
|
||||
if isinstance(f, str):
|
||||
with open(f, 'wb') as f:
|
||||
pickle.dump(self, f, protocol)
|
||||
|
|
@ -138,9 +142,9 @@ class Pickleable(object):
|
|||
which = self
|
||||
which.traverse_parents(parents.append) # collect parents
|
||||
for p in parents:
|
||||
if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied
|
||||
if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient
|
||||
if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
|
||||
if not id(p) in memo :memo[id(p)] = None # set all parents to be None, so they will not be copied
|
||||
if not id(self.gradient) in memo:memo[id(self.gradient)] = None # reset the gradient
|
||||
if not id(self._fixes_) in memo :memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
|
||||
copy = copy.deepcopy(self, memo) # and start the copy
|
||||
copy._parent_index_ = None
|
||||
copy._trigger_params_changed()
|
||||
|
|
@ -163,14 +167,16 @@ class Pickleable(object):
|
|||
'_Cacher_wrap__cachers', # never pickle cachers
|
||||
]
|
||||
dc = dict()
|
||||
for k,v in self.__dict__.iteritems():
|
||||
#py3 fix
|
||||
#for k,v in self.__dict__.iteritems():
|
||||
for k,v in self.__dict__.items():
|
||||
if k not in ignore_list:
|
||||
dc[k] = v
|
||||
return dc
|
||||
|
||||
def __setstate__(self, state):
|
||||
self.__dict__.update(state)
|
||||
from lists_and_dicts import ObserverList
|
||||
from .lists_and_dicts import ObserverList
|
||||
self.observers = ObserverList()
|
||||
self._setup_observers()
|
||||
self._optimizer_copy_transformed = False
|
||||
|
|
@ -214,7 +220,7 @@ class Gradcheckable(Pickleable, Parentable):
|
|||
Perform the checkgrad on the model.
|
||||
TODO: this can be done more efficiently, when doing it inside here
|
||||
"""
|
||||
raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!"
|
||||
raise HierarchyError("This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!")
|
||||
|
||||
class Nameable(Gradcheckable):
|
||||
"""
|
||||
|
|
@ -268,7 +274,7 @@ class Indexable(Nameable, Updateable):
|
|||
def __init__(self, name, default_constraint=None, *a, **kw):
|
||||
super(Indexable, self).__init__(name=name, *a, **kw)
|
||||
self._default_constraint_ = default_constraint
|
||||
from index_operations import ParameterIndexOperations
|
||||
from .index_operations import ParameterIndexOperations
|
||||
self.constraints = ParameterIndexOperations()
|
||||
self.priors = ParameterIndexOperations()
|
||||
if self._default_constraint_ is not None:
|
||||
|
|
@ -310,7 +316,7 @@ class Indexable(Nameable, Updateable):
|
|||
that is an int array, containing the indexes for the flattened
|
||||
param inside this parameterized logic.
|
||||
"""
|
||||
from param import ParamConcatenation
|
||||
from .param import ParamConcatenation
|
||||
if isinstance(param, ParamConcatenation):
|
||||
return np.hstack((self._raveled_index_for(p) for p in param.params))
|
||||
return param._raveled_index() + self._offset_for(param)
|
||||
|
|
@ -407,7 +413,7 @@ class Indexable(Nameable, Updateable):
|
|||
repriorized = self.unset_priors()
|
||||
self._add_to_index_operations(self.priors, repriorized, prior, warning)
|
||||
|
||||
from domains import _REAL, _POSITIVE, _NEGATIVE
|
||||
from .domains import _REAL, _POSITIVE, _NEGATIVE
|
||||
if prior.domain is _POSITIVE:
|
||||
self.constrain_positive(warning)
|
||||
elif prior.domain is _NEGATIVE:
|
||||
|
|
@ -424,19 +430,38 @@ class Indexable(Nameable, Updateable):
|
|||
|
||||
def log_prior(self):
|
||||
"""evaluate the prior"""
|
||||
if self.priors.size > 0:
|
||||
x = self.param_array
|
||||
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
|
||||
return 0.
|
||||
if self.priors.size == 0:
|
||||
return 0.
|
||||
x = self.param_array
|
||||
#evaluate the prior log densities
|
||||
log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
|
||||
|
||||
#account for the transformation by evaluating the log Jacobian (where things are transformed)
|
||||
log_j = 0.
|
||||
priored_indexes = np.hstack([i for p, i in self.priors.items()])
|
||||
for c,j in self.constraints.items():
|
||||
if not isinstance(c, Transformation):continue
|
||||
for jj in j:
|
||||
if jj in priored_indexes:
|
||||
log_j += c.log_jacobian(x[jj])
|
||||
return log_p + log_j
|
||||
|
||||
def _log_prior_gradients(self):
|
||||
"""evaluate the gradients of the priors"""
|
||||
if self.priors.size > 0:
|
||||
x = self.param_array
|
||||
ret = np.zeros(x.size)
|
||||
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
|
||||
return ret
|
||||
return 0.
|
||||
if self.priors.size == 0:
|
||||
return 0.
|
||||
x = self.param_array
|
||||
ret = np.zeros(x.size)
|
||||
#compute derivate of prior density
|
||||
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
|
||||
#add in jacobian derivatives if transformed
|
||||
priored_indexes = np.hstack([i for p, i in self.priors.items()])
|
||||
for c,j in self.constraints.items():
|
||||
if not isinstance(c, Transformation):continue
|
||||
for jj in j:
|
||||
if jj in priored_indexes:
|
||||
ret[jj] += c.log_jacobian_grad(x[jj])
|
||||
return ret
|
||||
|
||||
#===========================================================================
|
||||
# Tie parameters together
|
||||
|
|
@ -471,7 +496,7 @@ class Indexable(Nameable, Updateable):
|
|||
self.param_array[...] = transform.initialize(self.param_array)
|
||||
reconstrained = self.unconstrain()
|
||||
added = self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
|
||||
self.notify_observers(self, None if trigger_parent else -np.inf)
|
||||
self.trigger_update(trigger_parent)
|
||||
return added
|
||||
|
||||
def unconstrain(self, *transforms):
|
||||
|
|
@ -536,7 +561,7 @@ class Indexable(Nameable, Updateable):
|
|||
update the constraints and priors view, so that
|
||||
constraining is automized for the parent.
|
||||
"""
|
||||
from index_operations import ParameterIndexOperationsView
|
||||
from .index_operations import ParameterIndexOperationsView
|
||||
#if getattr(self, "_in_init_"):
|
||||
#import ipdb;ipdb.set_trace()
|
||||
#self.constraints.update(param.constraints, start)
|
||||
|
|
@ -558,7 +583,7 @@ class Indexable(Nameable, Updateable):
|
|||
"""
|
||||
if warning and reconstrained.size > 0:
|
||||
# TODO: figure out which parameters have changed and only print those
|
||||
print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name)
|
||||
print("WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name))
|
||||
index = self._raveled_index()
|
||||
which.add(what, index)
|
||||
return index
|
||||
|
|
@ -571,7 +596,7 @@ class Indexable(Nameable, Updateable):
|
|||
if len(transforms) == 0:
|
||||
transforms = which.properties()
|
||||
removed = np.empty((0,), dtype=int)
|
||||
for t in transforms:
|
||||
for t in list(transforms):
|
||||
unconstrained = which.remove(t, self._raveled_index())
|
||||
removed = np.union1d(removed, unconstrained)
|
||||
if t is __fixed__:
|
||||
|
|
@ -612,7 +637,9 @@ class OptimizationHandlable(Indexable):
|
|||
|
||||
if not self._optimizer_copy_transformed:
|
||||
self._optimizer_copy_.flat = self.param_array.flat
|
||||
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||
#py3 fix
|
||||
#[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.items() if c != __fixed__]
|
||||
if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
|
||||
fixes = np.ones(self.size).astype(bool)
|
||||
fixes[self.constraints[__fixed__]] = FIXED
|
||||
|
|
@ -641,21 +668,25 @@ class OptimizationHandlable(Indexable):
|
|||
if f is None:
|
||||
self.param_array.flat = p
|
||||
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
|
||||
for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||
#py3 fix
|
||||
#for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||
for c, ind in self.constraints.items() if c != __fixed__]
|
||||
else:
|
||||
self.param_array.flat[f] = p
|
||||
[np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
|
||||
for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||
#py3 fix
|
||||
#for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||
for c, ind in self.constraints.items() if c != __fixed__]
|
||||
#self._highest_parent_.tie.propagate_val()
|
||||
|
||||
self._optimizer_copy_transformed = False
|
||||
self.trigger_update()
|
||||
|
||||
def _get_params_transformed(self):
|
||||
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
|
||||
raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!")
|
||||
#
|
||||
def _set_params_transformed(self, p):
|
||||
raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
|
||||
raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!")
|
||||
|
||||
def _trigger_params_changed(self, trigger_parent=True):
|
||||
"""
|
||||
|
|
@ -680,17 +711,32 @@ class OptimizationHandlable(Indexable):
|
|||
constraint to it.
|
||||
"""
|
||||
self._highest_parent_.tie.collate_gradient()
|
||||
[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
||||
#py3 fix
|
||||
#[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
||||
[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
|
||||
if self._has_fixes(): return g[self._fixes_]
|
||||
return g
|
||||
|
||||
def _transform_gradients_non_natural(self, g):
|
||||
"""
|
||||
Transform the gradients by multiplying the gradient factor for each
|
||||
constraint to it.
|
||||
"""
|
||||
self._highest_parent_.tie.collate_gradient()
|
||||
#py3 fix
|
||||
#[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
||||
[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
|
||||
if self._has_fixes(): return g[self._fixes_]
|
||||
return g
|
||||
|
||||
|
||||
@property
|
||||
def num_params(self):
|
||||
"""
|
||||
Return the number of parameters of this parameter_handle.
|
||||
Param objects will always return 0.
|
||||
"""
|
||||
raise NotImplemented, "Abstract, please implement in respective classes"
|
||||
raise NotImplemented("Abstract, please implement in respective classes")
|
||||
|
||||
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
|
||||
"""
|
||||
|
|
@ -739,7 +785,9 @@ class OptimizationHandlable(Indexable):
|
|||
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
||||
# now draw from prior where possible
|
||||
x = self.param_array.copy()
|
||||
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
|
||||
#Py3 fix
|
||||
#[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
|
||||
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.items() if not p is None]
|
||||
unfixlist = np.ones((self.size,),dtype=np.bool)
|
||||
unfixlist[self.constraints[__fixed__]] = False
|
||||
self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist]
|
||||
|
|
@ -798,7 +846,7 @@ class Parameterizable(OptimizationHandlable):
|
|||
A parameterisable class.
|
||||
|
||||
This class provides the parameters list (ArrayList) and standard parameter handling,
|
||||
such as {add|remove}_parameter(), traverse hierarchy and param_array, gradient_array
|
||||
such as {link|unlink}_parameter(), traverse hierarchy and param_array, gradient_array
|
||||
and the empty parameters_changed().
|
||||
|
||||
This class is abstract and should not be instantiated.
|
||||
|
|
@ -936,7 +984,7 @@ class Parameterizable(OptimizationHandlable):
|
|||
self._add_parameter_name(param, ignore_added_names)
|
||||
# and makes sure to not delete programmatically added parameters
|
||||
for other in self.parameters[::-1]:
|
||||
if other is not param and other.name.startswith(param.name):
|
||||
if other is not param and other.name == param.name:
|
||||
warn_and_retry(param, _name_digit.match(other.name))
|
||||
return
|
||||
if pname not in dir(self):
|
||||
|
|
@ -1031,6 +1079,9 @@ class Parameterizable(OptimizationHandlable):
|
|||
p = param_to_array(p)
|
||||
d = f.create_dataset(n,p.shape,dtype=p.dtype)
|
||||
d[:] = p
|
||||
if hasattr(self, 'param_array'):
|
||||
d = f.create_dataset('param_array',self.param_array.shape, dtype=self.param_array.dtype)
|
||||
d[:] = self.param_array
|
||||
f.close()
|
||||
except:
|
||||
raise 'Fails to write the parameters into a HDF5 file!'
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
import numpy as np
|
||||
from scipy.special import gammaln, digamma
|
||||
from ...util.linalg import pdinv
|
||||
from domains import _REAL, _POSITIVE
|
||||
from .domains import _REAL, _POSITIVE
|
||||
import warnings
|
||||
import weakref
|
||||
|
||||
|
|
@ -15,8 +15,12 @@ class Prior(object):
|
|||
_instance = None
|
||||
def __new__(cls, *args, **kwargs):
|
||||
if not cls._instance or cls._instance.__class__ is not cls:
|
||||
cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs)
|
||||
return cls._instance
|
||||
newfunc = super(Prior, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
cls._instance = newfunc(cls)
|
||||
else:
|
||||
cls._instance = newfunc(cls, *args, **kwargs)
|
||||
return cls._instance
|
||||
|
||||
def pdf(self, x):
|
||||
return np.exp(self.lnpdf(x))
|
||||
|
|
@ -52,7 +56,11 @@ class Gaussian(Prior):
|
|||
for instance in cls._instances:
|
||||
if instance().mu == mu and instance().sigma == sigma:
|
||||
return instance()
|
||||
o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||
newfunc = super(Prior, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, mu, sigma)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
|
|
@ -140,7 +148,11 @@ class LogGaussian(Gaussian):
|
|||
for instance in cls._instances:
|
||||
if instance().mu == mu and instance().sigma == sigma:
|
||||
return instance()
|
||||
o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||
newfunc = super(Prior, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, mu, sigma)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
|
|
@ -258,7 +270,11 @@ class Gamma(Prior):
|
|||
for instance in cls._instances:
|
||||
if instance().a == a and instance().b == b:
|
||||
return instance()
|
||||
o = super(Prior, cls).__new__(cls, a, b)
|
||||
newfunc = super(Prior, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, a, b)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
|
|
@ -398,7 +414,7 @@ class DGPLVM_KFDA(Prior):
|
|||
def compute_cls(self, x):
|
||||
cls = {}
|
||||
# Appending each data point to its proper class
|
||||
for j in xrange(self.datanum):
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in cls:
|
||||
cls[class_label] = []
|
||||
|
|
@ -504,6 +520,219 @@ class DGPLVM(Prior):
|
|||
|
||||
.. Note:: DGPLVM for Classification paper implementation
|
||||
|
||||
"""
|
||||
domain = _REAL
|
||||
|
||||
def __new__(cls, sigma2, lbl, x_shape):
|
||||
return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
|
||||
|
||||
def __init__(self, sigma2, lbl, x_shape):
|
||||
self.sigma2 = sigma2
|
||||
# self.x = x
|
||||
self.lbl = lbl
|
||||
self.classnum = lbl.shape[1]
|
||||
self.datanum = lbl.shape[0]
|
||||
self.x_shape = x_shape
|
||||
self.dim = x_shape[1]
|
||||
|
||||
def get_class_label(self, y):
|
||||
for idx, v in enumerate(y):
|
||||
if v == 1:
|
||||
return idx
|
||||
return -1
|
||||
|
||||
# This function assigns each data point to its own class
|
||||
# and returns the dictionary which contains the class name and parameters.
|
||||
def compute_cls(self, x):
|
||||
cls = {}
|
||||
# Appending each data point to its proper class
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in cls:
|
||||
cls[class_label] = []
|
||||
cls[class_label].append(x[j])
|
||||
return cls
|
||||
|
||||
# This function computes mean of each class. The mean is calculated through each dimension
|
||||
def compute_Mi(self, cls):
|
||||
M_i = np.zeros((self.classnum, self.dim))
|
||||
for i in cls:
|
||||
# Mean of each class
|
||||
class_i = cls[i]
|
||||
M_i[i] = np.mean(class_i, axis=0)
|
||||
return M_i
|
||||
|
||||
# Adding data points as tuple to the dictionary so that we can access indices
|
||||
def compute_indices(self, x):
|
||||
data_idx = {}
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in data_idx:
|
||||
data_idx[class_label] = []
|
||||
t = (j, x[j])
|
||||
data_idx[class_label].append(t)
|
||||
return data_idx
|
||||
|
||||
# Adding indices to the list so we can access whole the indices
|
||||
def compute_listIndices(self, data_idx):
|
||||
lst_idx = []
|
||||
lst_idx_all = []
|
||||
for i in data_idx:
|
||||
if len(lst_idx) == 0:
|
||||
pass
|
||||
#Do nothing, because it is the first time list is created so is empty
|
||||
else:
|
||||
lst_idx = []
|
||||
# Here we put indices of each class in to the list called lst_idx_all
|
||||
for m in range(len(data_idx[i])):
|
||||
lst_idx.append(data_idx[i][m][0])
|
||||
lst_idx_all.append(lst_idx)
|
||||
return lst_idx_all
|
||||
|
||||
# This function calculates between classes variances
|
||||
def compute_Sb(self, cls, M_i, M_0):
|
||||
Sb = np.zeros((self.dim, self.dim))
|
||||
for i in cls:
|
||||
B = (M_i[i] - M_0).reshape(self.dim, 1)
|
||||
B_trans = B.transpose()
|
||||
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
|
||||
return Sb
|
||||
|
||||
# This function calculates within classes variances
|
||||
def compute_Sw(self, cls, M_i):
|
||||
Sw = np.zeros((self.dim, self.dim))
|
||||
for i in cls:
|
||||
N_i = float(len(cls[i]))
|
||||
W_WT = np.zeros((self.dim, self.dim))
|
||||
for xk in cls[i]:
|
||||
W = (xk - M_i[i])
|
||||
W_WT += np.outer(W, W)
|
||||
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
|
||||
return Sw
|
||||
|
||||
# Calculating beta and Bi for Sb
|
||||
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
|
||||
# import pdb
|
||||
# pdb.set_trace()
|
||||
B_i = np.zeros((self.classnum, self.dim))
|
||||
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
# pdb.set_trace()
|
||||
# Calculating Bi
|
||||
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
|
||||
for k in range(self.datanum):
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
if k in lst_idx_all[i]:
|
||||
beta = (float(1) / N_i) - (float(1) / self.datanum)
|
||||
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||
else:
|
||||
beta = -(float(1) / self.datanum)
|
||||
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
|
||||
return Sig_beta_B_i_all
|
||||
|
||||
|
||||
# Calculating W_j s separately so we can access all the W_j s anytime
|
||||
def compute_wj(self, data_idx, M_i):
|
||||
W_i = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
for tpl in data_idx[i]:
|
||||
xj = tpl[1]
|
||||
j = tpl[0]
|
||||
W_i[j] = (xj - M_i[i])
|
||||
return W_i
|
||||
|
||||
# Calculating alpha and Wj for Sw
|
||||
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
|
||||
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
for tpl in data_idx[i]:
|
||||
k = tpl[0]
|
||||
for j in lst_idx_all[i]:
|
||||
if k == j:
|
||||
alpha = 1 - (float(1) / N_i)
|
||||
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||
else:
|
||||
alpha = 0 - (float(1) / N_i)
|
||||
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
|
||||
return Sig_alpha_W_i
|
||||
|
||||
# This function calculates log of our prior
|
||||
def lnpdf(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
||||
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
||||
|
||||
# This function calculates derivative of the log of prior function
|
||||
def lnpdf_grad(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
data_idx = self.compute_indices(x)
|
||||
lst_idx_all = self.compute_listIndices(data_idx)
|
||||
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
|
||||
W_i = self.compute_wj(data_idx, M_i)
|
||||
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
|
||||
|
||||
# Calculating inverse of Sb and its transpose and minus
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
||||
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
||||
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
||||
Sw_trans = np.transpose(Sw)
|
||||
|
||||
# Calculating DJ/DXk
|
||||
DJ_Dxk = 2 * (
|
||||
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
|
||||
Sig_alpha_W_i))
|
||||
# Calculating derivative of the log of the prior
|
||||
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
|
||||
return DPx_Dx.T
|
||||
|
||||
# def frb(self, x):
|
||||
# from functools import partial
|
||||
# from GPy.models import GradientChecker
|
||||
# f = partial(self.lnpdf)
|
||||
# df = partial(self.lnpdf_grad)
|
||||
# grad = GradientChecker(f, df, x, 'X')
|
||||
# grad.checkgrad(verbose=1)
|
||||
|
||||
def rvs(self, n):
|
||||
return np.random.rand(n) # A WRONG implementation
|
||||
|
||||
def __str__(self):
|
||||
return 'DGPLVM_prior_Raq'
|
||||
|
||||
|
||||
# ******************************************
|
||||
|
||||
from .. import Parameterized
|
||||
from .. import Param
|
||||
class DGPLVM_Lamda(Prior, Parameterized):
|
||||
"""
|
||||
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
|
||||
|
||||
:param sigma2: constant
|
||||
|
||||
.. Note:: DGPLVM for Classification paper implementation
|
||||
|
||||
"""
|
||||
domain = _REAL
|
||||
# _instances = []
|
||||
|
|
@ -517,14 +746,18 @@ class DGPLVM(Prior):
|
|||
# cls._instances.append(weakref.ref(o))
|
||||
# return cls._instances[-1]()
|
||||
|
||||
def __init__(self, sigma2, lbl, x_shape):
|
||||
def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
|
||||
super(DGPLVM_Lamda, self).__init__(name=name)
|
||||
self.sigma2 = sigma2
|
||||
# self.x = x
|
||||
self.lbl = lbl
|
||||
self.lamda = lamda
|
||||
self.classnum = lbl.shape[1]
|
||||
self.datanum = lbl.shape[0]
|
||||
self.x_shape = x_shape
|
||||
self.dim = x_shape[1]
|
||||
self.lamda = Param('lamda', np.diag(lamda))
|
||||
self.link_parameter(self.lamda)
|
||||
|
||||
def get_class_label(self, y):
|
||||
for idx, v in enumerate(y):
|
||||
|
|
@ -549,7 +782,8 @@ class DGPLVM(Prior):
|
|||
M_i = np.zeros((self.classnum, self.dim))
|
||||
for i in cls:
|
||||
# Mean of each class
|
||||
M_i[i] = np.mean(cls[i], axis=0)
|
||||
class_i = cls[i]
|
||||
M_i[i] = np.mean(class_i, axis=0)
|
||||
return M_i
|
||||
|
||||
# Adding data points as tuple to the dictionary so that we can access indices
|
||||
|
|
@ -654,6 +888,13 @@ class DGPLVM(Prior):
|
|||
# This function calculates log of our prior
|
||||
def lnpdf(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
|
||||
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
|
||||
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
|
|
@ -661,12 +902,16 @@ class DGPLVM(Prior):
|
|||
Sw = self.compute_Sw(cls, M_i)
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
|
||||
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
||||
|
||||
# This function calculates derivative of the log of prior function
|
||||
def lnpdf_grad(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
|
|
@ -680,8 +925,251 @@ class DGPLVM(Prior):
|
|||
|
||||
# Calculating inverse of Sb and its transpose and minus
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
# Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
|
||||
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
||||
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
||||
Sw_trans = np.transpose(Sw)
|
||||
|
||||
# Calculating DJ/DXk
|
||||
DJ_Dxk = 2 * (
|
||||
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
|
||||
Sig_alpha_W_i))
|
||||
# Calculating derivative of the log of the prior
|
||||
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
|
||||
|
||||
DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
|
||||
|
||||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||
DPxprim_Dx = DPxprim_Dx.T
|
||||
|
||||
DPxprim_Dlamda = DPx_Dx.dot(x)
|
||||
|
||||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||
DPxprim_Dlamda = DPxprim_Dlamda.T
|
||||
|
||||
self.lamda.gradient = np.diag(DPxprim_Dlamda)
|
||||
# print DPxprim_Dx
|
||||
return DPxprim_Dx
|
||||
|
||||
|
||||
# def frb(self, x):
|
||||
# from functools import partial
|
||||
# from GPy.models import GradientChecker
|
||||
# f = partial(self.lnpdf)
|
||||
# df = partial(self.lnpdf_grad)
|
||||
# grad = GradientChecker(f, df, x, 'X')
|
||||
# grad.checkgrad(verbose=1)
|
||||
|
||||
def rvs(self, n):
|
||||
return np.random.rand(n) # A WRONG implementation
|
||||
|
||||
def __str__(self):
|
||||
return 'DGPLVM_prior_Raq_Lamda'
|
||||
|
||||
# ******************************************
|
||||
|
||||
class DGPLVM_T(Prior):
|
||||
"""
|
||||
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
|
||||
|
||||
:param sigma2: constant
|
||||
|
||||
.. Note:: DGPLVM for Classification paper implementation
|
||||
|
||||
"""
|
||||
domain = _REAL
|
||||
# _instances = []
|
||||
# def __new__(cls, mu, sigma): # Singleton:
|
||||
# if cls._instances:
|
||||
# cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
# for instance in cls._instances:
|
||||
# if instance().mu == mu and instance().sigma == sigma:
|
||||
# return instance()
|
||||
# o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||
# cls._instances.append(weakref.ref(o))
|
||||
# return cls._instances[-1]()
|
||||
|
||||
def __init__(self, sigma2, lbl, x_shape, vec):
|
||||
self.sigma2 = sigma2
|
||||
# self.x = x
|
||||
self.lbl = lbl
|
||||
self.classnum = lbl.shape[1]
|
||||
self.datanum = lbl.shape[0]
|
||||
self.x_shape = x_shape
|
||||
self.dim = x_shape[1]
|
||||
self.vec = vec
|
||||
|
||||
|
||||
def get_class_label(self, y):
|
||||
for idx, v in enumerate(y):
|
||||
if v == 1:
|
||||
return idx
|
||||
return -1
|
||||
|
||||
# This function assigns each data point to its own class
|
||||
# and returns the dictionary which contains the class name and parameters.
|
||||
def compute_cls(self, x):
|
||||
cls = {}
|
||||
# Appending each data point to its proper class
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in cls:
|
||||
cls[class_label] = []
|
||||
cls[class_label].append(x[j])
|
||||
return cls
|
||||
|
||||
# This function computes mean of each class. The mean is calculated through each dimension
|
||||
def compute_Mi(self, cls):
|
||||
M_i = np.zeros((self.classnum, self.dim))
|
||||
for i in cls:
|
||||
# Mean of each class
|
||||
# class_i = np.multiply(cls[i],vec)
|
||||
class_i = cls[i]
|
||||
M_i[i] = np.mean(class_i, axis=0)
|
||||
return M_i
|
||||
|
||||
# Adding data points as tuple to the dictionary so that we can access indices
|
||||
def compute_indices(self, x):
|
||||
data_idx = {}
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in data_idx:
|
||||
data_idx[class_label] = []
|
||||
t = (j, x[j])
|
||||
data_idx[class_label].append(t)
|
||||
return data_idx
|
||||
|
||||
# Adding indices to the list so we can access whole the indices
|
||||
def compute_listIndices(self, data_idx):
|
||||
lst_idx = []
|
||||
lst_idx_all = []
|
||||
for i in data_idx:
|
||||
if len(lst_idx) == 0:
|
||||
pass
|
||||
#Do nothing, because it is the first time list is created so is empty
|
||||
else:
|
||||
lst_idx = []
|
||||
# Here we put indices of each class in to the list called lst_idx_all
|
||||
for m in range(len(data_idx[i])):
|
||||
lst_idx.append(data_idx[i][m][0])
|
||||
lst_idx_all.append(lst_idx)
|
||||
return lst_idx_all
|
||||
|
||||
# This function calculates between classes variances
|
||||
def compute_Sb(self, cls, M_i, M_0):
|
||||
Sb = np.zeros((self.dim, self.dim))
|
||||
for i in cls:
|
||||
B = (M_i[i] - M_0).reshape(self.dim, 1)
|
||||
B_trans = B.transpose()
|
||||
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
|
||||
return Sb
|
||||
|
||||
# This function calculates within classes variances
|
||||
def compute_Sw(self, cls, M_i):
|
||||
Sw = np.zeros((self.dim, self.dim))
|
||||
for i in cls:
|
||||
N_i = float(len(cls[i]))
|
||||
W_WT = np.zeros((self.dim, self.dim))
|
||||
for xk in cls[i]:
|
||||
W = (xk - M_i[i])
|
||||
W_WT += np.outer(W, W)
|
||||
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
|
||||
return Sw
|
||||
|
||||
# Calculating beta and Bi for Sb
|
||||
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
|
||||
# import pdb
|
||||
# pdb.set_trace()
|
||||
B_i = np.zeros((self.classnum, self.dim))
|
||||
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
# pdb.set_trace()
|
||||
# Calculating Bi
|
||||
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
|
||||
for k in range(self.datanum):
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
if k in lst_idx_all[i]:
|
||||
beta = (float(1) / N_i) - (float(1) / self.datanum)
|
||||
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||
else:
|
||||
beta = -(float(1) / self.datanum)
|
||||
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
|
||||
return Sig_beta_B_i_all
|
||||
|
||||
|
||||
# Calculating W_j s separately so we can access all the W_j s anytime
|
||||
def compute_wj(self, data_idx, M_i):
|
||||
W_i = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
for tpl in data_idx[i]:
|
||||
xj = tpl[1]
|
||||
j = tpl[0]
|
||||
W_i[j] = (xj - M_i[i])
|
||||
return W_i
|
||||
|
||||
# Calculating alpha and Wj for Sw
|
||||
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
|
||||
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
for tpl in data_idx[i]:
|
||||
k = tpl[0]
|
||||
for j in lst_idx_all[i]:
|
||||
if k == j:
|
||||
alpha = 1 - (float(1) / N_i)
|
||||
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||
else:
|
||||
alpha = 0 - (float(1) / N_i)
|
||||
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
|
||||
return Sig_alpha_W_i
|
||||
|
||||
# This function calculates log of our prior
|
||||
def lnpdf(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#print 'SB_inv: ', Sb_inv_N
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
|
||||
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
||||
|
||||
# This function calculates derivative of the log of prior function
|
||||
def lnpdf_grad(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
data_idx = self.compute_indices(x)
|
||||
lst_idx_all = self.compute_listIndices(data_idx)
|
||||
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
|
||||
W_i = self.compute_wj(data_idx, M_i)
|
||||
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
|
||||
|
||||
# Calculating inverse of Sb and its transpose and minus
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#print 'SB_inv: ',Sb_inv_N
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
|
||||
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
||||
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
||||
Sw_trans = np.transpose(Sw)
|
||||
|
|
@ -706,7 +1194,9 @@ class DGPLVM(Prior):
|
|||
return np.random.rand(n) # A WRONG implementation
|
||||
|
||||
def __str__(self):
|
||||
return 'DGPLVM_prior'
|
||||
return 'DGPLVM_prior_Raq_TTT'
|
||||
|
||||
|
||||
|
||||
class HalfT(Prior):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
import numpy as np
|
||||
from domains import _POSITIVE,_NEGATIVE, _BOUNDED
|
||||
from .domains import _POSITIVE,_NEGATIVE, _BOUNDED
|
||||
import weakref
|
||||
|
||||
import sys
|
||||
|
|
@ -31,6 +31,16 @@ class Transformation(object):
|
|||
raise NotImplementedError
|
||||
def finv(self, model_param):
|
||||
raise NotImplementedError
|
||||
def log_jacobian(self, model_param):
|
||||
"""
|
||||
compute the log of the jacobian of f, evaluated at f(x)= model_param
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def log_jacobian_grad(self, model_param):
|
||||
"""
|
||||
compute the drivative of the log of the jacobian of f, evaluated at f(x)= model_param
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def gradfactor(self, model_param, dL_dmodel_param):
|
||||
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
|
||||
|
||||
|
|
@ -42,6 +52,8 @@ class Transformation(object):
|
|||
\frac{\frac{\partial L}{\partial f}\left(\left.\partial f(x)}{\partial x}\right|_{x=f^{-1}(f)\right)}
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def gradfactor_non_natural(self, model_param, dL_dmodel_param):
|
||||
return self.gradfactor(model_param, dL_dmodel_param)
|
||||
def initialize(self, f):
|
||||
""" produce a sensible initial value for f(x)"""
|
||||
raise NotImplementedError
|
||||
|
|
@ -50,7 +62,7 @@ class Transformation(object):
|
|||
import matplotlib.pyplot as plt
|
||||
from ...plotting.matplot_dep import base_plots
|
||||
x = np.linspace(-8,8)
|
||||
base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
|
||||
base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw)
|
||||
axes = plt.gca()
|
||||
axes.set_xlabel(xlabel)
|
||||
axes.set_ylabel(ylabel)
|
||||
|
|
@ -70,15 +82,41 @@ class Logexp(Transformation):
|
|||
return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f)))
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def log_jacobian(self, model_param):
|
||||
return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param
|
||||
def log_jacobian_grad(self, model_param):
|
||||
return 1./(np.exp(model_param)-1.)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
||||
class Exponent(Transformation):
|
||||
domain = _POSITIVE
|
||||
def f(self, x):
|
||||
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
|
||||
def finv(self, x):
|
||||
return np.log(x)
|
||||
def gradfactor(self, f, df):
|
||||
return np.einsum('i,i->i', df, f)
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def log_jacobian(self, model_param):
|
||||
return np.log(model_param)
|
||||
def log_jacobian_grad(self, model_param):
|
||||
return 1./model_param
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
||||
|
||||
|
||||
class NormalTheta(Transformation):
|
||||
"Do not use, not officially supported!"
|
||||
_instances = []
|
||||
def __new__(cls, mu_indices, var_indices):
|
||||
def __new__(cls, mu_indices=None, var_indices=None):
|
||||
"Do not use, not officially supported!"
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
|
|
@ -98,6 +136,7 @@ class NormalTheta(Transformation):
|
|||
# that the values are ok
|
||||
# Before:
|
||||
theta[self.var_indices] = np.abs(-.5/theta[self.var_indices])
|
||||
#theta[self.var_indices] = np.exp(-.5/theta[self.var_indices])
|
||||
theta[self.mu_indices] *= theta[self.var_indices]
|
||||
return theta # which is now {mu, var}
|
||||
|
||||
|
|
@ -106,6 +145,7 @@ class NormalTheta(Transformation):
|
|||
varp = muvar[self.var_indices]
|
||||
muvar[self.mu_indices] /= varp
|
||||
muvar[self.var_indices] = -.5/varp
|
||||
#muvar[self.var_indices] = -.5/np.log(varp)
|
||||
|
||||
return muvar # which is now {theta1, theta2}
|
||||
|
||||
|
|
@ -124,7 +164,7 @@ class NormalTheta(Transformation):
|
|||
|
||||
def initialize(self, f):
|
||||
if np.any(f[self.var_indices] < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||
return f
|
||||
|
||||
|
|
@ -139,9 +179,10 @@ class NormalTheta(Transformation):
|
|||
self.var_indices = state[1]
|
||||
|
||||
class NormalNaturalAntti(NormalTheta):
|
||||
"Do not use, not officially supported!"
|
||||
_instances = []
|
||||
_logexp = Logexp()
|
||||
def __new__(cls, mu_indices, var_indices):
|
||||
def __new__(cls, mu_indices=None, var_indices=None):
|
||||
"Do not use, not officially supported!"
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
|
|
@ -170,7 +211,7 @@ class NormalNaturalAntti(NormalTheta):
|
|||
|
||||
def initialize(self, f):
|
||||
if np.any(f[self.var_indices] < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||
return f
|
||||
|
||||
|
|
@ -178,8 +219,10 @@ class NormalNaturalAntti(NormalTheta):
|
|||
return "natantti"
|
||||
|
||||
class NormalEta(Transformation):
|
||||
"Do not use, not officially supported!"
|
||||
_instances = []
|
||||
def __new__(cls, mu_indices, var_indices):
|
||||
def __new__(cls, mu_indices=None, var_indices=None):
|
||||
"Do not use, not officially supported!"
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
|
|
@ -211,7 +254,7 @@ class NormalEta(Transformation):
|
|||
|
||||
def initialize(self, f):
|
||||
if np.any(f[self.var_indices] < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||
return f
|
||||
|
||||
|
|
@ -219,8 +262,10 @@ class NormalEta(Transformation):
|
|||
return "eta"
|
||||
|
||||
class NormalNaturalThroughTheta(NormalTheta):
|
||||
"Do not use, not officially supported!"
|
||||
_instances = []
|
||||
def __new__(cls, mu_indices, var_indices):
|
||||
def __new__(cls, mu_indices=None, var_indices=None):
|
||||
"Do not use, not officially supported!"
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
|
|
@ -250,13 +295,28 @@ class NormalNaturalThroughTheta(NormalTheta):
|
|||
#=======================================================================
|
||||
return dmuvar # which is now the gradient multiplicator
|
||||
|
||||
def gradfactor_non_natural(self, muvar, dmuvar):
|
||||
mu = muvar[self.mu_indices]
|
||||
var = muvar[self.var_indices]
|
||||
#=======================================================================
|
||||
# theta gradients
|
||||
# This works and the gradient checks!
|
||||
dmuvar[self.mu_indices] *= var
|
||||
dmuvar[self.var_indices] *= 2*(var)**2
|
||||
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
|
||||
#=======================================================================
|
||||
|
||||
return dmuvar # which is now the gradient multiplicator for {theta1, theta2}
|
||||
|
||||
def __str__(self):
|
||||
return "natgrad"
|
||||
|
||||
|
||||
class NormalNaturalWhooot(NormalTheta):
|
||||
"Do not use, not officially supported!"
|
||||
_instances = []
|
||||
def __new__(cls, mu_indices, var_indices):
|
||||
def __new__(cls, mu_indices=None, var_indices=None):
|
||||
"Do not use, not officially supported!"
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
|
|
@ -290,8 +350,10 @@ class NormalNaturalWhooot(NormalTheta):
|
|||
return "natgrad"
|
||||
|
||||
class NormalNaturalThroughEta(NormalEta):
|
||||
"Do not use, not officially supported!"
|
||||
_instances = []
|
||||
def __new__(cls, mu_indices, var_indices):
|
||||
def __new__(cls, mu_indices=None, var_indices=None):
|
||||
"Do not use, not officially supported!"
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
|
|
@ -332,7 +394,7 @@ class LogexpNeg(Transformation):
|
|||
return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
|
@ -384,27 +446,11 @@ class LogexpClipped(Logexp):
|
|||
return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve_c'
|
||||
|
||||
class Exponent(Transformation):
|
||||
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
|
||||
domain = _POSITIVE
|
||||
def f(self, x):
|
||||
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
|
||||
def finv(self, x):
|
||||
return np.log(x)
|
||||
def gradfactor(self, f, df):
|
||||
return np.einsum('i,i->i', df, f)
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
||||
class NegativeExponent(Exponent):
|
||||
domain = _NEGATIVE
|
||||
def f(self, x):
|
||||
|
|
@ -440,7 +486,11 @@ class Logistic(Transformation):
|
|||
for instance in cls._instances:
|
||||
if instance().lower == lower and instance().upper == upper:
|
||||
return instance()
|
||||
o = super(Transformation, cls).__new__(cls, lower, upper, *args, **kwargs)
|
||||
newfunc = super(Transformation, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, lower, upper, *args, **kwargs)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
def __init__(self, lower, upper):
|
||||
|
|
@ -458,7 +508,7 @@ class Logistic(Transformation):
|
|||
return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
|
||||
def initialize(self, f):
|
||||
if np.any(np.logical_or(f < self.lower, f > self.upper)):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
#return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
|
||||
#FIXME: Max, zeros_like right?
|
||||
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
|
||||
|
|
|
|||
|
|
@ -1 +1,2 @@
|
|||
from hmc import HMC
|
||||
from samplers import *
|
||||
|
|
|
|||
|
|
@ -4,23 +4,18 @@
|
|||
|
||||
import numpy as np
|
||||
from scipy import linalg, optimize
|
||||
import Tango
|
||||
import sys
|
||||
import re
|
||||
import numdifftools as ndt
|
||||
import pdb
|
||||
import cPickle
|
||||
|
||||
|
||||
class Metropolis_Hastings:
|
||||
def __init__(self,model,cov=None):
|
||||
"""Metropolis Hastings, with tunings according to Gelman et al. """
|
||||
self.model = model
|
||||
current = self.model._get_params_transformed()
|
||||
current = self.model.optimizer_array
|
||||
self.D = current.size
|
||||
self.chains = []
|
||||
if cov is None:
|
||||
self.cov = model.Laplace_covariance()
|
||||
self.cov = np.eye(self.D)
|
||||
else:
|
||||
self.cov = cov
|
||||
self.scale = 2.4/np.sqrt(self.D)
|
||||
|
|
@ -31,20 +26,20 @@ class Metropolis_Hastings:
|
|||
if start is None:
|
||||
self.model.randomize()
|
||||
else:
|
||||
self.model._set_params_transformed(start)
|
||||
self.model.optimizer_array = start
|
||||
|
||||
|
||||
|
||||
def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400):
|
||||
current = self.model._get_params_transformed()
|
||||
fcurrent = self.model.log_likelihood() + self.model.log_prior()
|
||||
def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
|
||||
current = self.model.optimizer_array
|
||||
fcurrent = self.model.log_likelihood() + self.model.log_prior() + \
|
||||
self.model._log_det_jacobian()
|
||||
accepted = np.zeros(Ntotal,dtype=np.bool)
|
||||
for it in range(Ntotal):
|
||||
print "sample %d of %d\r"%(it,Ntotal),
|
||||
sys.stdout.flush()
|
||||
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
|
||||
self.model._set_params_transformed(prop)
|
||||
fprop = self.model.log_likelihood() + self.model.log_prior()
|
||||
self.model.optimizer_array = prop
|
||||
fprop = self.model.log_likelihood() + self.model.log_prior() + \
|
||||
self.model._log_det_jacobian()
|
||||
|
||||
if fprop>fcurrent:#sample accepted, going 'uphill'
|
||||
accepted[it] = True
|
||||
|
|
@ -72,10 +67,11 @@ class Metropolis_Hastings:
|
|||
|
||||
def predict(self,function,args):
|
||||
"""Make a prediction for the function, to which we will pass the additional arguments"""
|
||||
param = self.model._get_params()
|
||||
param = self.model.param_array
|
||||
fs = []
|
||||
for p in self.chain:
|
||||
self.model._set_params(p)
|
||||
self.model.param_array = p
|
||||
fs.append(function(*args))
|
||||
self.model._set_params(param)# reset model to starting state
|
||||
# reset model to starting state
|
||||
self.model.param_array = param
|
||||
return fs
|
||||
|
|
|
|||
101
GPy/testing/rv_transformation_tests.py
Normal file
101
GPy/testing/rv_transformation_tests.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
# Written by Ilias Bilionis
|
||||
"""
|
||||
Test if hyperparameters in models are properly transformed.
|
||||
"""
|
||||
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import scipy.stats as st
|
||||
import GPy
|
||||
|
||||
|
||||
class TestModel(GPy.core.Model):
|
||||
"""
|
||||
A simple GPy model with one parameter.
|
||||
"""
|
||||
def __init__(self):
|
||||
GPy.core.Model.__init__(self, 'test_model')
|
||||
theta = GPy.core.Param('theta', 1.)
|
||||
self.link_parameter(theta)
|
||||
|
||||
def log_likelihood(self):
|
||||
return 0.
|
||||
|
||||
|
||||
class RVTransformationTestCase(unittest.TestCase):
|
||||
|
||||
def _test_trans(self, trans):
|
||||
m = TestModel()
|
||||
prior = GPy.priors.LogGaussian(.5, 0.1)
|
||||
m.theta.set_prior(prior)
|
||||
m.theta.unconstrain()
|
||||
m.theta.constrain(trans)
|
||||
# The PDF of the transformed variables
|
||||
p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0])
|
||||
# To the empirical PDF of:
|
||||
theta_s = prior.rvs(100000)
|
||||
phi_s = trans.finv(theta_s)
|
||||
# which is essentially a kernel density estimation
|
||||
kde = st.gaussian_kde(phi_s)
|
||||
# We will compare the PDF here:
|
||||
phi = np.linspace(phi_s.min(), phi_s.max(), 100)
|
||||
# The transformed PDF of phi should be this:
|
||||
pdf_phi = np.array([p_phi(p) for p in phi])
|
||||
# UNCOMMENT TO SEE GRAPHICAL COMPARISON
|
||||
#import matplotlib.pyplot as plt
|
||||
#fig, ax = plt.subplots()
|
||||
#ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
|
||||
#ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
|
||||
#ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
|
||||
#ax.set_xlabel(r'transformed $\theta$', fontsize=16)
|
||||
#ax.set_ylabel('PDF', fontsize=16)
|
||||
#plt.legend(loc='best')
|
||||
#plt.show(block=True)
|
||||
# END OF PLOT
|
||||
# The following test cannot be very accurate
|
||||
self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1)
|
||||
# Check the gradients at a few random points
|
||||
for i in xrange(10):
|
||||
m.theta = theta_s[i]
|
||||
self.assertTrue(m.checkgrad(verbose=True))
|
||||
|
||||
def test_Logexp(self):
|
||||
self._test_trans(GPy.constraints.Logexp())
|
||||
self._test_trans(GPy.constraints.Exponent())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
quit()
|
||||
m = TestModel()
|
||||
prior = GPy.priors.LogGaussian(0., .9)
|
||||
m.theta.set_prior(prior)
|
||||
|
||||
# The following should return the PDF in terms of the transformed quantities
|
||||
p_phi = lambda(phi): np.exp(-m._objective_grads(phi)[0])
|
||||
|
||||
# Let's look at the transformation phi = log(exp(theta - 1))
|
||||
trans = GPy.constraints.Exponent()
|
||||
m.theta.constrain(trans)
|
||||
# Plot the transformed probability density
|
||||
phi = np.linspace(-8, 8, 100)
|
||||
fig, ax = plt.subplots()
|
||||
# Let's draw some samples of theta and transform them so that we see
|
||||
# which one is right
|
||||
theta_s = prior.rvs(10000)
|
||||
# Transform it to the new variables
|
||||
phi_s = trans.finv(theta_s)
|
||||
# And draw their histogram
|
||||
ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical')
|
||||
# This is to be compared to the PDF of the model expressed in terms of these new
|
||||
# variables
|
||||
ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2)
|
||||
ax.set_xlim(-3, 10)
|
||||
ax.set_xlabel(r'transformed $\theta$', fontsize=16)
|
||||
ax.set_ylabel('PDF', fontsize=16)
|
||||
plt.legend(loc='best')
|
||||
# Now let's test the gradients
|
||||
m.checkgrad(verbose=True)
|
||||
# And show the plot
|
||||
plt.show(block=True)
|
||||
Loading…
Add table
Add a link
Reference in a new issue