Add stability-enhancing clipping in logexp_clipped, and remove the stability clipping of parameters in _set_params

This commit is contained in:
Max Zwiessele 2013-05-16 15:17:54 +01:00
parent 524c6e26bf
commit ba4bd50924
2 changed files with 46 additions and 45 deletions

View file

@ -39,23 +39,25 @@ class logexp(transformation):
return '(+ve)' return '(+ve)'
class logexp_clipped(transformation): class logexp_clipped(transformation):
max_bound = 1e300 max_bound = 1e10
min_bound = 1e-10
log_max_bound = np.log(max_bound) log_max_bound = np.log(max_bound)
def __init__(self, lower=1e-15): log_min_bound = np.log(min_bound)
def __init__(self, lower=1e-6):
self.domain = 'positive' self.domain = 'positive'
self.lower = lower self.lower = lower
def f(self, x): def f(self, x):
exp = np.exp(np.where(x > self.log_max_bound, self.log_max_bound, x)) exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
f = np.log(1. + exp) f = np.log(1. + exp)
return f return f
def finv(self, f): def finv(self, f):
return np.log(np.exp(f) - 1.) return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.)
def gradfactor(self, f): def gradfactor(self, f):
ef = np.exp(f) ef = np.exp(f)
gf = (ef - 1.) / ef gf = (ef - 1.) / ef
return np.where(f < self.lower, 0, gf) return np.where(f < self.lower, 0, gf)
def initialize(self,f): def initialize(self, f):
if np.any(f<0.): if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints" print "Warning: changing parameters to satisfy constraints"
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
@ -71,7 +73,7 @@ class exponent(transformation):
def gradfactor(self, f): def gradfactor(self, f):
return f return f
def initialize(self, f): def initialize(self, f):
if np.any(f<0.): if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints" print "Warning: changing parameters to satisfy constraints"
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
@ -87,7 +89,7 @@ class negative_exponent(transformation):
def gradfactor(self, f): def gradfactor(self, f):
return f return f
def initialize(self, f): def initialize(self, f):
if np.any(f>0.): if np.any(f > 0.):
print "Warning: changing parameters to satisfy constraints" print "Warning: changing parameters to satisfy constraints"
return -np.abs(f) return -np.abs(f)
def __str__(self): def __str__(self):
@ -118,11 +120,11 @@ class logistic(transformation):
def finv(self, f): def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf)) return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f): def gradfactor(self, f):
return (f-self.lower)*(self.upper-f)/self.difference return (f - self.lower) * (self.upper - f) / self.difference
def initialize(self,f): def initialize(self, f):
if np.any(np.logical_or(f<self.lower,f>self.upper)): if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints" print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f<self.lower,f>self.upper),self.f(f*0.),f) return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
def __str__(self): def __str__(self):
return '({},{})'.format(self.lower, self.upper) return '({},{})'.format(self.lower, self.upper)

View file

@ -66,7 +66,7 @@ class kern(parameterised):
def _transform_gradients(self, g): def _transform_gradients(self, g):
x = self._get_params() x = self._get_params()
[np.put(x,i,x*t.gradfactor(x[i])) for i,t in zip(self.constrained_indices, self.constraints)] [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.fixed_indices): if len(self.tied_indices) or len(self.fixed_indices):
to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
@ -88,7 +88,7 @@ class kern(parameterised):
""" """
return self.add(other) return self.add(other)
def add(self, other,tensor=False): def add(self, other, tensor=False):
""" """
Add another kernel to this one. Both kernels are defined on the same _space_ Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added :param other: the other kernel to be added
@ -103,7 +103,7 @@ class kern(parameterised):
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
# transfer constraints: # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices] newkern.constrained_indices = self.constrained_indices + [x + self.Nparam for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values newkern.fixed_values = self.fixed_values + other.fixed_values
@ -113,7 +113,7 @@ class kern(parameterised):
assert self.D == other.D assert self.D == other.D
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints: # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices] newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values newkern.fixed_values = self.fixed_values + other.fixed_values
@ -126,7 +126,7 @@ class kern(parameterised):
""" """
return self.prod(other) return self.prod(other)
def prod(self, other,tensor=False): def prod(self, other, tensor=False):
""" """
multiply two kernels (either on the same space, or on the tensor product of the input space) multiply two kernels (either on the same space, or on the tensor product of the input space)
:param other: the other kernel to be added :param other: the other kernel to be added
@ -136,12 +136,12 @@ class kern(parameterised):
K2 = other.copy() K2 = other.copy()
slices = [] slices = []
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices): for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False]*K1.D, [False]*K2.D s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True] s1[sl1], s2[sl2] = [True], [True]
slices += [s1+s2] slices += [s1 + s2]
newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor: if tensor:
newkern = kern(K1.D + K2.D, newkernparts, slices) newkern = kern(K1.D + K2.D, newkernparts, slices)
@ -189,14 +189,13 @@ class kern(parameterised):
index = np.where(index_param == i)[0] index = np.where(index_param == i)[0]
if index.size > 1: if index.size > 1:
self.tie_params(index) self.tie_params(index)
for i,t in zip(prev_constr_ind,prev_constr): for i, t in zip(prev_constr_ind, prev_constr):
self.constrain(np.where(index_param == i)[0],t) self.constrain(np.where(index_param == i)[0], t)
def _get_params(self): def _get_params(self):
return np.hstack([p._get_params() for p in self.parts]) return np.hstack([p._get_params() for p in self.parts])
def _set_params(self, x): def _set_params(self, x):
x = np.clip(x, -1e300, 1e300)
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)] [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
def _get_param_names(self): def _get_param_names(self):
@ -209,15 +208,15 @@ class kern(parameterised):
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], []) return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
def K(self, X, X2=None, which_parts='all'): def K(self, X, X2=None, which_parts='all'):
if which_parts=='all': if which_parts == 'all':
which_parts = [True]*self.Nparts which_parts = [True] * self.Nparts
assert X.shape[1] == self.D assert X.shape[1] == self.D
if X2 is None: if X2 is None:
target = np.zeros((X.shape[0], X.shape[0])) target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
else: else:
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:,i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
return target return target
def dK_dtheta(self, dL_dK, X, X2=None): def dK_dtheta(self, dL_dK, X, X2=None):
@ -249,8 +248,8 @@ class kern(parameterised):
return target return target
def Kdiag(self, X, which_parts='all'): def Kdiag(self, X, which_parts='all'):
if which_parts=='all': if which_parts == 'all':
which_parts = [True]*self.Nparts which_parts = [True] * self.Nparts
assert X.shape[1] == self.D assert X.shape[1] == self.D
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on] [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
@ -271,22 +270,22 @@ class kern(parameterised):
def psi0(self, Z, mu, S): def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0]) target = np.zeros(mu.shape[0])
[p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)] [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)] [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S return target_mu, target_S
def psi1(self, Z, mu, S): def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0])) target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)] [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
@ -315,7 +314,7 @@ class kern(parameterised):
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms # compute the "cross" terms
#TODO: input_slices needed # TODO: input_slices needed
for p1, p2 in itertools.combinations(self.parts, 2): for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything # white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white': if p1.name == 'white' or p2.name == 'white':
@ -336,9 +335,9 @@ class kern(parameterised):
target += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
# rbf X linear # rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf': elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf': elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
else: else:
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target return target
@ -366,7 +365,7 @@ class kern(parameterised):
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2]) p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
# linear X bias # linear X bias
elif p1.name == 'bias' and p2.name == 'linear': elif p1.name == 'bias' and p2.name == 'linear':
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1]) p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1])
psi1 = np.zeros((mu.shape[0], Z.shape[0])) psi1 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z, mu, S, psi1) p2.psi1(Z, mu, S, psi1)
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1]) p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
@ -377,9 +376,9 @@ class kern(parameterised):
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2]) p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
# rbf X linear # rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf': elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf': elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
else: else:
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"
@ -390,7 +389,7 @@ class kern(parameterised):
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms # compute the "cross" terms
#TODO: we need input_slices here. # TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2): for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything # white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white': if p1.name == 'white' or p2.name == 'white':
@ -407,9 +406,9 @@ class kern(parameterised):
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
# rbf X linear # rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf': elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf': elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
else: else:
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"
@ -420,7 +419,7 @@ class kern(parameterised):
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms # compute the "cross" terms
#TODO: we need input_slices here. # TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2): for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything # white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white': if p1.name == 'white' or p2.name == 'white':
@ -437,9 +436,9 @@ class kern(parameterised):
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
# rbf X linear # rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf': elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf': elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO raise NotImplementedError # TODO
else: else:
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"