From ba4bd50924aa9d3bcbdf2e644094cff8ac290cb9 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 16 May 2013 15:17:54 +0100 Subject: [PATCH] stability enhancing clipping in logexp_clipped and reverse of stability clipping of parameters --- GPy/core/transformations.py | 26 ++++++++------- GPy/kern/kern.py | 65 ++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 45 deletions(-) diff --git a/GPy/core/transformations.py b/GPy/core/transformations.py index 1ad36f78..6e5741fe 100644 --- a/GPy/core/transformations.py +++ b/GPy/core/transformations.py @@ -39,23 +39,25 @@ class logexp(transformation): return '(+ve)' class logexp_clipped(transformation): - max_bound = 1e300 + max_bound = 1e10 + min_bound = 1e-10 log_max_bound = np.log(max_bound) - def __init__(self, lower=1e-15): + log_min_bound = np.log(min_bound) + def __init__(self, lower=1e-6): self.domain = 'positive' self.lower = lower def f(self, x): - exp = np.exp(np.where(x > self.log_max_bound, self.log_max_bound, x)) + exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound)) f = np.log(1. + exp) return f def finv(self, f): - return np.log(np.exp(f) - 1.) + return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.) def gradfactor(self, f): ef = np.exp(f) gf = (ef - 1.) 
/ ef return np.where(f < self.lower, 0, gf) - def initialize(self,f): - if np.any(f<0.): + def initialize(self, f): + if np.any(f < 0.): print "Warning: changing parameters to satisfy constraints" return np.abs(f) def __str__(self): @@ -71,7 +73,7 @@ class exponent(transformation): def gradfactor(self, f): return f def initialize(self, f): - if np.any(f<0.): + if np.any(f < 0.): print "Warning: changing parameters to satisfy constraints" return np.abs(f) def __str__(self): @@ -87,7 +89,7 @@ class negative_exponent(transformation): def gradfactor(self, f): return f def initialize(self, f): - if np.any(f>0.): + if np.any(f > 0.): print "Warning: changing parameters to satisfy constraints" return -np.abs(f) def __str__(self): @@ -118,11 +120,11 @@ class logistic(transformation): def finv(self, f): return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf)) def gradfactor(self, f): - return (f-self.lower)*(self.upper-f)/self.difference - def initialize(self,f): - if np.any(np.logical_or(f<self.lower,f>self.upper)): + return (f - self.lower) * (self.upper - f) / self.difference + def initialize(self, f): + if np.any(np.logical_or(f < self.lower, f > self.upper)): print "Warning: changing parameters to satisfy constraints" - return np.where(np.logical_or(f<self.lower,f>self.upper),self.f(f*0.),f) + return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f) def __str__(self): return '({},{})'.format(self.lower, self.upper) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 13c17139..c682fdcc 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -66,7 +66,7 @@ class kern(parameterised): def _transform_gradients(self, g): x = self._get_params() - [np.put(x,i,x*t.gradfactor(x[i])) for i,t in zip(self.constrained_indices, self.constraints)] + [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] if
len(self.tied_indices) or len(self.fixed_indices): to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) @@ -88,7 +88,7 @@ class kern(parameterised): """ return self.add(other) - def add(self, other,tensor=False): + def add(self, other, tensor=False): """ Add another kernel to this one. Both kernels are defined on the same _space_ :param other: the other kernel to be added @@ -103,7 +103,7 @@ class kern(parameterised): newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) # transfer constraints: - newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices] + newkern.constrained_indices = self.constrained_indices + [x + self.Nparam for x in other.constrained_indices] newkern.constraints = self.constraints + other.constraints newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.fixed_values = self.fixed_values + other.fixed_values @@ -113,7 +113,7 @@ class kern(parameterised): assert self.D == other.D newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) # transfer constraints: - newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices] + newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices] newkern.constraints = self.constraints + other.constraints newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.fixed_values = self.fixed_values + other.fixed_values @@ -126,7 +126,7 @@ class kern(parameterised): """ return self.prod(other) - def prod(self, other,tensor=False): + def prod(self, other, tensor=False): """ multiply two kernels (either on the same space, or on the tensor product of the input space) :param other: the other kernel to be added @@ -136,12 +136,12 @@ class kern(parameterised): K2 = other.copy() slices = [] - for sl1, sl2 
in itertools.product(K1.input_slices,K2.input_slices): - s1, s2 = [False]*K1.D, [False]*K2.D + for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): + s1, s2 = [False] * K1.D, [False] * K2.D s1[sl1], s2[sl2] = [True], [True] - slices += [s1+s2] + slices += [s1 + s2] - newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] + newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] if tensor: newkern = kern(K1.D + K2.D, newkernparts, slices) @@ -189,14 +189,13 @@ class kern(parameterised): index = np.where(index_param == i)[0] if index.size > 1: self.tie_params(index) - for i,t in zip(prev_constr_ind,prev_constr): - self.constrain(np.where(index_param == i)[0],t) + for i, t in zip(prev_constr_ind, prev_constr): + self.constrain(np.where(index_param == i)[0], t) def _get_params(self): return np.hstack([p._get_params() for p in self.parts]) def _set_params(self, x): - x = np.clip(x, -1e300, 1e300) [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)] def _get_param_names(self): @@ -209,15 +208,15 @@ class kern(parameterised): return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], []) def K(self, X, X2=None, which_parts='all'): - if which_parts=='all': - which_parts = [True]*self.Nparts + if which_parts == 'all': + which_parts = [True] * self.Nparts assert X.shape[1] == self.D if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] else: target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:,i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] + [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] return target def dK_dtheta(self, dL_dK, X, X2=None): @@ 
-249,8 +248,8 @@ class kern(parameterised): return target def Kdiag(self, X, which_parts='all'): - if which_parts=='all': - which_parts = [True]*self.Nparts + if which_parts == 'all': + which_parts = [True] * self.Nparts assert X.shape[1] == self.D target = np.zeros(X.shape[0]) [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on] @@ -271,22 +270,22 @@ class kern(parameterised): def psi0(self, Z, mu, S): target = np.zeros(mu.shape[0]) - [p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] return target def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): target = np.zeros(self.Nparam) - [p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] + [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] return self._transform_gradients(target) def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)] + [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] return target_mu, target_S def psi1(self, Z, mu, S): target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)] + [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] return target def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): @@ -315,7 +314,7 @@ class kern(parameterised): [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, 
self.input_slices)] # compute the "cross" terms - #TODO: input_slices needed + # TODO: input_slices needed for p1, p2 in itertools.combinations(self.parts, 2): # white doesn;t combine with anything if p1.name == 'white' or p2.name == 'white': @@ -336,9 +335,9 @@ class kern(parameterised): target += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) # rbf X linear elif p1.name == 'linear' and p2.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO elif p2.name == 'linear' and p1.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO else: raise NotImplementedError, "psi2 cannot be computed for this kernel" return target @@ -366,7 +365,7 @@ class kern(parameterised): p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2]) # linear X bias elif p1.name == 'bias' and p2.name == 'linear': - p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1]) + p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1]) psi1 = np.zeros((mu.shape[0], Z.shape[0])) p2.psi1(Z, mu, S, psi1) p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1]) @@ -377,9 +376,9 @@ class kern(parameterised): p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2]) # rbf X linear elif p1.name == 'linear' and p2.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO elif p2.name == 'linear' and p1.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO else: raise NotImplementedError, "psi2 cannot be computed for this kernel" @@ -390,7 +389,7 @@ class kern(parameterised): [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] # compute the "cross" terms - #TODO: we need input_slices here. + # TODO: we need input_slices here. 
for p1, p2 in itertools.combinations(self.parts, 2): # white doesn;t combine with anything if p1.name == 'white' or p2.name == 'white': @@ -407,9 +406,9 @@ class kern(parameterised): p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) # rbf X linear elif p1.name == 'linear' and p2.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO elif p2.name == 'linear' and p1.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO else: raise NotImplementedError, "psi2 cannot be computed for this kernel" @@ -420,7 +419,7 @@ class kern(parameterised): [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] # compute the "cross" terms - #TODO: we need input_slices here. + # TODO: we need input_slices here. for p1, p2 in itertools.combinations(self.parts, 2): # white doesn;t combine with anything if p1.name == 'white' or p2.name == 'white': @@ -437,9 +436,9 @@ class kern(parameterised): p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) # rbf X linear elif p1.name == 'linear' and p2.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO elif p2.name == 'linear' and p1.name == 'rbf': - raise NotImplementedError # TODO + raise NotImplementedError # TODO else: raise NotImplementedError, "psi2 cannot be computed for this kernel"