From 4dc6f0ec6c5e025642910dc5a9597a2b3d14577b Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Wed, 24 Feb 2016 13:08:52 +0000 Subject: [PATCH] first try on logistic function --- GPy/util/warping_functions.py | 150 ++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index e8c99540..10fddb50 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -241,3 +241,153 @@ class IdentityFunction(WarpingFunction): def f_inv(self, z, y=None): return z + + +class LogisticFunction(WarpingFunction): + """ + A sum of logistic functions. + """ + def __init__(self, n_terms=3, initial_y=None): + """ + n_terms specifies the number of logistic terms to be used + """ + self.n_terms = n_terms + self.num_parameters = 3 * self.n_terms + 1 + self.psi = np.ones((self.n_terms, 3)) + super(LogisticFunction, self).__init__(name='warp_logistic') + self.psi = Param('psi', self.psi) + self.psi[:, :2].constrain_positive() + self.d = Param('%s' % ('d'), 1.0, Logexp()) + self.link_parameter(self.psi) + self.link_parameter(self.d) + self.initial_y = initial_y + + def _logistic(self, x): + return 1. / (1 + np.exp(-x)) + + def f(self, y): + """ + Transform y with f using parameter vector psi + psi = [[a,b,c]] + + :math:`f = (y * d) + \\sum_{terms} a * logistic(b *(y + c))` + """ + d = self.d + mpsi = self.psi + z = d * y.copy() + for i in xrange(self.n_terms): + a, b, c = mpsi[i] + z += a * self._logistic(b * (y + c)) + return z + + def f_inv(self, z, max_iterations=100, y=None): + """ + calculate the numerical inverse of f + + :param max_iterations: maximum number of N.R. iterations + """ + z = z.copy() + if y is None: + # The idea here is to initialize y with +1 where + # z is positive and -1 where it is negative. + # For negative z, Newton-Raphson diverges + # if we initialize y with a positive value (and vice-versa). + y = ((z > 0) * 1.) - (z <= 0) + if self.initial_y is not None: + y *= self.initial_y + it = 0 + update = np.inf + while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations): + fy = self.f(y) + fgrady = self.fgrad_y(y) + update = (fy - z) / fgrady + y -= update + it += 1 + if it == max_iterations: + print("WARNING!!! Maximum number of iterations reached in f_inv ") + print("Sum of updates: %.4f" % np.sum(update)) + return y + + def fgrad_y(self, y, return_precalc=False): + """ + Gradient of f w.r.t to y ([N x 1]) + This vectorized version calculates all summation terms + at the same time (since the grad of a sum is the sum of the grads). + + :returns: Nx1 vector of derivatives, unless return_precalc is true, + then it also returns the precomputed stuff + """ + d = self.d + mpsi = self.psi + + # vectorized version + # term = b * (y + c) + term = (mpsi[:, 1] * (y[:, :, None] + mpsi[:, 2])).T + logistic_term = self._logistic(term) + logistic_grad = logistic_term - (logistic_term ** 2) + + #S = (mpsi[:,1] * (y[:,:,None] + mpsi[:,2])).T + #R = np.tanh(S) + #D = 1 - (R ** 2) + + #GRAD = (d + (mpsi[:,0:1][:,:,None] * mpsi[:,1:2][:,:,None] * D).sum(axis=0)).T + grad = (d + (mpsi[:, :1][:, : ,None] * mpsi[:, 1:2][:, :, None] * + logistic_grad).sum(axis=0)).T + + if return_precalc: + return grad, logistic_term, logistic_grad + return grad + + def fgrad_y_psi(self, y, psi): + """ + gradient of f w.r.t to y and psi + + :returns: NxIx4 tensor of partial derivatives + """ + mpsi = self.psi + + grad, l_term, l_grad = self.fgrad_y(y, return_precalc=True) + gradients = np.zeros((y.shape[0], y.shape[1], self.n_terms, 4)) + for i in xrange(self.n_terms): + a, b, c = mpsi[i] + gradients[:, :, i, 0] = b * l_grad[i].T + b2l_term = b - (2 * l_term[i]) + al_grad = a * l_grad[i].T + #gradients[:, :, i, 1] = a * (d[i] - 2.0 * s[i] * r[i] * (1.0/np.cosh(s[i])) ** 2).T + gradients[:, :, i, 1] = (1 + ((y + c) * b2l_term)) * al_grad + #gradients[:, :, i, 2] = (-2.0 * a * (b ** 2) * r[i] * ((1.0 / np.cosh(s[i])) ** 2)).T + gradients[:, :, i, 2] = b * b2l_term * al_grad + gradients[:, :, 0, 3] = 1.0 + + if return_covar_chain: + covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4)) + for i in xrange(self.n_terms): + a, b, c = mpsi[i] + covar_grad_chain[:, :, i, 0] = l_term[i].T + al_grad = a * l_grad[i].T + covar_grad_chain[:, :, i, 1] = (y + c) * al_grad + covar_grad_chain[:, :, i, 2] = b * al_grad + covar_grad_chain[:, :, 0, 3] = y + return gradients, covar_grad_chain + + return gradients + + def _get_param_names(self): + variables = ['a', 'b', 'c', 'd'] + names = sum([['warp_logistic_%s_t%i' % (variables[n],q) for n in range(3)] + for q in range(self.n_terms)],[]) + names.append('warp_logistic') + return names + + def update_grads(self, Y_untransformed, Kiy): + grad_y = self.fgrad_y(Y_untransformed) + grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed, + return_covar_chain=True) + djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0) + dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0) + + warping_grads = -dquad_dpsi + djac_dpsi + + self.psi.gradient[:] = warping_grads[:, :-1] + self.d.gradient[:] = warping_grads[0, -1] +