diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py
index 419a4104..2e93ddd5 100644
--- a/GPy/models/warped_gp.py
+++ b/GPy/models/warped_gp.py
@@ -5,6 +5,7 @@ import numpy as np
 from ..util.warping_functions import *
 from ..core import GP
 from .. import likelihoods
+from paramz import ObsAr
 from GPy.util.warping_functions import TanhFunction
 from GPy import kern
 
@@ -31,6 +32,10 @@ class WarpedGP(GP):
         GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
         self.link_parameter(self.warping_function)
 
+    def set_XY(self, X=None, Y=None):
+        self.Y_untransformed = Y.copy()
+        GP.set_XY(self, X, self.transform_data())
+
     def _scale_data(self, Y):
         self._Ymax = Y.max()
         self._Ymin = Y.min()
diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py
index 13cda810..ab7e2547 100644
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@@ -59,7 +59,6 @@ class WarpingFunction(Parameterized):
         raise NotImplementedError
 
     def plot(self, xmin, xmax):
-        psi = self.psi
         y = np.arange(xmin, xmax, 0.01)
         f_y = self.f(y)
         from matplotlib import pyplot as plt
@@ -179,6 +178,111 @@ class TanhFunction(WarpingFunction):
         self.d.gradient[:] = warping_grads[0, -1]
 
 
+class LogTanhFunction(WarpingFunction):
+    """
+    Trying to get the best of both worlds...
+    """
+    def __init__(self, n_terms=3, initial_y=None):
+        """
+        n_terms specifies the number of tanh terms to be used
+        """
+        self.n_terms = n_terms
+        self.num_parameters = 3 * self.n_terms + 1
+        self.psi = np.ones((self.n_terms, 3))
+        super(LogTanhFunction, self).__init__(name='warp_tanh')
+        self.psi = Param('psi', self.psi)
+        self.psi[:, :2].constrain_positive()
+        self.d = Param('%s' % ('d'), 1.0, Logexp())
+        self.link_parameter(self.psi)
+        self.link_parameter(self.d)
+        self.initial_y = initial_y
+
+    def f(self, y):
+        """
+        Transform y with f using parameter vector psi
+        psi = [[a,b,c]]
+
+        :math:`f = (y * d) + \\sum_{terms} a * tanh(b *(y + c))`
+        """
+        d = self.d
+        mpsi = self.psi
+        z = d * np.log(y)
+        for i in range(len(mpsi)):
+            a, b, c = mpsi[i]
+            z += a * np.tanh(b * (np.log(y) + c))
+        return z
+
+    def fgrad_y(self, y, return_precalc=False):
+        """
+        gradient of f w.r.t to y ([N x 1])
+
+        :returns: Nx1 vector of derivatives, unless return_precalc is true,
+        then it also returns the precomputed stuff
+        """
+        d = self.d
+        mpsi = self.psi
+
+        # vectorized version
+        S = (mpsi[:,1] * (np.log(y)[:,:,None] + mpsi[:,2])).T
+        R = np.tanh(S)
+        D = 1 - (R ** 2)
+
+        GRAD = (d + (mpsi[:,0:1][:,:,None] * mpsi[:,1:2][:,:,None] * D).sum(axis=0)).T / y
+
+        if return_precalc:
+            return GRAD, S, R, D
+
+        return GRAD
+
+    def fgrad_y_psi(self, y, return_covar_chain=False):
+        """
+        gradient of f w.r.t to y and psi
+
+        :returns: NxIx4 tensor of partial derivatives
+        """
+        mpsi = self.psi
+
+        w, s, r, d = self.fgrad_y(y, return_precalc=True)
+        gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
+        for i in range(len(mpsi)):
+            a,b,c = mpsi[i]
+            gradients[:, :, i, 0] = (b * (1.0/np.cosh(s[i])) ** 2).T / y
+            gradients[:, :, i, 1] = a * (d[i] - 2.0 * s[i] * r[i] * (1.0/np.cosh(s[i])) ** 2).T / y
+            gradients[:, :, i, 2] = (-2.0 * a * (b ** 2) * r[i] * ((1.0 / np.cosh(s[i])) ** 2)).T / y
+        gradients[:, :, 0, 3] = 1.0 / y
+
+        if return_covar_chain:
+            covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
+            for i in range(len(mpsi)):
+                a,b,c = mpsi[i]
+                covar_grad_chain[:, :, i, 0] = (r[i]).T
+                covar_grad_chain[:, :, i, 1] = (a * (np.log(y) + c) * ((1.0 / np.cosh(s[i])) ** 2).T)
+                covar_grad_chain[:, :, i, 2] = a * b * ((1.0 / np.cosh(s[i])) ** 2).T
+            covar_grad_chain[:, :, 0, 3] = np.log(y)
+            return gradients, covar_grad_chain
+
+        return gradients
+
+    def _get_param_names(self):
+        variables = ['a', 'b', 'c', 'd']
+        names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)]
+                     for q in range(self.n_terms)],[])
+        names.append('warp_tanh')
+        return names
+
+    def update_grads(self, Y_untransformed, Kiy):
+        grad_y = self.fgrad_y(Y_untransformed)
+        grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed,
+                                                return_covar_chain=True)
+        djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
+        dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
+
+        warping_grads = -dquad_dpsi + djac_dpsi
+
+        self.psi.gradient[:] = warping_grads[:, :-1]
+        self.d.gradient[:] = warping_grads[0, -1]
+
+
 class LogFunction(WarpingFunction):
     """
     Easy wrapper for applying a fixed log warping function to