From c22167153085b9534a8b335de6b28c06bbc4a84b Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Thu, 30 Jul 2015 17:05:03 +0100 Subject: [PATCH 01/14] added a small test for warped gps --- GPy/testing/model_tests.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index ce78ee88..f609f378 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -203,6 +203,13 @@ class MiscTests(unittest.TestCase): m.optimize() print(m) + def test_warped_gp(self): + k = GPy.kern.RBF(1) + m = GPy.models.WarpedGP(self.X, self.Y, kernel=k) + m.randomize() + m.optimize() + print(m) + class GradientTests(np.testing.TestCase): def setUp(self): ###################################### From 7a7d563576c2d98fc33acee0ac8ba9c61fde6176 Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Thu, 30 Jul 2015 17:09:31 +0100 Subject: [PATCH 02/14] commented out has_uncertain_inputs in warped gps since it breaks plotting --- GPy/models/warped_gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index 5bc9a417..b5654053 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -23,7 +23,7 @@ class WarpedGP(GP): self.scale_data = False if self.scale_data: Y = self._scale_data(Y) - self.has_uncertain_inputs = False + #self.has_uncertain_inputs = False self.Y_untransformed = Y.copy() self.predict_in_warped_space = False likelihood = likelihoods.Gaussian() From 45321f536bc93b34524d0c5737fd25e73bfdcaba Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Thu, 30 Jul 2015 17:19:16 +0100 Subject: [PATCH 03/14] added keywords to predict in warped gps because they are used in the plotting method --- GPy/models/warped_gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index b5654053..55825be0 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -69,7 +69,7 @@ class WarpedGP(GP): def plot_warping(self): self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max()) - def predict(self, Xnew, which_parts='all', pred_init=None): + def predict(self, Xnew, which_parts='all', pred_init=None, Y_metadata=None, full_cov=False): # normalize X values # Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale mu, var = GP._raw_predict(self, Xnew) From 00a209f4cb91cd40557c78142443e43ef78ddbed Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Thu, 30 Jul 2015 17:33:42 +0100 Subject: [PATCH 04/14] added predict_quantiles method to warped gps --- GPy/models/warped_gp.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index 55825be0..eec37e48 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -69,7 +69,7 @@ class WarpedGP(GP): def plot_warping(self): self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max()) - def predict(self, Xnew, which_parts='all', pred_init=None, Y_metadata=None, full_cov=False): + def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None): # normalize X values # Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale mu, var = GP._raw_predict(self, Xnew) @@ -86,6 +86,27 @@ class WarpedGP(GP): return mean, var + def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None): + """ + Get the predictive quantiles around the prediction at X + + :param X: The points at which to make a prediction + :type X: np.ndarray (Xnew x self.input_dim) + :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval + :type quantiles: tuple + :returns: list of quantiles for each X and predictive quantiles for interval combination + :rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)] + """ + m, v = self._raw_predict(X, full_cov=False) + if self.normalizer is not None: + m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) + a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) + #print a.shape + new_a = self.warping_function.f_inv(a) + new_b = self.warping_function.f_inv(b) + return [new_a, new_b] + #return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) + if __name__ == '__main__': X = np.random.randn(100, 1) Y = np.sin(X) + np.random.randn(100, 1)*0.05 From cf4c5af48764762cf141b901fe4fef7f1b9c00b0 Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Thu, 30 Jul 2015 17:38:21 +0100 Subject: [PATCH 05/14] if the plot is a warped gp, then it plots Y before warping --- GPy/plotting/matplot_dep/models_plots.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 9f841372..9ad166b5 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -10,6 +10,7 @@ import numpy as np from base_plots import gpplot, x_frame1D, x_frame2D from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression +from ...models.warped_gp import WarpedGP from scipy import sparse from ...core.parameterization.variational import VariationalPosterior @@ -73,6 +74,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', else: X = model.X Y = model.Y + + if isinstance(model, WarpedGP): + Y = model.Y_untransformed + if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) if hasattr(model, 'Z'): Z = model.Z From 3aa563d5bad488d3e577ca3c457d2519e0255f21 Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Tue, 4 Aug 2015 14:22:46 +0100 Subject: [PATCH 06/14] first try in implementing warped mean --- GPy/models/warped_gp.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index eec37e48..540b6cb2 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -25,7 +25,7 @@ class WarpedGP(GP): Y = self._scale_data(Y) #self.has_uncertain_inputs = False self.Y_untransformed = Y.copy() - self.predict_in_warped_space = False + self.predict_in_warped_space = True likelihood = likelihoods.Gaussian() GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel) @@ -69,7 +69,19 @@ class WarpedGP(GP): def plot_warping(self): self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max()) - def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None): + def _get_warped_mean(self, mean, var, pred_init=None, deg_gauss_hermite=100): + """ + Calculate the warped mean by using Gauss-Hermite quadrature. + """ + gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite) + gh_samples = gh_samples[:,None] + gh_weights = gh_weights[None,:] + arg1 = gh_samples.dot(var.T) * np.sqrt(2) + arg2 = np.ones(shape=gh_samples.shape).dot(mean.T) + return gh_weights.dot(self.warping_function.f_inv(arg1 + arg2, y=pred_init)) / np.sqrt(np.pi) + + def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None, + median=False, deg_gauss_hermite=100): # normalize X values # Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale mu, var = GP._raw_predict(self, Xnew) @@ -78,13 +90,17 @@ class WarpedGP(GP): mean, var = self.likelihood.predictive_values(mu, var) if self.predict_in_warped_space: - mean = self.warping_function.f_inv(mean, y=pred_init) + if median: + pred = self.warping_function.f_inv(mean, y=pred_init) + else: + pred = self._get_warped_mean(mean, var, pred_init=pred_init, + deg_gauss_hermite=deg_gauss_hermite).T var = self.warping_function.f_inv(var) if self.scale_data: - mean = self._unscale_data(mean) + pred = self._unscale_data(pred) - return mean, var + return pred, var def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None): """ From 2426580d4d2f98a0b8fcb92208ed91330acc466f Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Tue, 4 Aug 2015 18:46:10 +0100 Subject: [PATCH 07/14] implemented variance using gauss-hermite --- GPy/models/warped_gp.py | 36 ++++++++++++++++++++++++++++------- GPy/util/warping_functions.py | 1 + 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index 540b6cb2..0be3d2fe 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -64,11 +64,17 @@ class WarpedGP(GP): def log_likelihood(self): ll = GP.log_likelihood(self) jacobian = self.warping_function.fgrad_y(self.Y_untransformed) + print np.log(jacobian) return ll + np.log(jacobian).sum() def plot_warping(self): self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max()) + def _get_warped_term(self, mean, var, gh_samples, pred_init=None): + arg1 = gh_samples.dot(var.T) * np.sqrt(2) + arg2 = np.ones(shape=gh_samples.shape).dot(mean.T) + return self.warping_function.f_inv(arg1 + arg2, y=pred_init) + def _get_warped_mean(self, mean, var, pred_init=None, deg_gauss_hermite=100): """ Calculate the warped mean by using Gauss-Hermite quadrature. @@ -76,9 +82,17 @@ class WarpedGP(GP): gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite) gh_samples = gh_samples[:,None] gh_weights = gh_weights[None,:] - arg1 = gh_samples.dot(var.T) * np.sqrt(2) - arg2 = np.ones(shape=gh_samples.shape).dot(mean.T) - return gh_weights.dot(self.warping_function.f_inv(arg1 + arg2, y=pred_init)) / np.sqrt(np.pi) + return gh_weights.dot(self._get_warped_term(mean, var, gh_samples)) / np.sqrt(np.pi) + + def _get_warped_variance(self, mean, var, pred_init=None, deg_gauss_hermite=100): + gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite) + gh_samples = gh_samples[:,None] + gh_weights = gh_weights[None,:] + arg1 = gh_weights.dot(self._get_warped_term(mean, var, gh_samples, + pred_init=pred_init) ** 2) / np.sqrt(np.pi) + arg2 = self._get_warped_mean(mean, var, pred_init=pred_init, + deg_gauss_hermite=deg_gauss_hermite) + return arg1 - (arg2 ** 2) def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None, median=False, deg_gauss_hermite=100): @@ -91,16 +105,24 @@ class WarpedGP(GP): if self.predict_in_warped_space: if median: - pred = self.warping_function.f_inv(mean, y=pred_init) + #print 'MEDIAN!' + wmean = self.warping_function.f_inv(mean, y=pred_init) else: - pred = self._get_warped_mean(mean, var, pred_init=pred_init, + #print 'MEAN!' + wmean = self._get_warped_mean(mean, var, pred_init=pred_init, + deg_gauss_hermite=deg_gauss_hermite).T + #var = self.warping_function.f_inv(var) + wvar = self._get_warped_variance(mean, var, pred_init=pred_init, deg_gauss_hermite=deg_gauss_hermite).T - var = self.warping_function.f_inv(var) + else: + wmean = mean + #wvar = var + wvar = self.warping_function.f_inv(var) if self.scale_data: pred = self._unscale_data(pred) - return pred, var + return wmean, wvar def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None): """ diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index a56f8c27..37f4ef3e 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -45,6 +45,7 @@ class WarpingFunction(Parameterized): plt.xlabel('y') plt.ylabel('f(y)') plt.title('warping function') + plt.show() class TanhWarpingFunction(WarpingFunction): From 7fd8c9c556d9bc42d2623433b4c3f454e0baaf1e Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Wed, 5 Aug 2015 10:35:15 +0100 Subject: [PATCH 08/14] added identity warp function and change np.argmin to np.nanargmin in optimize_restarts --- GPy/core/model.py | 2 +- GPy/util/warping_functions.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 9d8b89f4..cf45ee3c 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -104,7 +104,7 @@ class Model(Parameterized): raise e if len(self.optimization_runs): - i = np.argmin([o.f_opt for o in self.optimization_runs]) + i = np.nanargmin([o.f_opt for o in self.optimization_runs]) self.optimizer_array = self.optimization_runs[i].x_opt else: self.optimizer_array = initial_parameters diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index 37f4ef3e..00e24c4b 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -293,3 +293,25 @@ class TanhWarpingFunction_d(WarpingFunction): names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[]) names.append('warp_tanh_d') return names + + +class IdentityFunction(WarpingFunction): + """ + Identity warping function. This is for testing and sanity check purposes + and should not be used in practice. + """ + def __init__(self): + self.num_parameters = 0 + super(IdentityFunction, self).__init__(name='identity') + + def f(self, y): + return y + + def fgrad_y(self, y): + return 1.0 + + def fgrad_y_psi(self,y): + return 1.0 + + def f_inv(self,z): + return z From 76bc0bec254f84aa7ffc5a4ef930b38b33aa385f Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Wed, 5 Aug 2015 10:50:59 +0100 Subject: [PATCH 09/14] added initial test for warped gps using identity function --- GPy/models/warped_gp.py | 1 - GPy/testing/model_tests.py | 3 ++- GPy/util/warping_functions.py | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index 0be3d2fe..1ce0e92b 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -64,7 +64,6 @@ class WarpedGP(GP): def log_likelihood(self): ll = GP.log_likelihood(self) jacobian = self.warping_function.fgrad_y(self.Y_untransformed) - print np.log(jacobian) return ll + np.log(jacobian).sum() def plot_warping(self): diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index f609f378..feec7211 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -205,7 +205,8 @@ class MiscTests(unittest.TestCase): def test_warped_gp(self): k = GPy.kern.RBF(1) - m = GPy.models.WarpedGP(self.X, self.Y, kernel=k) + warp = GPy.util.warping_functions.IdentityFunction() + m = GPy.models.WarpedGP(self.X, self.Y, kernel=k, warping_function=warp) m.randomize() m.optimize() print(m) diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index 00e24c4b..a3a282c8 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -301,17 +301,26 @@ class IdentityFunction(WarpingFunction): and should not be used in practice. """ def __init__(self): - self.num_parameters = 0 + self.num_parameters = 4 + self.psi = Param('psi', np.zeros((1,3))) + self.d = Param('%s' % ('d'), 1.0, Logexp()) super(IdentityFunction, self).__init__(name='identity') + self.link_parameter(self.psi) + self.link_parameter(self.d) + def f(self, y): return y def fgrad_y(self, y): - return 1.0 + return np.ones(y.shape) - def fgrad_y_psi(self,y): - return 1.0 + def fgrad_y_psi(self, y, return_covar_chain=False): + gradients = np.ones((y.shape[0], y.shape[1], len(self.psi), 4)) + if return_covar_chain: + return gradients, gradients + return gradients + def f_inv(self,z): return z From 995de0f3992eb2bbc2c31b289f8d02149068dcaa Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Wed, 5 Aug 2015 19:05:48 +0100 Subject: [PATCH 10/14] added a new test which tries to replicate Snelson's toy 1D but NR seems to diverge... --- GPy/models/warped_gp.py | 27 ++++++----- GPy/plotting/matplot_dep/models_plots.py | 8 +++- GPy/testing/model_tests.py | 60 ++++++++++++++++++++++-- GPy/util/warping_functions.py | 18 ++++--- 4 files changed, 88 insertions(+), 25 deletions(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index 1ce0e92b..77ded72d 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -69,27 +69,27 @@ class WarpedGP(GP): def plot_warping(self): self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max()) - def _get_warped_term(self, mean, var, gh_samples, pred_init=None): - arg1 = gh_samples.dot(var.T) * np.sqrt(2) + def _get_warped_term(self, mean, std, gh_samples, pred_init=None): + arg1 = gh_samples.dot(std.T) * np.sqrt(2) arg2 = np.ones(shape=gh_samples.shape).dot(mean.T) return self.warping_function.f_inv(arg1 + arg2, y=pred_init) - def _get_warped_mean(self, mean, var, pred_init=None, deg_gauss_hermite=100): + def _get_warped_mean(self, mean, std, pred_init=None, deg_gauss_hermite=100): """ Calculate the warped mean by using Gauss-Hermite quadrature. """ gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite) gh_samples = gh_samples[:,None] gh_weights = gh_weights[None,:] - return gh_weights.dot(self._get_warped_term(mean, var, gh_samples)) / np.sqrt(np.pi) + return gh_weights.dot(self._get_warped_term(mean, std, gh_samples)) / np.sqrt(np.pi) - def _get_warped_variance(self, mean, var, pred_init=None, deg_gauss_hermite=100): + def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=100): gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite) gh_samples = gh_samples[:,None] gh_weights = gh_weights[None,:] - arg1 = gh_weights.dot(self._get_warped_term(mean, var, gh_samples, + arg1 = gh_weights.dot(self._get_warped_term(mean, std, gh_samples, pred_init=pred_init) ** 2) / np.sqrt(np.pi) - arg2 = self._get_warped_mean(mean, var, pred_init=pred_init, + arg2 = self._get_warped_mean(mean, std, pred_init=pred_init, deg_gauss_hermite=deg_gauss_hermite) return arg1 - (arg2 ** 2) @@ -103,20 +103,20 @@ class WarpedGP(GP): mean, var = self.likelihood.predictive_values(mu, var) if self.predict_in_warped_space: + std = np.sqrt(var) if median: #print 'MEDIAN!' - wmean = self.warping_function.f_inv(mean, y=pred_init) + wmean = self.warping_function.f_inv(mean, y=pred_init) else: #print 'MEAN!' - wmean = self._get_warped_mean(mean, var, pred_init=pred_init, + wmean = self._get_warped_mean(mean, std, pred_init=pred_init, deg_gauss_hermite=deg_gauss_hermite).T #var = self.warping_function.f_inv(var) - wvar = self._get_warped_variance(mean, var, pred_init=pred_init, + wvar = self._get_warped_variance(mean, std, pred_init=pred_init, deg_gauss_hermite=deg_gauss_hermite).T else: wmean = mean - #wvar = var - wvar = self.warping_function.f_inv(var) + wvar = var if self.scale_data: pred = self._unscale_data(pred) @@ -138,9 +138,12 @@ class WarpedGP(GP): if self.normalizer is not None: m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) + if not self.predict_in_warped_space: + return [a, b] #print a.shape new_a = self.warping_function.f_inv(a) new_b = self.warping_function.f_inv(b) + return [new_a, new_b] #return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 9ad166b5..e3bef675 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -75,7 +75,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', X = model.X Y = model.Y - if isinstance(model, WarpedGP): + if isinstance(model, WarpedGP) and model.predict_in_warped_space: Y = model.Y_untransformed if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) @@ -117,7 +117,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', Y_metadata = {'output_index': extra_data} else: Y_metadata['output_index'] = extra_data - m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw) + if isinstance(model, WarpedGP): + m, v = model.predict(Xgrid, full_cov=False, median=True, Y_metadata=Y_metadata, **predict_kw) + #print np.concatenate((Xgrid, m), axis=1) + else: + m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw) lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index feec7211..ca2dce07 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -203,13 +203,63 @@ class MiscTests(unittest.TestCase): m.optimize() print(m) - def test_warped_gp(self): + def test_warped_gp_identity(self): + """ + A WarpedGP with the identity warping function should be + equal to a standard GP. + """ k = GPy.kern.RBF(1) - warp = GPy.util.warping_functions.IdentityFunction() - m = GPy.models.WarpedGP(self.X, self.Y, kernel=k, warping_function=warp) - m.randomize() + m = GPy.models.GPRegression(self.X, self.Y, kernel=k) m.optimize() - print(m) + preds = m.predict(self.X) + + warp_k = GPy.kern.RBF(1) + warp_f = GPy.util.warping_functions.IdentityFunction() + warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k, warping_function=warp_f) + warp_m.optimize() + warp_preds = warp_m.predict(self.X) + + np.testing.assert_almost_equal(preds, warp_preds) + + @unittest.skip('Comment this to plot the modified sine function') + def test_warped_gp_sine(self): + """ + A test replicating the sine regression problem from + Snelson's paper. + """ + X = (2 * np.pi) * np.random.random(151) - np.pi + Y = np.sin(X) + np.random.normal(0,0.1,151) + Y = np.exp(Y) - 5 + #Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y]) + 0 + + #np.seterr(over='raise') + import matplotlib.pyplot as plt + warp_k = GPy.kern.RBF(1) + warp_f = GPy.util.warping_functions.TanhWarpingFunction_d(n_terms=2) + warp_m = GPy.models.WarpedGP(X[:, None], Y[:, None], kernel=warp_k, warping_function=warp_f) + #warp_m['.*variance.*'].constrain_fixed(0.25) + #warp_m['.*lengthscale.*'].constrain_fixed(1) + #warp_m['warp_tanh.d'].constrain_fixed(1) + #warp_m.randomize() + #warp_m['.*warp_tanh.psi*'][:,0:2].constrain_bounded(0,100) + #warp_m['.*warp_tanh.psi*'][:,0:1].constrain_fixed(1) + + #print(warp_m.checkgrad()) + #warp_m.plot() + #plt.show() + + warp_m.optimize_restarts(parallel=True, robust=True) + #print(warp_m.checkgrad()) + print(warp_m) + print(warp_m['.*warp.*']) + warp_m.predict_in_warped_space = False + warp_m.plot() + warp_m.predict_in_warped_space = True + warp_m.plot() + warp_f.plot(X.min()-10, X.max()+10) + plt.show() + + class GradientTests(np.testing.TestCase): def setUp(self): diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index a3a282c8..88e688b8 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -211,17 +211,24 @@ class TanhWarpingFunction_d(WarpingFunction): z = z.copy() if y is None: - y = np.ones_like(z) + y = np.ones_like(z) * 0.1 + #y = np.zeros_like(z) it = 0 update = np.inf + #import ipdb; ipdb.set_trace() while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations): - update = (self.f(y) - z)/self.fgrad_y(y) + fy = self.f(y) + fgrady = self.fgrad_y(y) + update = (fy - z)/fgrady y -= update it += 1 + #print it + #print y if it == max_iterations: print("WARNING!!! Maximum number of iterations reached in f_inv ") + #print np.abs(update) return y @@ -265,7 +272,7 @@ class TanhWarpingFunction_d(WarpingFunction): mpsi = self.psi w, s, r, d = self.fgrad_y(y, return_precalc = True) - + #print s gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4)) for i in range(len(mpsi)): a,b,c = mpsi[i] @@ -316,11 +323,10 @@ class IdentityFunction(WarpingFunction): return np.ones(y.shape) def fgrad_y_psi(self, y, return_covar_chain=False): - gradients = np.ones((y.shape[0], y.shape[1], len(self.psi), 4)) + gradients = np.zeros((y.shape[0], y.shape[1], len(self.psi), 4)) if return_covar_chain: return gradients, gradients return gradients - - def f_inv(self,z): + def f_inv(self, z, y=None): return z From 96441382b95bab88c252328a717211494eabfc08 Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Wed, 5 Aug 2015 19:15:34 +0100 Subject: [PATCH 11/14] code cleaning on warping_functions --- GPy/util/warping_functions.py | 70 ++++++++++------------------------- 1 file changed, 19 insertions(+), 51 deletions(-) diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index 88e688b8..b8b9a261 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -5,6 +5,7 @@ import numpy as np from ..core.parameterization import Parameterized, Param from ..core.parameterization.transformations import Logexp + class WarpingFunction(Parameterized): """ abstract function for warping @@ -14,28 +15,28 @@ class WarpingFunction(Parameterized): def __init__(self, name): super(WarpingFunction, self).__init__(name=name) - def f(self,y,psi): + def f(self, y, psi): """function transformation - y is a list of values (GP training data) of shpape [N,1] + y is a list of values (GP training data) of shape [N, 1] """ raise NotImplementedError - def fgrad_y(self,y,psi): + def fgrad_y(self, y, psi): """gradient of f w.r.t to y""" raise NotImplementedError - def fgrad_y_psi(self,y,psi): + def fgrad_y_psi(self, y, psi): """gradient of f w.r.t to y""" raise NotImplementedError - def f_inv(self,z,psi): + def f_inv(self, z, psi): """inverse function transformation""" raise NotImplementedError def _get_param_names(self): raise NotImplementedError - def plot(self, xmin, xmax): + def plot(self, xmin, xmax): psi = self.psi y = np.arange(xmin, xmax, 0.01) f_y = self.f(y) @@ -47,22 +48,21 @@ class WarpingFunction(Parameterized): plt.title('warping function') plt.show() + class TanhWarpingFunction(WarpingFunction): - def __init__(self,n_terms=3): + def __init__(self, n_terms=3): """n_terms specifies the number of tanh terms to be used""" self.n_terms = n_terms self.num_parameters = 3 * self.n_terms super(TanhWarpingFunction, self).__init__(name='warp_tanh') - def f(self,y,psi): + def f(self, y, psi): """ transform y with f using parameter vector psi psi = [[a,b,c]] ::math::`f = \\sum_{terms} a * tanh(b*(y+c))` - """ - #1. check that number of params is consistent assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions' assert psi.shape[1] == 3, 'inconsistent parameter dimensions' @@ -77,25 +77,19 @@ class TanhWarpingFunction(WarpingFunction): z += a*np.tanh(b*(y+c)) return z - - def f_inv(self, y, psi, iterations = 10): + def f_inv(self, y, psi, iterations=10): """ calculate the numerical inverse of f - :param iterations: number of N.R. iterations - """ y = y.copy() z = np.ones_like(y) - for i in range(iterations): z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi) - return z - - def fgrad_y(self, y, psi, return_precalc = False): + def fgrad_y(self, y, psi, return_precalc=False): """ gradient of f w.r.t to y ([N x 1]) returns: Nx1 vector of derivatives, unless return_precalc is true, @@ -118,16 +112,12 @@ class TanhWarpingFunction(WarpingFunction): # return GRAD,S.sum(axis=1),R.sum(axis=1),D.sum(axis=1) return GRAD, S, R, D - return GRAD - - def fgrad_y_psi(self, y, psi, return_covar_chain = False): + def fgrad_y_psi(self, y, psi, return_covar_chain=False): """ gradient of f w.r.t to y and psi - returns: NxIx3 tensor of partial derivatives - """ # 1. exponentiate the a and b (positive!) @@ -141,7 +131,6 @@ class TanhWarpingFunction(WarpingFunction): gradients[:,:,i,1] = a*(d[i] - 2.0*s[i]*r[i]*(1.0/np.cosh(s[i]))**2).T gradients[:,:,i,2] = (-2.0*a*(b**2)*r[i]*((1.0/np.cosh(s[i]))**2)).T - if return_covar_chain: covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 3)) @@ -163,7 +152,7 @@ class TanhWarpingFunction(WarpingFunction): class TanhWarpingFunction_d(WarpingFunction): - def __init__(self,n_terms=3): + def __init__(self, n_terms=3): """n_terms specifies the number of tanh terms to be used""" self.n_terms = n_terms self.num_parameters = 3 * self.n_terms + 1 @@ -177,15 +166,13 @@ class TanhWarpingFunction_d(WarpingFunction): self.link_parameter(self.psi) self.link_parameter(self.d) - - def f(self,y): + def f(self, y): """ Transform y with f using parameter vector psi psi = [[a,b,c]] :math:`f = \\sum_{terms} a * tanh(b*(y+c))` """ - #1. check that number of params is consistent # assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions' # assert psi.shape[1] == 4, 'inconsistent parameter dimensions' @@ -200,23 +187,19 @@ class TanhWarpingFunction_d(WarpingFunction): z += a*np.tanh(b*(y+c)) return z - def f_inv(self, z, max_iterations=1000, y=None): """ calculate the numerical inverse of f :param max_iterations: maximum number of N.R. iterations - """ z = z.copy() if y is None: - y = np.ones_like(z) * 0.1 - #y = np.zeros_like(z) + y = np.ones_like(z) it = 0 update = np.inf - #import ipdb; ipdb.set_trace() while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations): fy = self.f(y) @@ -224,29 +207,20 @@ class TanhWarpingFunction_d(WarpingFunction): update = (fy - z)/fgrady y -= update it += 1 - #print it - #print y if it == max_iterations: print("WARNING!!! Maximum number of iterations reached in f_inv ") - #print np.abs(update) - return y - - def fgrad_y(self, y,return_precalc = False): + def fgrad_y(self, y, return_precalc=False): """ gradient of f w.r.t to y ([N x 1]) :returns: Nx1 vector of derivatives, unless return_precalc is true, then it also returns the precomputed stuff - """ - - d = self.d mpsi = self.psi # vectorized version - S = (mpsi[:,1]*(y[:,:,None] + mpsi[:,2])).T R = np.tanh(S) D = 1-R**2 @@ -256,23 +230,17 @@ class TanhWarpingFunction_d(WarpingFunction): if return_precalc: return GRAD, S, R, D - return GRAD - - def fgrad_y_psi(self, y, return_covar_chain = False): + def fgrad_y_psi(self, y, return_covar_chain=False): """ gradient of f w.r.t to y and psi :returns: NxIx4 tensor of partial derivatives - """ - - mpsi = self.psi - w, s, r, d = self.fgrad_y(y, return_precalc = True) - #print s + w, s, r, d = self.fgrad_y(y, return_precalc=True) gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4)) for i in range(len(mpsi)): a,b,c = mpsi[i] From 20a1e8d3c7d9641a244159ab12867112046babcc Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Wed, 5 Aug 2015 19:18:25 +0100 Subject: [PATCH 12/14] some cleaning on WarpedGP code --- GPy/models/warped_gp.py | 10 +++++----- GPy/testing/model_tests.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index 77ded72d..06bc85cb 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -16,7 +16,7 @@ class WarpedGP(GP): if warping_function == None: self.warping_function = TanhWarpingFunction_d(warping_terms) - self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1) + self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1) else: self.warping_function = warping_function @@ -56,7 +56,6 @@ class WarpedGP(GP): self.warping_function.psi.gradient[:] = warping_grads[:, :-1] self.warping_function.d.gradient[:] = warping_grads[0, -1] - def transform_data(self): Y = self.warping_function.f(self.Y_untransformed.copy()).copy() return Y @@ -84,6 +83,9 @@ class WarpedGP(GP): return gh_weights.dot(self._get_warped_term(mean, std, gh_samples)) / np.sqrt(np.pi) def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=100): + """ + Calculate the warped variance by using Gauss-Hermite quadrature. + """ gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite) gh_samples = gh_samples[:,None] gh_weights = gh_weights[None,:] @@ -105,13 +107,10 @@ class WarpedGP(GP): if self.predict_in_warped_space: std = np.sqrt(var) if median: - #print 'MEDIAN!' wmean = self.warping_function.f_inv(mean, y=pred_init) else: - #print 'MEAN!' wmean = self._get_warped_mean(mean, std, pred_init=pred_init, deg_gauss_hermite=deg_gauss_hermite).T - #var = self.warping_function.f_inv(var) wvar = self._get_warped_variance(mean, std, pred_init=pred_init, deg_gauss_hermite=deg_gauss_hermite).T else: @@ -147,6 +146,7 @@ class WarpedGP(GP): return [new_a, new_b] #return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) + if __name__ == '__main__': X = np.random.randn(100, 1) Y = np.sin(X) + np.random.randn(100, 1)*0.05 diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index ca2dce07..03bb0bb5 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -221,7 +221,7 @@ class MiscTests(unittest.TestCase): np.testing.assert_almost_equal(preds, warp_preds) - @unittest.skip('Comment this to plot the modified sine function') + #@unittest.skip('Comment this to plot the modified sine function') def test_warped_gp_sine(self): """ A test replicating the sine regression problem from From 4dd2f4feb724a62f0482395e52be66e3ab7fffdf Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Wed, 5 Aug 2015 19:18:57 +0100 Subject: [PATCH 13/14] commenting plotting test --- GPy/testing/model_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 03bb0bb5..ca2dce07 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -221,7 +221,7 @@ class MiscTests(unittest.TestCase): np.testing.assert_almost_equal(preds, warp_preds) - #@unittest.skip('Comment this to plot the modified sine function') + @unittest.skip('Comment this to plot the modified sine function') def test_warped_gp_sine(self): """ A test replicating the sine regression problem from From a350e74b317bae9c28467b2c42d952c71954c4da Mon Sep 17 00:00:00 2001 From: beckdaniel Date: Sat, 26 Sep 2015 13:30:23 +0100 Subject: [PATCH 14/14] merged master --- GPy/models/warped_gp.py | 1 + GPy/plotting/matplot_dep/models_plots.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index 06bc85cb..df947d3e 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -137,6 +137,7 @@ class WarpedGP(GP): if self.normalizer is not None: m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) + #return [a, b] if not self.predict_in_warped_space: return [a, b] #print a.shape diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 0986c20e..8fe0b2c2 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -46,6 +46,8 @@ def plot_data(model, which_data_rows='all', #data X = model.X Y = model.Y + if isinstance(model, WarpedGP) and model.predict_in_warped_space: + Y = model.Y_untransformed #work out what the inputs are for plotting (1D or 2D) if visible_dims is None: @@ -184,11 +186,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', if isinstance(model, WarpedGP): m, v = model.predict(Xgrid, full_cov=False, median=True, Y_metadata=Y_metadata, **predict_kw) + lower, upper = model.predict_quantiles(Xgrid) #print np.concatenate((Xgrid, m), axis=1) else: m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw) - fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw) - lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata) + fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw) + lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)