diff --git a/GPy/__init__.py b/GPy/__init__.py
index 1dc3e6b1..9ee3d935 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -14,6 +14,8 @@
 from . import testing
 from . import kern
 from . import plotting
+from .util import normalizer
+
 # backwards compatibility
 import sys
 backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations']
diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 9cbe3daf..f0910d9d 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -2,17 +2,17 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-from .. import kern
-from GPy.core.model import Model
-from paramz import ObsAr
+from .model import Model
+from .parameterization.variational import VariationalPosterior
 from .mapping import Mapping
 from .. import likelihoods
+from .. import kern
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
-from GPy.core.parameterization.variational import VariationalPosterior
+from ..util.normalizer import Standardize
+from paramz import ObsAr
 
 import logging
 import warnings
-from GPy.util.normalizer import MeanNorm
 logger = logging.getLogger("GP")
 
 class GP(Model):
@@ -28,7 +28,7 @@ class GP(Model):
     :param Norm normalizer:
         normalize the outputs Y.
         Prediction will be un-normalized using this normalizer.
-        If normalizer is None, we will normalize using MeanNorm.
+        If normalizer is True, we will normalize using Standardize.
         If normalizer is False, no normalization will be done.
 
     .. Note:: Multiple independent outputs are allowed using columns of Y
@@ -49,7 +49,7 @@ class GP(Model):
         logger.info("initializing Y")
 
         if normalizer is True:
-            self.normalizer = MeanNorm()
+            self.normalizer = Standardize()
         elif normalizer is False:
             self.normalizer = None
         else:
@@ -64,6 +64,7 @@ class GP(Model):
             self.Y_normalized = self.Y
         else:
             self.Y = Y
+            self.Y_normalized = self.Y
 
         if Y.shape[0] != self.num_data:
             #There can be cases where we want more inputs than outputs, for example if we have multiple latent
@@ -248,14 +249,16 @@ class GP(Model):
         """
         #predict the latent function values
         mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
-        if self.normalizer is not None:
-            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
         if include_likelihood:
             # now push through likelihood
             if likelihood is None:
                 likelihood = self.likelihood
             mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
+
+        if self.normalizer is not None:
+            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
+
         return mu, var
 
     def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
         """
@@ -300,11 +303,14 @@ class GP(Model):
         :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
         """
         m, v = self._raw_predict(X, full_cov=False, kern=kern)
-        if self.normalizer is not None:
-            m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
         if likelihood is None:
             likelihood = self.likelihood
-        return likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
+
+        quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
+
+        if self.normalizer is not None:
+            quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
+        return quantiles
 
     def predictive_gradients(self, Xnew, kern=None):
         """
diff --git a/GPy/kern/src/static.py b/GPy/kern/src/static.py
index 5cf4a1c9..0ffd8112 100644
--- a/GPy/kern/src/static.py
+++ b/GPy/kern/src/static.py
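Note on the gp.py hunks above: predictions are now pushed through the likelihood while still in normalized space and only un-normalized at the very end, and quantiles are likewise computed in normalized space and mapped back with the mean transformation alone. A minimal sketch of the intended behaviour (synthetic data; a plain Gaussian likelihood is assumed):

```python
import numpy as np
from scipy.stats import norm
import GPy

rng = np.random.RandomState(0)
X = np.linspace(0, 10, 50)[:, None]
Y = 100.0 + 5.0 * np.sin(X) + 0.5 * rng.randn(50, 1)  # large offset and scale

m = GPy.models.GPRegression(X, Y, normalizer=True)  # Standardize() under the hood
m.optimize()

# predict() returns moments on the original scale of Y, and for a Gaussian
# likelihood the un-normalized quantiles stay consistent with them:
# q = mu + z * sqrt(var) on the original scale.
mu, var = m.predict(X[:1])
q_lo, q_hi = m.predict_quantiles(X[:1], (2.5, 97.5))
np.testing.assert_allclose(q_lo, mu + norm.ppf(0.025) * np.sqrt(var))
np.testing.assert_allclose(q_hi, mu + norm.ppf(0.975) * np.sqrt(var))
```

This is the same invariant the new model_tests assertions below check against scipy.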
@@ -135,9 +135,7 @@ class Bias(Static):
 
     def K(self, X, X2=None):
         shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
-        ret = np.empty(shape, dtype=np.float64)
-        ret[:] = self.variance
-        return ret
+        return np.full(shape, self.variance, dtype=np.float64)
 
     def update_gradients_full(self, dL_dK, X, X2=None):
         self.variance.gradient = dL_dK.sum()
@@ -146,9 +144,7 @@ class Bias(Static):
         self.variance.gradient = dL_dKdiag.sum()
 
     def psi2(self, Z, variational_posterior):
-        ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
-        ret[:] = self.variance*self.variance*variational_posterior.shape[0]
-        return ret
+        return np.full((Z.shape[0], Z.shape[0]), self.variance*self.variance*variational_posterior.shape[0], dtype=np.float64)
 
     def psi2n(self, Z, variational_posterior):
         ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
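The Bias hunks are pure refactors: the allocate-then-fill idiom is replaced by a single np.full call. A quick standalone equivalence check (illustrative values only):

```python
import numpy as np

variance = 2.5   # stands in for self.variance
shape = (3, 4)

ret = np.empty(shape, dtype=np.float64)  # old pattern: allocate, then fill
ret[:] = variance

# new pattern produces an identical array in one call
assert np.array_equal(ret, np.full(shape, variance, dtype=np.float64))
```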
""" - mu, var = GP._raw_predict(self, Xnew) + #mu, var = GP._raw_predict(self, Xnew) # now push through likelihood - mean, var = self.likelihood.predictive_values(mu, var) + #mean, var = self.likelihood.predictive_values(mu, var) + + mean, var = super(WarpedGP, self).predict(Xnew, kern=kern, full_cov=False, likelihood=likelihood) + if self.predict_in_warped_space: std = np.sqrt(var) @@ -120,11 +113,9 @@ class WarpedGP(GP): else: wmean = mean wvar = var - if self.scale_data: - pred = self._unscale_data(pred) return wmean, wvar - def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, median=False): + def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, kern=None): """ Get the predictive quantiles around the prediction at X @@ -135,15 +126,19 @@ class WarpedGP(GP): :returns: list of quantiles for each X and predictive quantiles for interval combination :rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)] """ - m, v = self._raw_predict(X, full_cov=False) - if self.normalizer is not None: - m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) - a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) - if not self.predict_in_warped_space: - return [a, b] - new_a = self.warping_function.f_inv(a) - new_b = self.warping_function.f_inv(b) - return [new_a, new_b] + qs = super(WarpedGP, self).predict_quantiles(X, quantiles, Y_metadata=Y_metadata, likelihood=likelihood, kern=kern) + if self.predict_in_warped_space: + return [self.warping_function.f_inv(q) for q in qs] + return qs + #m, v = self._raw_predict(X, full_cov=False) + #if self.normalizer is not None: + # m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) + #a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) + #if not self.predict_in_warped_space: + # return [a, b] + #new_a = self.warping_function.f_inv(a) + #new_b = self.warping_function.f_inv(b) + #return [new_a, new_b] def log_predictive_density(self, x_test, y_test, Y_metadata=None): """ diff --git a/GPy/plotting/gpy_plot/plot_util.py b/GPy/plotting/gpy_plot/plot_util.py index 937f35ab..7bd1723f 100644 --- a/GPy/plotting/gpy_plot/plot_util.py +++ b/GPy/plotting/gpy_plot/plot_util.py @@ -74,9 +74,6 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, wh if 'output_index' not in predict_kw['Y_metadata']: predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int) - if isinstance(self, WarpedGP) and self.predict_in_warped_space: - predict_kw['median'] = True - mu, _ = self.predict(Xgrid, **predict_kw) if percentiles is not None: @@ -299,8 +296,10 @@ def get_x_y_var(model): Y = model.Y.values except AttributeError: Y = model.Y - if isinstance(model, WarpedGP) and model.predict_in_warped_space: - Y = model.Y_untransformed + + if isinstance(model, WarpedGP) and not model.predict_in_warped_space: + Y = model.Y_normalized + if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) return X, X_variance, Y diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 010befe4..1e0731ab 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -91,18 +91,32 @@ class MiscTests(unittest.TestCase): k = GPy.kern.RBF(1) m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False) m2[:] = m[:] + mu1, var1 = m.predict(m.X, full_cov=True) mu2, var2 = m2.predict(m2.X, full_cov=True) np.testing.assert_allclose(mu1, (mu2*std)+mu) - 
diff --git a/GPy/plotting/gpy_plot/plot_util.py b/GPy/plotting/gpy_plot/plot_util.py
index 937f35ab..7bd1723f 100644
--- a/GPy/plotting/gpy_plot/plot_util.py
+++ b/GPy/plotting/gpy_plot/plot_util.py
@@ -74,9 +74,6 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, wh
         if 'output_index' not in predict_kw['Y_metadata']:
             predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int)
 
-    if isinstance(self, WarpedGP) and self.predict_in_warped_space:
-        predict_kw['median'] = True
-
     mu, _ = self.predict(Xgrid, **predict_kw)
 
     if percentiles is not None:
@@ -299,8 +296,10 @@ def get_x_y_var(model):
         Y = model.Y.values
     except AttributeError:
         Y = model.Y
-    if isinstance(model, WarpedGP) and model.predict_in_warped_space:
-        Y = model.Y_untransformed
+
+    if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
+        Y = model.Y_normalized
+
     if sparse.issparse(Y):
         Y = Y.todense().view(np.ndarray)
     return X, X_variance, Y
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index 010befe4..1e0731ab 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -91,18 +91,32 @@ class MiscTests(unittest.TestCase):
         k = GPy.kern.RBF(1)
         m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
         m2[:] = m[:]
+
         mu1, var1 = m.predict(m.X, full_cov=True)
         mu2, var2 = m2.predict(m2.X, full_cov=True)
         np.testing.assert_allclose(mu1, (mu2*std)+mu)
-        np.testing.assert_allclose(var1, var2)
+        np.testing.assert_allclose(var1, var2*std**2)
+
         mu1, var1 = m.predict(m.X, full_cov=False)
         mu2, var2 = m2.predict(m2.X, full_cov=False)
+
         np.testing.assert_allclose(mu1, (mu2*std)+mu)
-        np.testing.assert_allclose(var1, var2)
+        np.testing.assert_allclose(var1, var2*std**2)
 
         q50n = m.predict_quantiles(m.X, (50,))
         q50 = m2.predict_quantiles(m2.X, (50,))
+
         np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
+
+        # Test variance component:
+        qs = np.array([2.5, 97.5])
+        # The quantiles get computed before un-normalization
+        # and transformed using the mean transformation:
+        c = np.random.choice(self.X.shape[0])
+        q95 = m2.predict_quantiles(self.X[[c]], qs)
+        mu, var = m2.predict(self.X[[c]])
+        from scipy.stats import norm
+        np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())
 
     def check_jacobian(self):
         try:
diff --git a/GPy/util/normalizer.py b/GPy/util/normalizer.py
index 854726c4..78ad945d 100644
--- a/GPy/util/normalizer.py
+++ b/GPy/util/normalizer.py
@@ -6,7 +6,7 @@ Created on Aug 27, 2014
 import logging
 import numpy as np
 
-class Norm(object):
+class _Norm(object):
     def __init__(self):
         pass
     def scale_by(self, Y):
@@ -18,7 +18,8 @@ class Norm(object):
         """
         Project Y into normalized space
         """
-        raise NotImplementedError
+        if not self.scaled():
+            raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
     def inverse_mean(self, X):
         """
         Project the normalized object X into space of Y
@@ -31,15 +32,46 @@ class Norm(object):
         Whether this Norm object has been initialized.
         """
         raise NotImplementedError
-class MeanNorm(Norm):
+
+class Standardize(_Norm):
     def __init__(self):
         self.mean = None
     def scale_by(self, Y):
         Y = np.ma.masked_invalid(Y, copy=False)
         self.mean = Y.mean(0).view(np.ndarray)
+        self.std = Y.std(0).view(np.ndarray)
     def normalize(self, Y):
-        return Y-self.mean
+        super(Standardize, self).normalize(Y)
+        return (Y-self.mean)/self.std
     def inverse_mean(self, X):
-        return X+self.mean
+        return (X*self.std)+self.mean
+    def inverse_variance(self, var):
+        return (var*(self.std**2))
     def scaled(self):
         return self.mean is not None
+
+# Inverse variance to be implemented, disabling for now
+# If someone in the future wants to implement this,
+# we need to implement the inverse variance for
+# normalization. This means we need to know the factor
+# for the variance to be multiplied to the variance in
+# normalized space. This is easy to compute for standardization
+# (see above) but gets tricky here.
+# class Normalize(_Norm):
+#     def __init__(self):
+#         self.ymin = None
+#         self.ymax = None
+#     def scale_by(self, Y):
+#         Y = np.ma.masked_invalid(Y, copy=False)
+#         self.ymin = Y.min(0).view(np.ndarray)
+#         self.ymax = Y.max(0).view(np.ndarray)
+#     def normalize(self, Y):
+#         super(Normalize, self).normalize(Y)
+#         return (Y - self.ymin) / (self.ymax - self.ymin) - .5
+#     def inverse_mean(self, X):
+#         return (X + .5) * (self.ymax - self.ymin) + self.ymin
+#     def inverse_variance(self, var):
+#
+#         return (var*(self.std**2))
+#     def scaled(self):
+#         return (self.ymin is not None) and (self.ymax is not None)
\ No newline at end of file
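Standardize keeps a per-column mean and std, so both the predictive mean and the predictive variance have exact back-transformations; that is exactly why the min-max Normalize variant above is left commented out, since its variance back-transform is not defined. A small round-trip check of the new class:

```python
import numpy as np
from GPy.util.normalizer import Standardize

rng = np.random.RandomState(2)
Y = 3.0 + 7.0 * rng.randn(100, 2)

norm = Standardize()
norm.scale_by(Y)

Yn = norm.normalize(Y)  # ~zero mean, unit std per column
np.testing.assert_allclose(norm.inverse_mean(Yn), Y)
# variances scale by std**2 on the way back
np.testing.assert_allclose(norm.inverse_variance(1.0), norm.std ** 2)
```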
diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py
index 6b8b73e9..3af28d43 100644
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@@ -31,7 +31,7 @@ class WarpingFunction(Parameterized):
         """gradient of f w.r.t to y"""
         raise NotImplementedError
 
-    def f_inv(self, z, max_iterations=100, y=None):
+    def f_inv(self, z, max_iterations=250, y=None):
         """
         Calculate the numerical inverse of f. This should be
         overwritten for specific warping functions where the
@@ -51,14 +51,11 @@ class WarpingFunction(Parameterized):
             update = (fy - z) / fgrady
             y -= self.rate * update
             it += 1
-            if it == max_iterations:
-                print("WARNING!!! Maximum number of iterations reached in f_inv ")
-                print("Sum of roots: %.4f" % np.sum(fy - z))
+            #if it == max_iterations:
+            #    print("WARNING!!! Maximum number of iterations reached in f_inv ")
+            #    print("Sum of roots: %.4f" % np.sum(fy - z))
         return y
 
-    def _get_param_names(self):
-        raise NotImplementedError
-
     def plot(self, xmin, xmax):
         y = np.arange(xmin, xmax, 0.01)
         f_y = self.f(y)
@@ -159,13 +156,6 @@ class TanhFunction(WarpingFunction):
 
         return gradients
 
-    def _get_param_names(self):
-        variables = ['a', 'b', 'c', 'd']
-        names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)]
-                     for q in range(self.n_terms)],[])
-        names.append('warp_tanh')
-        return names
-
     def update_grads(self, Y_untransformed, Kiy):
         grad_y = self.fgrad_y(Y_untransformed)
         grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed,