[warped stuff] plotting and normalizer in warped gps

This commit is contained in:
mzwiessele 2016-08-17 14:51:29 +01:00
parent f50b691ec6
commit d343ec8b41
8 changed files with 112 additions and 78 deletions

View file

@ -14,6 +14,8 @@ from . import testing
from . import kern from . import kern
from . import plotting from . import plotting
from .util import normalizer
# backwards compatibility # backwards compatibility
import sys import sys
backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations'] backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations']

View file

@ -2,17 +2,17 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from .. import kern from .model import Model
from GPy.core.model import Model from .parameterization.variational import VariationalPosterior
from paramz import ObsAr
from .mapping import Mapping from .mapping import Mapping
from .. import likelihoods from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from GPy.core.parameterization.variational import VariationalPosterior from ..util.normalizer import Standardize
from paramz import ObsAr
import logging import logging
import warnings import warnings
from GPy.util.normalizer import MeanNorm
logger = logging.getLogger("GP") logger = logging.getLogger("GP")
class GP(Model): class GP(Model):
@ -28,7 +28,7 @@ class GP(Model):
:param Norm normalizer: :param Norm normalizer:
normalize the outputs Y. normalize the outputs Y.
Prediction will be un-normalized using this normalizer. Prediction will be un-normalized using this normalizer.
If normalizer is None, we will normalize using MeanNorm. If normalizer is None, we will normalize using Standardize.
If normalizer is False, no normalization will be done. If normalizer is False, no normalization will be done.
.. Note:: Multiple independent outputs are allowed using columns of Y .. Note:: Multiple independent outputs are allowed using columns of Y
@ -49,7 +49,7 @@ class GP(Model):
logger.info("initializing Y") logger.info("initializing Y")
if normalizer is True: if normalizer is True:
self.normalizer = MeanNorm() self.normalizer = Standardize()
elif normalizer is False: elif normalizer is False:
self.normalizer = None self.normalizer = None
else: else:
@ -64,6 +64,7 @@ class GP(Model):
self.Y_normalized = self.Y self.Y_normalized = self.Y
else: else:
self.Y = Y self.Y = Y
self.Y_normalized = self.Y
if Y.shape[0] != self.num_data: if Y.shape[0] != self.num_data:
#There can be cases where we want inputs than outputs, for example if we have multiple latent #There can be cases where we want inputs than outputs, for example if we have multiple latent
@ -248,14 +249,16 @@ class GP(Model):
""" """
#predict the latent function values #predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern) mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
if include_likelihood: if include_likelihood:
# now push through likelihood # now push through likelihood
if likelihood is None: if likelihood is None:
likelihood = self.likelihood likelihood = self.likelihood
mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata) mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
return mu, var return mu, var
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None): def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -300,11 +303,14 @@ class GP(Model):
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)] :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
""" """
m, v = self._raw_predict(X, full_cov=False, kern=kern) m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
if likelihood is None: if likelihood is None:
likelihood = self.likelihood likelihood = self.likelihood
return likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
if self.normalizer is not None:
quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
return quantiles
def predictive_gradients(self, Xnew, kern=None): def predictive_gradients(self, Xnew, kern=None):
""" """

View file

@ -135,9 +135,7 @@ class Bias(Static):
def K(self, X, X2=None): def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0]) shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64) return np.full(shape, self.variance, dtype=np.float64)
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum() self.variance.gradient = dL_dK.sum()
@ -146,9 +144,7 @@ class Bias(Static):
self.variance.gradient = dL_dKdiag.sum() self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64) return np.full((Z.shape[0], Z.shape[0]), self.variance*self.variance*variational_posterior.shape[0], dtype=np.float64)
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret
def psi2n(self, Z, variational_posterior): def psi2n(self, Z, variational_posterior):
ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)

View file

@ -15,7 +15,7 @@ class WarpedGP(GP):
This defines a GP Regression model that applies a This defines a GP Regression model that applies a
warping function to the output. warping function to the output.
""" """
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3): def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalizer=False):
if kernel is None: if kernel is None:
kernel = kern.RBF(X.shape[1]) kernel = kern.RBF(X.shape[1])
if warping_function == None: if warping_function == None:
@ -23,33 +23,23 @@ class WarpedGP(GP):
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1) self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1)
else: else:
self.warping_function = warping_function self.warping_function = warping_function
self.scale_data = False
if self.scale_data:
Y = self._scale_data(Y)
#self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = True
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel) super(WarpedGP, self).__init__(X, Y.copy(), likelihood=likelihood, kernel=kernel, normalizer=normalizer)
self.Y_normalized = self.Y_normalized.copy()
self.Y_untransformed = self.Y_normalized.copy()
self.predict_in_warped_space = True
self.link_parameter(self.warping_function) self.link_parameter(self.warping_function)
def set_XY(self, X=None, Y=None): def set_XY(self, X=None, Y=None):
self.Y_untransformed = Y.copy() super(WarpedGP, self).set_XY(X, Y)
GP.set_XY(self, X, self.transform_data()) self.Y_untransformed = self.Y_normalized.copy()
self.update_model(True)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def parameters_changed(self): def parameters_changed(self):
""" """
Notice that we update the warping function gradients here. Notice that we update the warping function gradients here.
""" """
self.Y[:] = self.transform_data() self.Y_normalized[:] = self.transform_data()
super(WarpedGP, self).parameters_changed() super(WarpedGP, self).parameters_changed()
Kiy = self.posterior.woodbury_vector.flatten() Kiy = self.posterior.woodbury_vector.flatten()
self.warping_function.update_grads(self.Y_untransformed, Kiy) self.warping_function.update_grads(self.Y_untransformed, Kiy)
@ -96,7 +86,7 @@ class WarpedGP(GP):
deg_gauss_hermite=deg_gauss_hermite) deg_gauss_hermite=deg_gauss_hermite)
return arg1 - (arg2 ** 2) return arg1 - (arg2 ** 2)
def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None, def predict(self, Xnew, kern=None, pred_init=None, Y_metadata=None,
median=False, deg_gauss_hermite=20, likelihood=None): median=False, deg_gauss_hermite=20, likelihood=None):
""" """
Prediction results depend on: Prediction results depend on:
@ -104,9 +94,12 @@ class WarpedGP(GP):
- The median flag passed as argument - The median flag passed as argument
The likelihood keyword is never used, it is just to follow the plotting API. The likelihood keyword is never used, it is just to follow the plotting API.
""" """
mu, var = GP._raw_predict(self, Xnew) #mu, var = GP._raw_predict(self, Xnew)
# now push through likelihood # now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var) #mean, var = self.likelihood.predictive_values(mu, var)
mean, var = super(WarpedGP, self).predict(Xnew, kern=kern, full_cov=False, likelihood=likelihood)
if self.predict_in_warped_space: if self.predict_in_warped_space:
std = np.sqrt(var) std = np.sqrt(var)
@ -120,11 +113,9 @@ class WarpedGP(GP):
else: else:
wmean = mean wmean = mean
wvar = var wvar = var
if self.scale_data:
pred = self._unscale_data(pred)
return wmean, wvar return wmean, wvar
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, median=False): def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, kern=None):
""" """
Get the predictive quantiles around the prediction at X Get the predictive quantiles around the prediction at X
@ -135,15 +126,19 @@ class WarpedGP(GP):
:returns: list of quantiles for each X and predictive quantiles for interval combination :returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)] :rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
""" """
m, v = self._raw_predict(X, full_cov=False) qs = super(WarpedGP, self).predict_quantiles(X, quantiles, Y_metadata=Y_metadata, likelihood=likelihood, kern=kern)
if self.normalizer is not None: if self.predict_in_warped_space:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) return [self.warping_function.f_inv(q) for q in qs]
a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) return qs
if not self.predict_in_warped_space: #m, v = self._raw_predict(X, full_cov=False)
return [a, b] #if self.normalizer is not None:
new_a = self.warping_function.f_inv(a) # m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
new_b = self.warping_function.f_inv(b) #a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
return [new_a, new_b] #if not self.predict_in_warped_space:
# return [a, b]
#new_a = self.warping_function.f_inv(a)
#new_b = self.warping_function.f_inv(b)
#return [new_a, new_b]
def log_predictive_density(self, x_test, y_test, Y_metadata=None): def log_predictive_density(self, x_test, y_test, Y_metadata=None):
""" """

View file

@ -74,9 +74,6 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, wh
if 'output_index' not in predict_kw['Y_metadata']: if 'output_index' not in predict_kw['Y_metadata']:
predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int) predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int)
if isinstance(self, WarpedGP) and self.predict_in_warped_space:
predict_kw['median'] = True
mu, _ = self.predict(Xgrid, **predict_kw) mu, _ = self.predict(Xgrid, **predict_kw)
if percentiles is not None: if percentiles is not None:
@ -299,8 +296,10 @@ def get_x_y_var(model):
Y = model.Y.values Y = model.Y.values
except AttributeError: except AttributeError:
Y = model.Y Y = model.Y
if isinstance(model, WarpedGP) and model.predict_in_warped_space:
Y = model.Y_untransformed if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
Y = model.Y_normalized
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray) if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
return X, X_variance, Y return X, X_variance, Y

View file

@ -91,18 +91,32 @@ class MiscTests(unittest.TestCase):
k = GPy.kern.RBF(1) k = GPy.kern.RBF(1)
m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False) m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
m2[:] = m[:] m2[:] = m[:]
mu1, var1 = m.predict(m.X, full_cov=True) mu1, var1 = m.predict(m.X, full_cov=True)
mu2, var2 = m2.predict(m2.X, full_cov=True) mu2, var2 = m2.predict(m2.X, full_cov=True)
np.testing.assert_allclose(mu1, (mu2*std)+mu) np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2) np.testing.assert_allclose(var1, var2*std**2)
mu1, var1 = m.predict(m.X, full_cov=False) mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = m2.predict(m2.X, full_cov=False) mu2, var2 = m2.predict(m2.X, full_cov=False)
np.testing.assert_allclose(mu1, (mu2*std)+mu) np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2) np.testing.assert_allclose(var1, var2*std**2)
q50n = m.predict_quantiles(m.X, (50,)) q50n = m.predict_quantiles(m.X, (50,))
q50 = m2.predict_quantiles(m2.X, (50,)) q50 = m2.predict_quantiles(m2.X, (50,))
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu) np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
# Test variance component:
qs = np.array([2.5, 97.5])
# The quantiles get computed before unormalization
# And transformed using the mean transformation:
c = np.random.choice(self.X.shape[0])
q95 = m2.predict_quantiles(self.X[[c]], qs)
mu, var = m2.predict(self.X[[c]])
from scipy.stats import norm
np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())
def check_jacobian(self): def check_jacobian(self):
try: try:

View file

@ -6,7 +6,7 @@ Created on Aug 27, 2014
import logging import logging
import numpy as np import numpy as np
class Norm(object): class _Norm(object):
def __init__(self): def __init__(self):
pass pass
def scale_by(self, Y): def scale_by(self, Y):
@ -18,7 +18,8 @@ class Norm(object):
""" """
Project Y into normalized space Project Y into normalized space
""" """
raise NotImplementedError if not self.scaled():
raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
def inverse_mean(self, X): def inverse_mean(self, X):
""" """
Project the normalized object X into space of Y Project the normalized object X into space of Y
@ -31,15 +32,46 @@ class Norm(object):
Whether this Norm object has been initialized. Whether this Norm object has been initialized.
""" """
raise NotImplementedError raise NotImplementedError
class MeanNorm(Norm):
class Standardize(_Norm):
def __init__(self): def __init__(self):
self.mean = None self.mean = None
def scale_by(self, Y): def scale_by(self, Y):
Y = np.ma.masked_invalid(Y, copy=False) Y = np.ma.masked_invalid(Y, copy=False)
self.mean = Y.mean(0).view(np.ndarray) self.mean = Y.mean(0).view(np.ndarray)
self.std = Y.std(0).view(np.ndarray)
def normalize(self, Y): def normalize(self, Y):
return Y-self.mean super(Standardize, self).normalize(Y)
return (Y-self.mean)/self.std
def inverse_mean(self, X): def inverse_mean(self, X):
return X+self.mean return (X*self.std)+self.mean
def inverse_variance(self, var):
return (var*(self.std**2))
def scaled(self): def scaled(self):
return self.mean is not None return self.mean is not None
# Inverse variance to be implemented, disabling for now
# If someone in the future want to implement this,
# we need to implement the inverse variance for
# normalization. This means, we need to know the factor
# for the variance to be multiplied to the variance in
# normalized space. This is easy to compute for standardization
# (see above) but gets tricky here.
# class Normalize(_Norm):
# def __init__(self):
# self.ymin = None
# self.ymax = None
# def scale_by(self, Y):
# Y = np.ma.masked_invalid(Y, copy=False)
# self.ymin = Y.min(0).view(np.ndarray)
# self.ymax = Y.max(0).view(np.ndarray)
# def normalize(self, Y):
# super(Normalize, self).normalize(Y)
# return (Y - self.ymin) / (self.ymax - self.ymin) - .5
# def inverse_mean(self, X):
# return (X + .5) * (self.ymax - self.ymin) + self.ymin
# def inverse_variance(self, var):
#
# return (var*(self.std**2))
# def scaled(self):
# return (self.ymin is not None) and (self.ymax is not None)

View file

@ -31,7 +31,7 @@ class WarpingFunction(Parameterized):
"""gradient of f w.r.t to y""" """gradient of f w.r.t to y"""
raise NotImplementedError raise NotImplementedError
def f_inv(self, z, max_iterations=100, y=None): def f_inv(self, z, max_iterations=250, y=None):
""" """
Calculate the numerical inverse of f. This should be Calculate the numerical inverse of f. This should be
overwritten for specific warping functions where the overwritten for specific warping functions where the
@ -51,14 +51,11 @@ class WarpingFunction(Parameterized):
update = (fy - z) / fgrady update = (fy - z) / fgrady
y -= self.rate * update y -= self.rate * update
it += 1 it += 1
if it == max_iterations: #if it == max_iterations:
print("WARNING!!! Maximum number of iterations reached in f_inv ") # print("WARNING!!! Maximum number of iterations reached in f_inv ")
print("Sum of roots: %.4f" % np.sum(fy - z)) # print("Sum of roots: %.4f" % np.sum(fy - z))
return y return y
def _get_param_names(self):
raise NotImplementedError
def plot(self, xmin, xmax): def plot(self, xmin, xmax):
y = np.arange(xmin, xmax, 0.01) y = np.arange(xmin, xmax, 0.01)
f_y = self.f(y) f_y = self.f(y)
@ -159,13 +156,6 @@ class TanhFunction(WarpingFunction):
return gradients return gradients
def _get_param_names(self):
variables = ['a', 'b', 'c', 'd']
names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)]
for q in range(self.n_terms)],[])
names.append('warp_tanh')
return names
def update_grads(self, Y_untransformed, Kiy): def update_grads(self, Y_untransformed, Kiy):
grad_y = self.fgrad_y(Y_untransformed) grad_y = self.fgrad_y(Y_untransformed)
grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed, grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed,