[warped stuff] plotting and normalizer in warped gps

mzwiessele 2016-08-17 14:51:29 +01:00
parent f50b691ec6
commit d343ec8b41
8 changed files with 112 additions and 78 deletions

View file

@@ -14,6 +14,8 @@ from . import testing
from . import kern
from . import plotting
from .util import normalizer
# backwards compatibility
import sys
backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations']

View file

@@ -2,17 +2,17 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from GPy.core.model import Model
from paramz import ObsAr
from .model import Model
from .parameterization.variational import VariationalPosterior
from .mapping import Mapping
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from GPy.core.parameterization.variational import VariationalPosterior
from ..util.normalizer import Standardize
from paramz import ObsAr
import logging
import warnings
from GPy.util.normalizer import MeanNorm
logger = logging.getLogger("GP")
class GP(Model):
@@ -28,7 +28,7 @@ class GP(Model):
:param Norm normalizer:
normalize the outputs Y.
Prediction will be un-normalized using this normalizer.
If normalizer is None, we will normalize using MeanNorm.
If normalizer is None, we will normalize using Standardize.
If normalizer is False, no normalization will be done.
.. Note:: Multiple independent outputs are allowed using columns of Y
@@ -49,7 +49,7 @@ class GP(Model):
logger.info("initializing Y")
if normalizer is True:
self.normalizer = MeanNorm()
self.normalizer = Standardize()
elif normalizer is False:
self.normalizer = None
else:
@@ -64,6 +64,7 @@ class GP(Model):
self.Y_normalized = self.Y
else:
self.Y = Y
self.Y_normalized = self.Y
if Y.shape[0] != self.num_data:
#There can be cases where we want more inputs than outputs, for example if we have multiple latent
@@ -248,14 +249,16 @@ class GP(Model):
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
if include_likelihood:
# now push through likelihood
if likelihood is None:
likelihood = self.likelihood
mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
return mu, var
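
The practical effect of moving the un-normalization below the likelihood is that predictive_values now operates on normalized moments and callers still receive predictions in the original Y space. A small usage sketch of the resulting behaviour, assuming GPy at this commit (the data here is made up):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05 + 100.  # large offset, so normalization matters

m = GPy.models.GPRegression(X, Y, normalizer=True)    # True now selects Standardize
m.optimize()
mu, var = m.predict(X)   # already mapped back to the original Y scale
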
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@@ -300,11 +303,14 @@ class GP(Model):
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
if likelihood is None:
likelihood = self.likelihood
return likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
if self.normalizer is not None:
quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
return quantiles
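
Un-normalizing each quantile with inverse_mean alone is sound for a location-scale normalizer: x -> x*std + mean is strictly increasing (std > 0), so it maps quantiles to quantiles. A minimal numeric sketch of that identity for a Gaussian predictive (made-up numbers):

import numpy as np
from scipy.stats import norm

m_n, v_n = 0.3, 0.04     # predictive moments in normalized space
mean, std = 10.0, 2.0    # Standardize parameters
q_n = m_n + norm.ppf(0.975) * np.sqrt(v_n)  # 97.5% quantile, normalized space
q = q_n * std + mean                        # what inverse_mean does per quantile
# identical to computing the quantile directly from un-normalized moments:
np.testing.assert_allclose(q, (m_n * std + mean) + norm.ppf(0.975) * np.sqrt(v_n * std**2))
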
def predictive_gradients(self, Xnew, kern=None):
"""

View file

@@ -135,9 +135,7 @@ class Bias(Static):
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64)
ret[:] = self.variance
return ret
return np.full(shape, self.variance, dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum()
@@ -146,9 +144,7 @@ class Bias(Static):
self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, variational_posterior):
ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret
return np.full((Z.shape[0], Z.shape[0]), self.variance*self.variance*variational_posterior.shape[0], dtype=np.float64)
def psi2n(self, Z, variational_posterior):
ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
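
The np.empty plus broadcast-assign pairs above are replaced by single np.full calls; the two idioms produce identical arrays, np.full simply allocates and fills in one step. A quick check:

import numpy as np

shape = (3, 4)
ret = np.empty(shape, dtype=np.float64)
ret[:] = 2.5
np.testing.assert_array_equal(ret, np.full(shape, 2.5, dtype=np.float64))
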

View file

@@ -15,7 +15,7 @@ class WarpedGP(GP):
This defines a GP Regression model that applies a
warping function to the output.
"""
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalizer=False):
if kernel is None:
kernel = kern.RBF(X.shape[1])
if warping_function == None:
@@ -23,33 +23,23 @@ class WarpedGP(GP):
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1)
else:
self.warping_function = warping_function
self.scale_data = False
if self.scale_data:
Y = self._scale_data(Y)
#self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = True
likelihood = likelihoods.Gaussian()
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
super(WarpedGP, self).__init__(X, Y.copy(), likelihood=likelihood, kernel=kernel, normalizer=normalizer)
self.Y_normalized = self.Y_normalized.copy()
self.Y_untransformed = self.Y_normalized.copy()
self.predict_in_warped_space = True
self.link_parameter(self.warping_function)
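
With the constructor now delegating to GP.__init__, the ad-hoc scale_data path is gone: warping is applied to the (optionally) normalized targets, and Y_untransformed tracks Y_normalized. A usage sketch of the new keyword, which defaults to False (data made up):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (50, 1))
Y = np.exp(np.sin(X) + np.random.randn(50, 1) * 0.05)  # positive, skewed targets

m = GPy.models.WarpedGP(X, Y, warping_terms=3, normalizer=True)
m.optimize()
mu, var = m.predict(X)   # predictions in the original, unwarped Y space
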
def set_XY(self, X=None, Y=None):
self.Y_untransformed = Y.copy()
GP.set_XY(self, X, self.transform_data())
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
super(WarpedGP, self).set_XY(X, Y)
self.Y_untransformed = self.Y_normalized.copy()
self.update_model(True)
def parameters_changed(self):
"""
Notice that we update the warping function gradients here.
"""
self.Y[:] = self.transform_data()
self.Y_normalized[:] = self.transform_data()
super(WarpedGP, self).parameters_changed()
Kiy = self.posterior.woodbury_vector.flatten()
self.warping_function.update_grads(self.Y_untransformed, Kiy)
@@ -96,7 +86,7 @@ class WarpedGP(GP):
deg_gauss_hermite=deg_gauss_hermite)
return arg1 - (arg2 ** 2)
def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None,
def predict(self, Xnew, kern=None, pred_init=None, Y_metadata=None,
median=False, deg_gauss_hermite=20, likelihood=None):
"""
Prediction results depend on:
@@ -104,9 +94,12 @@ class WarpedGP(GP):
- The median flag passed as argument
The likelihood keyword is never used; it is only there to follow the plotting API.
"""
mu, var = GP._raw_predict(self, Xnew)
#mu, var = GP._raw_predict(self, Xnew)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var)
#mean, var = self.likelihood.predictive_values(mu, var)
mean, var = super(WarpedGP, self).predict(Xnew, kern=kern, full_cov=False, likelihood=likelihood)
if self.predict_in_warped_space:
std = np.sqrt(var)
@@ -120,11 +113,9 @@ class WarpedGP(GP):
else:
wmean = mean
wvar = var
if self.scale_data:
pred = self._unscale_data(pred)
return wmean, wvar
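
The method now obtains mean and var in the warped space from GP.predict and maps them back: the median goes through f_inv exactly (a monotonic warping maps medians to medians), while the mean of f_inv(Z) for Gaussian Z needs quadrature. A self-contained sketch of that quadrature, with a hypothetical f_inv callable rather than the class's own helpers:

import numpy as np

def warped_mean(f_inv, mu, var, degree=20):
    # E[f_inv(Z)] for Z ~ N(mu, var) via Gauss-Hermite quadrature:
    # substitute z = mu + sqrt(2*var)*x so the weight becomes exp(-x**2)
    x, w = np.polynomial.hermite.hermgauss(degree)
    z = mu + np.sqrt(2.0 * var) * x
    return (w * f_inv(z)).sum() / np.sqrt(np.pi)
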
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, median=False):
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, kern=None):
"""
Get the predictive quantiles around the prediction at X
@@ -135,15 +126,19 @@ class WarpedGP(GP):
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
if not self.predict_in_warped_space:
return [a, b]
new_a = self.warping_function.f_inv(a)
new_b = self.warping_function.f_inv(b)
return [new_a, new_b]
qs = super(WarpedGP, self).predict_quantiles(X, quantiles, Y_metadata=Y_metadata, likelihood=likelihood, kern=kern)
if self.predict_in_warped_space:
return [self.warping_function.f_inv(q) for q in qs]
return qs
#m, v = self._raw_predict(X, full_cov=False)
#if self.normalizer is not None:
# m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
#a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
#if not self.predict_in_warped_space:
# return [a, b]
#new_a = self.warping_function.f_inv(a)
#new_b = self.warping_function.f_inv(b)
#return [new_a, new_b]
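
Mapping the already un-normalized quantiles through f_inv replaces the manual _raw_predict path above, and rests on the same monotonicity argument: quantiles commute with strictly increasing transforms. A tiny Monte Carlo sanity check of that property (np.exp stands in for a warping inverse here; it is not a GPy warping function):

import numpy as np

rng = np.random.RandomState(0)
z = rng.randn(200000)
g = np.exp   # any strictly increasing map
np.testing.assert_allclose(g(np.percentile(z, 97.5)),
                           np.percentile(g(z), 97.5), rtol=1e-2)
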
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
"""

View file

@@ -74,9 +74,6 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, wh
if 'output_index' not in predict_kw['Y_metadata']:
predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int)
if isinstance(self, WarpedGP) and self.predict_in_warped_space:
predict_kw['median'] = True
mu, _ = self.predict(Xgrid, **predict_kw)
if percentiles is not None:
@@ -299,8 +296,10 @@ def get_x_y_var(model):
Y = model.Y.values
except AttributeError:
Y = model.Y
if isinstance(model, WarpedGP) and model.predict_in_warped_space:
Y = model.Y_untransformed
if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
Y = model.Y_normalized
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
return X, X_variance, Y

View file

@@ -91,18 +91,32 @@ class MiscTests(unittest.TestCase):
k = GPy.kern.RBF(1)
m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
m2[:] = m[:]
mu1, var1 = m.predict(m.X, full_cov=True)
mu2, var2 = m2.predict(m2.X, full_cov=True)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
np.testing.assert_allclose(var1, var2*std**2)
mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = m2.predict(m2.X, full_cov=False)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
np.testing.assert_allclose(var1, var2*std**2)
q50n = m.predict_quantiles(m.X, (50,))
q50 = m2.predict_quantiles(m2.X, (50,))
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
# Test variance component:
qs = np.array([2.5, 97.5])
# The quantiles are computed before un-normalization
# And transformed using the mean transformation:
c = np.random.choice(self.X.shape[0])
q95 = m2.predict_quantiles(self.X[[c]], qs)
mu, var = m2.predict(self.X[[c]])
from scipy.stats import norm
np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())
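
For reference, the identity this assertion exercises: with a Gaussian likelihood the predictive at a point is N(mu, v), so its alpha-quantile is

q_alpha = mu + Phi^{-1}(alpha) * sqrt(v)

and the normalized model's quantiles are this quantity pushed through x -> x*std + mean, as checked against q50n above.
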
def check_jacobian(self):
try:

View file

@@ -6,7 +6,7 @@ Created on Aug 27, 2014
import logging
import numpy as np
class Norm(object):
class _Norm(object):
def __init__(self):
pass
def scale_by(self, Y):
@@ -18,7 +18,8 @@ class Norm(object):
"""
Project Y into normalized space
"""
raise NotImplementedError
if not self.scaled():
raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
def inverse_mean(self, X):
"""
Project the normalized object X into space of Y
@@ -31,15 +32,46 @@ class Norm(object):
Whether this Norm object has been initialized.
"""
raise NotImplementedError
class MeanNorm(Norm):
class Standardize(_Norm):
def __init__(self):
self.mean = None
def scale_by(self, Y):
Y = np.ma.masked_invalid(Y, copy=False)
self.mean = Y.mean(0).view(np.ndarray)
self.std = Y.std(0).view(np.ndarray)
def normalize(self, Y):
return Y-self.mean
super(Standardize, self).normalize(Y)
return (Y-self.mean)/self.std
def inverse_mean(self, X):
return X+self.mean
return (X*self.std)+self.mean
def inverse_variance(self, var):
return (var*(self.std**2))
def scaled(self):
return self.mean is not None
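
A round-trip sketch for the new Standardize normalizer (column-wise, and NaN-tolerant thanks to the masked mean/std above); the import path assumes GPy's util.normalizer module as touched in this commit:

import numpy as np
from GPy.util.normalizer import Standardize

Y = np.random.randn(100, 2) * 3.0 + 50.0
norm = Standardize()
norm.scale_by(Y)          # estimates per-column mean and std
Yn = norm.normalize(Y)    # (Y - mean) / std
np.testing.assert_allclose(norm.inverse_mean(Yn), Y)
np.testing.assert_allclose(norm.inverse_variance(np.ones_like(Y)), norm.std**2)
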
# Inverse variance to be implemented, disabling for now
# If someone in the future wants to implement this,
# we need to implement the inverse variance for
# normalization. This means, we need to know the factor
# for the variance to be multiplied to the variance in
# normalized space. This is easy to compute for standardization
# (see above) but gets tricky here.
# class Normalize(_Norm):
# def __init__(self):
# self.ymin = None
# self.ymax = None
# def scale_by(self, Y):
# Y = np.ma.masked_invalid(Y, copy=False)
# self.ymin = Y.min(0).view(np.ndarray)
# self.ymax = Y.max(0).view(np.ndarray)
# def normalize(self, Y):
# super(Normalize, self).normalize(Y)
# return (Y - self.ymin) / (self.ymax - self.ymin) - .5
# def inverse_mean(self, X):
# return (X + .5) * (self.ymax - self.ymin) + self.ymin
# def inverse_variance(self, var):
#
# return (var*(self.std**2))
# def scaled(self):
# return (self.ymin is not None) and (self.ymax is not None)

View file

@@ -31,7 +31,7 @@ class WarpingFunction(Parameterized):
"""gradient of f w.r.t to y"""
raise NotImplementedError
def f_inv(self, z, max_iterations=100, y=None):
def f_inv(self, z, max_iterations=250, y=None):
"""
Calculate the numerical inverse of f. This should be
overwritten for specific warping functions where the
@@ -51,14 +51,11 @@
update = (fy - z) / fgrady
y -= self.rate * update
it += 1
if it == max_iterations:
print("WARNING!!! Maximum number of iterations reached in f_inv ")
print("Sum of roots: %.4f" % np.sum(fy - z))
#if it == max_iterations:
# print("WARNING!!! Maximum number of iterations reached in f_inv ")
# print("Sum of roots: %.4f" % np.sum(fy - z))
return y
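
The convergence warning is silenced here and the iteration cap raised from 100 to 250. A generic standalone sketch of the same Newton scheme, with hypothetical f/fgrad callables and an explicit residual a caller could inspect in place of the removed warning:

import numpy as np

def newton_inverse(f, fgrad, z, rate=1.0, max_iterations=250, tol=1e-10):
    # Solve f(y) = z elementwise by damped Newton iteration, starting from z
    y = np.array(z, dtype=float)
    for _ in range(max_iterations):
        update = (f(y) - z) / fgrad(y)
        y -= rate * update
        if np.max(np.abs(update)) < tol:
            break
    return y, np.max(np.abs(f(y) - z))  # residual exposes non-convergence
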
def _get_param_names(self):
raise NotImplementedError
def plot(self, xmin, xmax):
y = np.arange(xmin, xmax, 0.01)
f_y = self.f(y)
@@ -159,13 +156,6 @@ class TanhFunction(WarpingFunction):
return gradients
def _get_param_names(self):
variables = ['a', 'b', 'c', 'd']
names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)]
for q in range(self.n_terms)],[])
names.append('warp_tanh')
return names
def update_grads(self, Y_untransformed, Kiy):
grad_y = self.fgrad_y(Y_untransformed)
grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed,