Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-15 06:52:39 +02:00)
[warped stuff] plotting and normalizer in warped gps
parent f50b691ec6
commit d343ec8b41
8 changed files with 112 additions and 78 deletions
GPy/__init__.py
@@ -14,6 +14,8 @@ from . import testing
 from . import kern
 from . import plotting
 
+from .util import normalizer
+
 # backwards compatibility
 import sys
 backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations']
GPy/core/gp.py
@@ -2,17 +2,17 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
+from .. import kern
-from GPy.core.model import Model
-from paramz import ObsAr
+from .model import Model
+from .parameterization.variational import VariationalPosterior
 from .mapping import Mapping
 from .. import likelihoods
-from .. import kern
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
-from GPy.core.parameterization.variational import VariationalPosterior
+from ..util.normalizer import Standardize
+from paramz import ObsAr
 
 import logging
 import warnings
-from GPy.util.normalizer import MeanNorm
 logger = logging.getLogger("GP")
 
 class GP(Model):
@@ -28,7 +28,7 @@ class GP(Model):
     :param Norm normalizer:
         normalize the outputs Y.
         Prediction will be un-normalized using this normalizer.
-        If normalizer is None, we will normalize using MeanNorm.
+        If normalizer is None, we will normalize using Standardize.
         If normalizer is False, no normalization will be done.
 
     .. Note:: Multiple independent outputs are allowed using columns of Y
@@ -49,7 +49,7 @@ class GP(Model):
         logger.info("initializing Y")
 
         if normalizer is True:
-            self.normalizer = MeanNorm()
+            self.normalizer = Standardize()
         elif normalizer is False:
             self.normalizer = None
         else:
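For context, a minimal usage sketch of the new default (not part of the commit; assumes a GPy build containing this change, with numpy installed; the data is illustrative). With normalizer=True the model now standardizes Y to zero mean and unit variance internally and un-normalizes its predictions:

import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = 100. + 10. * np.sin(6. * X) + 0.5 * np.random.randn(50, 1)  # Y far from zero mean

m = GPy.models.GPRegression(X, Y, normalizer=True)  # Standardize() is used internally
m.optimize()
mu, var = m.predict(X)  # already back in the original scale of Y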
@@ -64,6 +64,7 @@ class GP(Model):
             self.Y_normalized = self.Y
         else:
             self.Y = Y
+            self.Y_normalized = self.Y
 
         if Y.shape[0] != self.num_data:
             #There can be cases where we want inputs than outputs, for example if we have multiple latent
@@ -248,14 +249,16 @@ class GP(Model):
         """
         #predict the latent function values
         mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
-        if self.normalizer is not None:
-            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
 
         if include_likelihood:
             # now push through likelihood
             if likelihood is None:
                 likelihood = self.likelihood
             mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
+
+        if self.normalizer is not None:
+            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
+
         return mu, var
 
     def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
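The reordering above matters for the variance: the Gaussian noise is now added in normalized space and only then scaled back. A hedged arithmetic sketch with illustrative numbers (names mirror the Standardize methods in this commit):

std, mean = 2.0, 5.0      # Standardize parameters (illustrative)
mu_n, var_n = 0.3, 0.04   # latent predictive moments, normalized space
noise_n = 0.01            # Gaussian noise variance, normalized space

mu = mu_n * std + mean               # inverse_mean
var = (var_n + noise_n) * std ** 2   # likelihood first, then inverse_variance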
@@ -300,11 +303,14 @@ class GP(Model):
         :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
         """
         m, v = self._raw_predict(X, full_cov=False, kern=kern)
-        if self.normalizer is not None:
-            m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
         if likelihood is None:
             likelihood = self.likelihood
-        return likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
+
+        quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
+
+        if self.normalizer is not None:
+            quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
+        return quantiles
 
     def predictive_gradients(self, Xnew, kern=None):
         """
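A quantile is a location, so only the mean transform applies when mapping it back to the data scale. An illustrative sketch of the new path above (all values made up):

import numpy as np
from scipy.stats import norm

std, mean = 2.0, 5.0                        # Standardize parameters (illustrative)
m_n, v_n = 0.0, 1.0                         # predictive moments, normalized space
q_n = m_n + norm.ppf(0.975) * np.sqrt(v_n)  # 97.5% quantile, normalized space
q = q_n * std + mean                        # inverse_mean: back to the data scale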
GPy/kern/src/static.py
@@ -135,9 +135,7 @@ class Bias(Static):
 
     def K(self, X, X2=None):
         shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
-        ret = np.empty(shape, dtype=np.float64)
-        ret[:] = self.variance
-        return ret
+        return np.full(shape, self.variance, dtype=np.float64)
 
     def update_gradients_full(self, dL_dK, X, X2=None):
         self.variance.gradient = dL_dK.sum()
@@ -146,9 +144,7 @@ class Bias(Static):
         self.variance.gradient = dL_dKdiag.sum()
 
     def psi2(self, Z, variational_posterior):
-        ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
-        ret[:] = self.variance*self.variance*variational_posterior.shape[0]
-        return ret
+        return np.full((Z.shape[0], Z.shape[0]), self.variance*self.variance*variational_posterior.shape[0], dtype=np.float64)
 
     def psi2n(self, Z, variational_posterior):
         ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
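Both hunks above apply the same micro-simplification; a self-contained check of the equivalence (shape and value illustrative):

import numpy as np

shape, variance = (3, 4), 2.5

ret = np.empty(shape, dtype=np.float64)  # old pattern: allocate, then fill
ret[:] = variance

assert np.array_equal(ret, np.full(shape, variance, dtype=np.float64))  # new pattern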
GPy/models/warped_gp.py
@@ -15,7 +15,7 @@ class WarpedGP(GP):
     This defines a GP Regression model that applies a
     warping function to the output.
     """
-    def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
+    def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalizer=False):
         if kernel is None:
             kernel = kern.RBF(X.shape[1])
         if warping_function == None:
@@ -23,33 +23,23 @@ class WarpedGP(GP):
             self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1)
         else:
             self.warping_function = warping_function
-        self.scale_data = False
-        if self.scale_data:
-            Y = self._scale_data(Y)
-        #self.has_uncertain_inputs = False
-        self.Y_untransformed = Y.copy()
-        self.predict_in_warped_space = True
         likelihood = likelihoods.Gaussian()
-        GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
+        super(WarpedGP, self).__init__(X, Y.copy(), likelihood=likelihood, kernel=kernel, normalizer=normalizer)
+        self.Y_normalized = self.Y_normalized.copy()
+        self.Y_untransformed = self.Y_normalized.copy()
+        self.predict_in_warped_space = True
         self.link_parameter(self.warping_function)
 
     def set_XY(self, X=None, Y=None):
-        self.Y_untransformed = Y.copy()
-        GP.set_XY(self, X, self.transform_data())
-
-    def _scale_data(self, Y):
-        self._Ymax = Y.max()
-        self._Ymin = Y.min()
-        return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
-
-    def _unscale_data(self, Y):
-        return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
+        super(WarpedGP, self).set_XY(X, Y)
+        self.Y_untransformed = self.Y_normalized.copy()
+        self.update_model(True)
 
     def parameters_changed(self):
         """
         Notice that we update the warping function gradients here.
         """
-        self.Y[:] = self.transform_data()
+        self.Y_normalized[:] = self.transform_data()
         super(WarpedGP, self).parameters_changed()
         Kiy = self.posterior.woodbury_vector.flatten()
         self.warping_function.update_grads(self.Y_untransformed, Kiy)
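A minimal sketch of the new constructor keyword (assuming a GPy build with this change; data illustrative): the warping function is now learned on the normalized targets, and predictions come back in the original Y space.

import numpy as np
import GPy

X = np.random.rand(40, 1)
Y = np.exp(1.0 + X + 0.1 * np.random.randn(40, 1))  # positive, skewed targets

m = GPy.models.WarpedGP(X, Y, warping_terms=2, normalizer=True)
m.optimize()
mu, var = m.predict(X)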
@@ -96,7 +86,7 @@ class WarpedGP(GP):
                                              deg_gauss_hermite=deg_gauss_hermite)
         return arg1 - (arg2 ** 2)
 
-    def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None,
+    def predict(self, Xnew, kern=None, pred_init=None, Y_metadata=None,
                 median=False, deg_gauss_hermite=20, likelihood=None):
         """
         Prediction results depend on:
@@ -104,9 +94,12 @@ class WarpedGP(GP):
         - The median flag passed as argument
         The likelihood keyword is never used, it is just to follow the plotting API.
         """
-        mu, var = GP._raw_predict(self, Xnew)
+        #mu, var = GP._raw_predict(self, Xnew)
         # now push through likelihood
-        mean, var = self.likelihood.predictive_values(mu, var)
+        #mean, var = self.likelihood.predictive_values(mu, var)
+
+        mean, var = super(WarpedGP, self).predict(Xnew, kern=kern, full_cov=False, likelihood=likelihood)
+
 
         if self.predict_in_warped_space:
             std = np.sqrt(var)
@@ -120,11 +113,9 @@ class WarpedGP(GP):
         else:
             wmean = mean
             wvar = var
-        if self.scale_data:
-            pred = self._unscale_data(pred)
         return wmean, wvar
 
-    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, median=False):
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, kern=None):
         """
         Get the predictive quantiles around the prediction at X
 
@@ -135,15 +126,19 @@ class WarpedGP(GP):
         :returns: list of quantiles for each X and predictive quantiles for interval combination
         :rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
         """
-        m, v = self._raw_predict(X, full_cov=False)
-        if self.normalizer is not None:
-            m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
-        a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
-        if not self.predict_in_warped_space:
-            return [a, b]
-        new_a = self.warping_function.f_inv(a)
-        new_b = self.warping_function.f_inv(b)
-        return [new_a, new_b]
+        qs = super(WarpedGP, self).predict_quantiles(X, quantiles, Y_metadata=Y_metadata, likelihood=likelihood, kern=kern)
+        if self.predict_in_warped_space:
+            return [self.warping_function.f_inv(q) for q in qs]
+        return qs
+        #m, v = self._raw_predict(X, full_cov=False)
+        #if self.normalizer is not None:
+        #    m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
+        #a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
+        #if not self.predict_in_warped_space:
+        #    return [a, b]
+        #new_a = self.warping_function.f_inv(a)
+        #new_b = self.warping_function.f_inv(b)
+        #return [new_a, new_b]
 
     def log_predictive_density(self, x_test, y_test, Y_metadata=None):
         """
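Why mapping each quantile through f_inv is valid: quantiles are preserved under monotonic transformations. A standalone sketch with log/exp standing in for a generic warping f and its inverse (all names illustrative):

import numpy as np
from scipy.stats import norm

mu, var = 1.0, 0.25                 # latent predictive moments
qs = (2.5, 97.5)
latent_q = [mu + norm.ppf(q / 100.) * np.sqrt(var) for q in qs]
warped_q = [np.exp(q) for q in latent_q]  # f_inv applied per quantile, here exp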
GPy/plotting/gpy_plot/plot_util.py
@@ -74,9 +74,6 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, wh
     if 'output_index' not in predict_kw['Y_metadata']:
         predict_kw['Y_metadata']['output_index'] = Xgrid[:,-1:].astype(np.int)
 
-    if isinstance(self, WarpedGP) and self.predict_in_warped_space:
-        predict_kw['median'] = True
-
     mu, _ = self.predict(Xgrid, **predict_kw)
 
     if percentiles is not None:
@@ -299,8 +296,10 @@ def get_x_y_var(model):
         Y = model.Y.values
     except AttributeError:
         Y = model.Y
     if isinstance(model, WarpedGP) and model.predict_in_warped_space:
         Y = model.Y_untransformed
-
+    if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
+        Y = model.Y_normalized
+
     if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
     return X, X_variance, Y
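A hedged plotting sketch, assuming matplotlib and a GPy build with this change (data illustrative): the helper above scatters Y_untransformed for a WarpedGP predicting in warped space, and falls back to the normalized targets otherwise.

import numpy as np
import GPy

X = np.linspace(0, 1, 30)[:, None]
Y = np.exp(np.sin(3 * X) + 0.1 * np.random.randn(30, 1))

m = GPy.models.WarpedGP(X, Y)
m.optimize()
m.plot()  # get_x_y_var() picks the Y to scatter alongside the prediction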
GPy/testing/misc_tests.py
@@ -91,18 +91,32 @@ class MiscTests(unittest.TestCase):
         k = GPy.kern.RBF(1)
         m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
         m2[:] = m[:]
 
         mu1, var1 = m.predict(m.X, full_cov=True)
         mu2, var2 = m2.predict(m2.X, full_cov=True)
         np.testing.assert_allclose(mu1, (mu2*std)+mu)
-        np.testing.assert_allclose(var1, var2)
+        np.testing.assert_allclose(var1, var2*std**2)
 
         mu1, var1 = m.predict(m.X, full_cov=False)
         mu2, var2 = m2.predict(m2.X, full_cov=False)
 
         np.testing.assert_allclose(mu1, (mu2*std)+mu)
-        np.testing.assert_allclose(var1, var2)
+        np.testing.assert_allclose(var1, var2*std**2)
 
+        q50n = m.predict_quantiles(m.X, (50,))
+        q50 = m2.predict_quantiles(m2.X, (50,))
+
+        np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
+
+        # Test variance component:
+        qs = np.array([2.5, 97.5])
+        # The quantiles get computed before unormalization
+        # And transformed using the mean transformation:
+        c = np.random.choice(self.X.shape[0])
+        q95 = m2.predict_quantiles(self.X[[c]], qs)
+        mu, var = m2.predict(self.X[[c]])
+        from scipy.stats import norm
+        np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())
 
     def check_jacobian(self):
         try:
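The arithmetic behind the new assertions, as a standalone check (numbers illustrative): with Y hand-standardized as (Y - mu)/std, a normalizer=True model must return mean_n*std + mu and var_n*std**2, and a Gaussian q-quantile is mean + ppf(q)*sqrt(var).

import numpy as np
from scipy.stats import norm

mu, std = 3.0, 2.0         # normalization constants (illustrative)
mean_n, var_n = 0.4, 0.09  # predictions of the hand-normalized model

mean = mean_n * std + mu   # what the normalizing model should return
var = var_n * std ** 2
q975 = mean + norm.ppf(0.975) * np.sqrt(var)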
GPy/util/normalizer.py
@@ -6,7 +6,7 @@ Created on Aug 27, 2014
 import logging
 import numpy as np
 
-class Norm(object):
+class _Norm(object):
     def __init__(self):
         pass
     def scale_by(self, Y):
@@ -18,7 +18,8 @@ class Norm(object):
         """
         Project Y into normalized space
         """
-        raise NotImplementedError
+        if not self.scaled():
+            raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
     def inverse_mean(self, X):
         """
         Project the normalized object X into space of Y
@@ -31,15 +32,46 @@ class Norm(object):
         Whether this Norm object has been initialized.
         """
         raise NotImplementedError
-class MeanNorm(Norm):
+
+class Standardize(_Norm):
     def __init__(self):
         self.mean = None
     def scale_by(self, Y):
         Y = np.ma.masked_invalid(Y, copy=False)
         self.mean = Y.mean(0).view(np.ndarray)
+        self.std = Y.std(0).view(np.ndarray)
     def normalize(self, Y):
-        return Y-self.mean
+        super(Standardize, self).normalize(Y)
+        return (Y-self.mean)/self.std
     def inverse_mean(self, X):
-        return X+self.mean
+        return (X*self.std)+self.mean
+    def inverse_variance(self, var):
+        return (var*(self.std**2))
     def scaled(self):
         return self.mean is not None
+
+# Inverse variance to be implemented, disabling for now
+# If someone in the future want to implement this,
+# we need to implement the inverse variance for
+# normalization. This means, we need to know the factor
+# for the variance to be multiplied to the variance in
+# normalized space. This is easy to compute for standardization
+# (see above) but gets tricky here.
+# class Normalize(_Norm):
+#     def __init__(self):
+#         self.ymin = None
+#         self.ymax = None
+#     def scale_by(self, Y):
+#         Y = np.ma.masked_invalid(Y, copy=False)
+#         self.ymin = Y.min(0).view(np.ndarray)
+#         self.ymax = Y.max(0).view(np.ndarray)
+#     def normalize(self, Y):
+#         super(Normalize, self).normalize(Y)
+#         return (Y - self.ymin) / (self.ymax - self.ymin) - .5
+#     def inverse_mean(self, X):
+#         return (X + .5) * (self.ymax - self.ymin) + self.ymin
+#     def inverse_variance(self, var):
+#
+#         return (var*(self.std**2))
+#     def scaled(self):
+#         return (self.ymin is not None) and (self.ymax is not None)
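A usage sketch of the Standardize class added above (data illustrative):

import numpy as np
from GPy.util.normalizer import Standardize

Y = np.array([[1.], [2.], [3.], [4.]])

norm = Standardize()
norm.scale_by(Y)        # estimates per-column mean and std
Yn = norm.normalize(Y)  # (Y - mean) / std

assert np.allclose(norm.inverse_mean(Yn), Y)
assert np.allclose(norm.inverse_variance(1.0), Y.std(0) ** 2)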
GPy/util/warping_functions.py
@@ -31,7 +31,7 @@ class WarpingFunction(Parameterized):
         """gradient of f w.r.t to y"""
         raise NotImplementedError
 
-    def f_inv(self, z, max_iterations=100, y=None):
+    def f_inv(self, z, max_iterations=250, y=None):
         """
         Calculate the numerical inverse of f. This should be
         overwritten for specific warping functions where the
@@ -51,14 +51,11 @@ class WarpingFunction(Parameterized):
             update = (fy - z) / fgrady
             y -= self.rate * update
             it += 1
-            if it == max_iterations:
-                print("WARNING!!! Maximum number of iterations reached in f_inv ")
-                print("Sum of roots: %.4f" % np.sum(fy - z))
+            #if it == max_iterations:
+            #    print("WARNING!!! Maximum number of iterations reached in f_inv ")
+            #    print("Sum of roots: %.4f" % np.sum(fy - z))
         return y
 
-    def _get_param_names(self):
-        raise NotImplementedError
-
     def plot(self, xmin, xmax):
         y = np.arange(xmin, xmax, 0.01)
         f_y = self.f(y)
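A standalone sketch of the Newton-style loop in f_inv above, with a single tanh term standing in for the warping (constants illustrative; the real class steps with self.rate exactly like this):

import numpy as np

def f(y):      # toy monotonic warping, f(y) = y + a*tanh(b*(y + c))
    return y + 0.5 * np.tanh(2.0 * (y + 0.1))

def fgrad(y):  # df/dy = 1 + a*b / cosh(b*(y + c))**2
    return 1.0 + 0.5 * 2.0 / np.cosh(2.0 * (y + 0.1)) ** 2

def f_inv(z, rate=1.0, max_iterations=250):
    y = z.copy()
    for _ in range(max_iterations):
        update = (f(y) - z) / fgrad(y)
        y -= rate * update
        if np.abs(update).max() < 1e-10:  # converged
            break
    return y

z = np.linspace(-1., 1., 5)
assert np.allclose(f(f_inv(z)), z, atol=1e-8)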
@@ -159,13 +156,6 @@ class TanhFunction(WarpingFunction):
 
         return gradients
 
-    def _get_param_names(self):
-        variables = ['a', 'b', 'c', 'd']
-        names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)]
-                     for q in range(self.n_terms)],[])
-        names.append('warp_tanh')
-        return names
-
     def update_grads(self, Y_untransformed, Kiy):
         grad_y = self.fgrad_y(Y_untransformed)
         grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed,