[normalizer] first commit for normalizer in GPy

Conflicts:
	GPy/core/sparse_gp.py
	GPy/models/bayesian_gplvm.py
This commit is contained in:
mzwiessele 2014-08-27 12:05:13 -07:00
parent 7ed0e70a46
commit 7ec0e75c0e
6 changed files with 101 additions and 24 deletions

View file

@@ -15,6 +15,7 @@ from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse from scipy.sparse.base import issparse
import logging import logging
from GPy.util.normalizer import GaussianNorm
logger = logging.getLogger("GP") logger = logging.getLogger("GP")
class GP(Model): class GP(Model):
@@ -27,12 +28,17 @@ class GP(Model):
:param likelihood: a GPy likelihood :param likelihood: a GPy likelihood
:param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP :param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP
:rtype: model object :rtype: model object
:param Norm normalizer:
normalize the outputs Y.
Prediction will be un-normalized using this normalizer.
If normalizer is None, we will normalize using GaussianNorm.
If normalizer is False, no normalization will be done.
.. Note:: Multiple independent outputs are allowed using columns of Y .. Note:: Multiple independent outputs are allowed using columns of Y
""" """
def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None): def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
super(GP, self).__init__(name) super(GP, self).__init__(name)
assert X.ndim == 2 assert X.ndim == 2
@@ -44,8 +50,22 @@ class GP(Model):
assert Y.ndim == 2 assert Y.ndim == 2
logger.info("initializing Y") logger.info("initializing Y")
if issparse(Y): self.Y = Y
else: self.Y = ObsAr(Y) if normalizer is None:
self.normalizer = GaussianNorm()
elif normalizer is False:
self.normalizer = None
else:
self.normalizer = normalizer
if self.normalizer is not None:
self.normalizer.scale_by(Y)
self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
self.Y = Y
else:
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
assert Y.shape[0] == self.num_data assert Y.shape[0] == self.num_data
_, self.output_dim = self.Y.shape _, self.output_dim = self.Y.shape
@@ -74,7 +94,7 @@ class GP(Model):
self.add_parameter(self.likelihood) self.add_parameter(self.likelihood)
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X) self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
@@ -142,7 +162,11 @@ class GP(Model):
# now push through likelihood # now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata) mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
return mean, var
if self.normalizer is not None:
return self.normalizer.inverse_mean(mean), self.normalizer.inverse_variance(var)
else:
return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None): def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
m, v = self._raw_predict(X, full_cov=False) m, v = self._raw_predict(X, full_cov=False)

View file

@@ -34,7 +34,7 @@ class SparseGP(GP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False):
#pick a sensible inference method #pick a sensible inference method
if inference_method is None: if inference_method is None:
@@ -48,7 +48,7 @@ class SparseGP(GP):
self.Z = Param('inducing inputs', Z) self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0] self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata) GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
logger.info("Adding Z as parameter") logger.info("Adding Z as parameter")
self.add_parameter(self.Z, index=0) self.add_parameter(self.Z, index=0)
@@ -56,7 +56,7 @@ class SparseGP(GP):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior): if isinstance(self.X, VariationalPosterior):
#gradients wrt kernel #gradients wrt kernel

View file

@@ -25,9 +25,10 @@ class BayesianGPLVM(SparseGP):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, **kwargs): Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None):
self.mpi_comm = mpi_comm self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False self.__IN_OPTIMIZATION__ = False
self.logger = logging.getLogger(self.__class__.__name__) self.logger = logging.getLogger(self.__class__.__name__)
if X == None: if X == None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
@@ -49,7 +50,7 @@ class BayesianGPLVM(SparseGP):
if kernel is None: if kernel is None:
self.logger.info("initializing kernel RBF") self.logger.info("initializing kernel RBF")
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim) kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) #+ kern.Bias(input_dim) + kern.White(input_dim)
if likelihood is None: if likelihood is None:
likelihood = Gaussian() likelihood = Gaussian()
@@ -75,7 +76,7 @@ class BayesianGPLVM(SparseGP):
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU): if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True kernel.psicomp.GPU_direct = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, normalizer=normalizer)
self.logger.info("Adding X as parameter") self.logger.info("Adding X as parameter")
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)

View file

@@ -15,17 +15,22 @@ class GPRegression(GP):
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values
:param kernel: a GPy kernel, defaults to rbf :param kernel: a GPy kernel, defaults to rbf
:param Norm normalizer: [False]
Normalize Y with the norm given.
If normalizer is False, no normalization will be done
If it is None, we use GaussianNorm(alization)
.. Note:: Multiple independent outputs are allowed using columns of Y .. Note:: Multiple independent outputs are allowed using columns of Y
""" """
def __init__(self, X, Y, kernel=None, Y_metadata=None): def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
if kernel is None: if kernel is None:
kernel = kern.RBF(X.shape[1]) kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata) super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)

View file

@@ -30,7 +30,7 @@ class SparseGPRegression(SparseGP):
""" """
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None): def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None):
num_data, input_dim = X.shape num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability) # kern defaults to rbf (plus white for stability)
@@ -49,7 +49,7 @@ class SparseGPRegression(SparseGP):
if not (X_variance is None): if not (X_variance is None):
X = NormalPosterior(X,X_variance) X = NormalPosterior(X,X_variance)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC()) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC(), normalizer=normalizer)
class SparseGPRegressionUncertainInput(SparseGP): class SparseGPRegressionUncertainInput(SparseGP):
""" """
@@ -59,7 +59,7 @@ class SparseGPRegressionUncertainInput(SparseGP):
""" """
def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10): def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10, normalizer=None):
""" """
:param X: input observations :param X: input observations
:type X: np.ndarray (num_data x input_dim) :type X: np.ndarray (num_data x input_dim)
@@ -91,5 +91,5 @@ class SparseGPRegressionUncertainInput(SparseGP):
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC()) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC(), normalizer=normalizer)
self.ensure_default_constraints() self.ensure_default_constraints()

47
GPy/util/normalizer.py Normal file
View file

@@ -0,0 +1,47 @@
'''
Created on Aug 27, 2014
@author: t-mazwie
'''
import logging
import numpy as np
class Norm(object):
    """
    Interface for output normalizers used by GP models.

    A Norm maps the raw output matrix Y into a normalized space in which
    inference is run, and maps predictive means/variances back into the
    original space of Y.
    """
    def scale_by(self, Y):
        """
        Use data matrix Y as normalization space to work in.
        """
        raise NotImplementedError

    def normalize(self, Y):
        """
        Project Y into normalized space
        """
        raise NotImplementedError

    def inverse_mean(self, X):
        """
        Project the normalized object X into space of Y
        """
        raise NotImplementedError

    def inverse_variance(self, var):
        # Consistency fix: GP.predict un-normalizes the predictive variance
        # via normalizer.inverse_variance(), and GaussianNorm implements it —
        # the interface must declare it so every subclass provides it.
        """
        Project a normalized variance back into the space of Y.
        """
        raise NotImplementedError

    def scaled(self):
        """
        Whether this Norm object has been initialized.
        """
        raise NotImplementedError
class GaussianNorm(Norm):
    """
    Normalize outputs column-wise to zero mean and unit variance.

    Statistics are computed with NaN/inf entries masked out
    (np.ma.masked_invalid), so missing observations do not corrupt the
    normalization.
    """
    def __init__(self):
        # Set by scale_by(); None means the normalizer is uninitialized.
        self.mean = None
        self.std = None

    def scale_by(self, Y):
        """
        Store the column-wise mean and standard deviation of Y,
        ignoring invalid (NaN/inf) entries.
        """
        Y = np.ma.masked_invalid(Y, copy=False)
        self.mean = Y.mean(0).view(np.ndarray)
        self.std = Y.std(0).view(np.ndarray)
        # Robustness fix: a constant column has std == 0, which would make
        # normalize() divide by zero (inf/NaN). Leave such columns unscaled.
        self.std[self.std == 0.0] = 1.0

    def normalize(self, Y):
        """
        Project Y into the zero-mean, unit-variance space.
        """
        return (Y - self.mean) / self.std

    def inverse_mean(self, X):
        """
        Project a normalized mean X back into the space of Y.
        """
        return (X * self.std) + self.mean

    def inverse_variance(self, var):
        """
        Project a normalized variance back into the space of Y
        (variances scale with the square of the standard deviation).
        """
        return var * self.std ** 2

    def scaled(self):
        """
        Whether scale_by() has been called on this normalizer.
        """
        return self.mean is not None and self.std is not None