Merge remote-tracking branch 'origin/devel' into feature-multioutput

This commit is contained in:
Eero Siivola 2018-06-24 12:43:49 +03:00
commit afe9206656
19 changed files with 642 additions and 142 deletions

View file

@ -1 +1 @@
__version__ = "1.8.5"
__version__ = "1.9.2"

View file

@ -282,10 +282,12 @@ class GP(Model):
mu += self.mean_function.f(Xnew)
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None,
likelihood=None, include_likelihood=True):
"""
Predict the function(s) at the new point(s) Xnew. This includes the likelihood
variance added to the predicted underlying function (usually referred to as f).
Predict the function(s) at the new point(s) Xnew. This includes the
likelihood variance added to the predicted underlying function
(usually referred to as f).
In order to predict without adding in the likelihood give
`include_likelihood=False`, or refer to self.predict_noiseless().
@ -295,33 +297,49 @@ class GP(Model):
:param full_cov: whether to return the full covariance matrix, or just
the diagonal
:type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param Y_metadata: metadata about the predicting point to pass to the
likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses.
:param bool include_likelihood: Whether or not to add likelihood noise to the predicted underlying latent function f.
:param include_likelihood: Whether or not to add likelihood noise to
the predicted underlying latent function f.
:type include_likelihood: bool
:returns: (mean, var):
mean: posterior mean, a Numpy array, Nnew x self.input_dim
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False,
Nnew x Nnew otherwise
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
If full_cov and self.input_dim > 1, the return shape of var is
Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return
shape is Nnew x Nnew. This is to allow for different normalizations
of the output dimensions.
Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
Note: If you want the predictive quantiles (e.g. 95% confidence
interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
# Predict the latent function values
mean, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
if include_likelihood:
# now push through likelihood
if likelihood is None:
likelihood = self.likelihood
mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
mean, var = likelihood.predictive_values(mean, var, full_cov,
Y_metadata=Y_metadata)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
mean = self.normalizer.inverse_mean(mean)
return mu, var
# We need to create 3d array for the full covariance matrix with
# multiple outputs.
if full_cov & (mean.shape[1] > 1):
var = self.normalizer.inverse_covariance(var)
else:
var = self.normalizer.inverse_variance(var)
return mean, var
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
"""
@ -376,13 +394,16 @@ class GP(Model):
def predictive_gradients(self, Xnew, kern=None):
"""
Compute the derivatives of the predicted latent function with respect to X*
Compute the derivatives of the predicted latent function with respect
to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP
(usually one).
Note that this is not the same as computing the mean and variance of the derivative of the function!
Note that this is not the same as computing the mean and variance of
the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
@ -393,25 +414,32 @@ class GP(Model):
"""
if kern is None:
kern = self.kern
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
mean_jac[:, :, i] = kern.gradients_X(
self.posterior.woodbury_vector[:, i:i+1].T, Xnew,
self._predictive_variable)
# gradients wrt the diagonal part k_{xx}
dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)
#grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
# Gradients wrt the diagonal part k_{xx}
dv_dX = kern.gradients_X_diag(np.ones(Xnew.shape[0]), Xnew)
# Grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
if self.posterior.woodbury_inv.ndim == 3:
tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
tmp[:] = dv_dX[:,:,None]
var_jac = np.empty(dv_dX.shape +
(self.posterior.woodbury_inv.shape[2],))
var_jac[:] = dv_dX[:, :, None]
for i in range(self.posterior.woodbury_inv.shape[2]):
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
self.posterior.woodbury_inv[:, :, i])
var_jac[:, :, i] += kern.gradients_X(alpha, Xnew,
self._predictive_variable)
else:
tmp = dv_dX
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)
return mean_jac, tmp
var_jac = dv_dX
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
self.posterior.woodbury_inv)
var_jac += kern.gradients_X(alpha, Xnew, self._predictive_variable)
return mean_jac, var_jac
def predict_jacobian(self, Xnew, kern=None, full_cov=False):
"""
@ -564,7 +592,7 @@ class GP(Model):
if self.output_dim == 1:
return sim_one_dim(m, v)
else:
fsim = np.empty((self.output_dim, self.num_data, size))
fsim = np.empty((self.output_dim, X.shape[0], size))
for d in range(self.output_dim):
if full_cov and v.ndim == 3:
fsim[d] = sim_one_dim(m[:, d], v[:, :, d])

View file

@ -288,9 +288,17 @@ class Gamma(Prior):
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
@property
def a(self):
return self._a
@property
def b(self):
return self._b
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self._a = float(a)
self._b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
@ -333,8 +341,8 @@ class Gamma(Prior):
return self.a, self.b
def __setstate__(self, state):
self.a = state[0]
self.b = state[1]
self._a = state[0]
self._b = state[1]
self.constant = -gammaln(self.a) + self.a * np.log(self.b)
class InverseGamma(Gamma):
@ -360,8 +368,8 @@ class InverseGamma(Gamma):
return cls._instances[-1]()
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self._a = float(a)
self._b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):

View file

@ -4,23 +4,26 @@ import matplotlib.pyplot as plt
import GPy.models.state_space_model as SS_model
X = np.linspace(0, 10, 2000)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
def state_space_example():
X = np.linspace(0, 10, 2000)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
kernel1 = GPy.kern.Matern32(X.shape[1])
m1 = GPy.models.GPRegression(X,Y, kernel1)
kernel1 = GPy.kern.Matern32(X.shape[1])
m1 = GPy.models.GPRegression(X,Y, kernel1)
print(m1)
m1.optimize(optimizer='bfgs',messages=True)
print(m1)
m1.optimize(optimizer='bfgs',messages=True)
print(m1)
print(m1)
kernel2 = GPy.kern.sde_Matern32(X.shape[1])
#m2 = SS_model.StateSpace(X,Y, kernel2)
m2 = GPy.models.StateSpace(X,Y, kernel2)
print(m2)
kernel2 = GPy.kern.sde_Matern32(X.shape[1])
#m2 = SS_model.StateSpace(X,Y, kernel2)
m2 = GPy.models.StateSpace(X,Y, kernel2)
print(m2)
m2.optimize(optimizer='bfgs',messages=True)
m2.optimize(optimizer='bfgs',messages=True)
print(m2)
print(m2)
return m1, m2

View file

@ -132,6 +132,13 @@ class posteriorParamsDTC(posteriorParamsBase):
self.mu += (delta_v-delta_tau*self.mu[i])*si
#mu = np.dot(Sigma, v_tilde)
def to_dict(self):
return { "mu": self.mu.tolist(), "Sigma_diag": self.Sigma_diag.tolist()}
@staticmethod
def from_dict(input_dict):
return posteriorParamsDTC(np.array(input_dict["mu"]), np.array(input_dict["Sigma_diag"]))
@staticmethod
def _recompute(LLT0, Kmn, ga_approx):
LLT = LLT0 + np.dot(Kmn*ga_approx.tau[None,:],Kmn.T)
@ -533,3 +540,35 @@ class EPDTC(EPBase, VarDTC):
#Posterior distribution parameters update
if self.parallel_updates == False:
post_params._update_rank1(LLT, Kmn, delta_v, delta_tau, i)
def to_dict(self):
input_dict = super(EPDTC, self)._to_dict()
input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EPDTC"
if self.ga_approx_old is not None:
input_dict["ga_approx_old"] = self.ga_approx_old.to_dict()
if self._ep_approximation is not None:
input_dict["_ep_approximation"] = {}
input_dict["_ep_approximation"]["post_params"] = self._ep_approximation[0].to_dict()
input_dict["_ep_approximation"]["ga_approx"] = self._ep_approximation[1].to_dict()
input_dict["_ep_approximation"]["cav_params"] = self._ep_approximation[2].to_dict()
input_dict["_ep_approximation"]["log_Z_tilde"] = self._ep_approximation[3].tolist()
return input_dict
@staticmethod
def _from_dict(inference_class, input_dict):
ga_approx_old = input_dict.pop('ga_approx_old', None)
if ga_approx_old is not None:
ga_approx_old = gaussianApproximation.from_dict(ga_approx_old)
_ep_approximation_dict = input_dict.pop('_ep_approximation', None)
_ep_approximation = []
if _ep_approximation is not None:
_ep_approximation.append(posteriorParamsDTC.from_dict(_ep_approximation_dict["post_params"]))
_ep_approximation.append(gaussianApproximation.from_dict(_ep_approximation_dict["ga_approx"]))
_ep_approximation.append(cavityParams.from_dict(_ep_approximation_dict["cav_params"]))
_ep_approximation.append(np.array(_ep_approximation_dict["log_Z_tilde"]))
ee = EPDTC(**input_dict)
ee.ga_approx_old = ga_approx_old
ee._ep_approximation = _ep_approximation
return ee

View file

@ -31,13 +31,16 @@ class Prod(CombinationKernel):
"""
def __init__(self, kernels, name='mul'):
for i, kern in enumerate(kernels[:]):
_newkerns = []
for kern in kernels:
if isinstance(kern, Prod):
del kernels[i]
for part in kern.parts[::-1]:
kern.unlink_parameter(part)
kernels.insert(i, part)
super(Prod, self).__init__(kernels, name)
for part in kern.parts:
#kern.unlink_parameter(part)
_newkerns.append(part.copy())
else:
_newkerns.append(kern.copy())
super(Prod, self).__init__(_newkerns, name)
def to_dict(self):
input_dict = super(Prod, self)._to_dict()

View file

@ -337,7 +337,7 @@ def plot(self, plot_limits=None, fixed_inputs=None,
plot_data = False
plots = {}
if hasattr(self, 'Z') and plot_inducing:
plots.update(_plot_inducing(self, canvas, visible_dims, projection, 'Inducing'))
plots.update(_plot_inducing(self, canvas, free_dims, projection, 'Inducing'))
if plot_data:
plots.update(_plot_data(self, canvas, which_data_rows, which_data_ycols, free_dims, projection, "Data"))
plots.update(_plot_data_error(self, canvas, which_data_rows, which_data_ycols, free_dims, projection, "Data Error"))

View file

@ -118,6 +118,51 @@ class MiscTests(unittest.TestCase):
from scipy.stats import norm
np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())
def test_multioutput_regression_with_normalizer(self):
"""
Test that normalizing works in multi-output case
"""
# Create test inputs
X = self.X
Y1 = np.sin(X) + np.random.randn(*X.shape) * 0.2
Y2 = -np.sin(X) + np.random.randn(*X.shape) * 0.05
Y = np.hstack((Y1, Y2))
mu, std = Y.mean(0), Y.std(0)
m = GPy.models.GPRegression(X, Y, normalizer=True)
m.optimize(messages=True)
assert(m.checkgrad())
k = GPy.kern.RBF(1)
m2 = GPy.models.GPRegression(X, (Y-mu)/std, normalizer=False)
m2[:] = m[:]
mu1, var1 = m.predict(m.X, full_cov=True)
mu2, var2 = m2.predict(m2.X, full_cov=True)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2[:, :, None]*std[None, None, :]**2)
mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = m2.predict(m2.X, full_cov=False)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2*std[None, :]**2)
q50n = m.predict_quantiles(m.X, (50,))
q50 = m2.predict_quantiles(m2.X, (50,))
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
# Test variance component:
qs = np.array([2.5, 97.5])
# The quantiles get computed before unormalization
# And transformed using the mean transformation:
c = np.random.choice(X.shape[0])
q95 = m2.predict_quantiles(X[[c]], qs)
mu, var = m2.predict(X[[c]])
from scipy.stats import norm
np.testing.assert_allclose((mu.T+(norm.ppf(qs/100.)*np.sqrt(var))).T.flatten(), np.array(q95).flatten())
def check_jacobian(self):
try:
import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt

View file

@ -116,11 +116,45 @@ class Test(unittest.TestCase):
np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])
e1 = GPy.inference.latent_function_inference.expectation_propagation.EPDTC(ep_mode="nested")
e1.ga_approx_old = GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10))
e1._ep_approximation = []
e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.posteriorParamsDTC(np.random.rand(10),np.random.rand(10)))
e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10)))
e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.cavityParams(10))
e1._ep_approximation[-1].v = np.random.rand(10)
e1._ep_approximation[-1].tau = np.random.rand(10)
e1._ep_approximation.append(np.random.rand(10))
e1_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e1.to_dict())
assert type(e1) == type(e1_r)
assert e1.epsilon==e1_r.epsilon
assert e1.eta==e1_r.eta
assert e1.delta==e1_r.delta
assert e1.always_reset==e1_r.always_reset
assert e1.max_iters==e1_r.max_iters
assert e1.ep_mode==e1_r.ep_mode
assert e1.parallel_updates==e1_r.parallel_updates
np.testing.assert_array_equal(e1.ga_approx_old.tau[:], e1_r.ga_approx_old.tau[:])
np.testing.assert_array_equal(e1.ga_approx_old.v[:], e1_r.ga_approx_old.v[:])
np.testing.assert_array_equal(e1._ep_approximation[0].mu[:], e1_r._ep_approximation[0].mu[:])
np.testing.assert_array_equal(e1._ep_approximation[0].Sigma_diag[:], e1_r._ep_approximation[0].Sigma_diag[:])
np.testing.assert_array_equal(e1._ep_approximation[1].tau[:], e1_r._ep_approximation[1].tau[:])
np.testing.assert_array_equal(e1._ep_approximation[1].v[:], e1_r._ep_approximation[1].v[:])
np.testing.assert_array_equal(e1._ep_approximation[2].tau[:], e1_r._ep_approximation[2].tau[:])
np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])
e2 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference()
e2_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e2.to_dict())
assert type(e2) == type(e2_r)
def test_serialize_deserialize_model(self):
np.random.seed(fixed_seed)
N = 20

View file

@ -28,7 +28,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================
import unittest, numpy as np
import unittest
import numpy as np
import GPy
class TestDebug(unittest.TestCase):
@ -225,3 +226,17 @@ class TestUnivariateGaussian(unittest.TestCase):
for i in range(len(pySols)):
diff += abs(derivLogCdfNormal(self.zz[i]) - pySols[i])
self.assertTrue(diff < 1e-8)
class TestStandardize(unittest.TestCase):
def setUp(self):
self.normalizer = GPy.util.normalizer.Standardize()
y = np.stack([np.random.randn(10), 2*np.random.randn(10)], axis=1)
self.normalizer.scale_by(y)
def test_inverse_covariance(self):
"""
Test inverse covariance outputs correct size
"""
covariance = np.random.rand(100, 100)
output = self.normalizer.inverse_covariance(covariance)
self.assertTrue(output.shape == (100, 100, 2))

View file

@ -3,30 +3,46 @@ Created on Aug 27, 2014
@author: Max Zwiessele
'''
import logging
import numpy as np
class _Norm(object):
def __init__(self):
pass
def scale_by(self, Y):
"""
Use data matrix Y as normalization space to work in.
"""
raise NotImplementedError
def normalize(self, Y):
"""
Project Y into normalized space
"""
if not self.scaled():
raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
def inverse_mean(self, X):
"""
Project the normalized object X into space of Y
"""
raise NotImplementedError
def inverse_variance(self, var):
return var
def inverse_covariance(self, covariance):
"""
Convert scaled covariance to unscaled.
Args:
covariance - numpy array of shape (n, n)
Returns:
covariance - numpy array of shape (n, n, m) where m is number of
outputs
"""
raise NotImplementedError
def scaled(self):
"""
Whether this Norm object has been initialized.
@ -57,17 +73,25 @@ class _Norm(object):
class Standardize(_Norm):
def __init__(self):
self.mean = None
def scale_by(self, Y):
Y = np.ma.masked_invalid(Y, copy=False)
self.mean = Y.mean(0).view(np.ndarray)
self.std = Y.std(0).view(np.ndarray)
def normalize(self, Y):
super(Standardize, self).normalize(Y)
return (Y-self.mean)/self.std
def inverse_mean(self, X):
return (X*self.std)+self.mean
def inverse_variance(self, var):
return (var*(self.std**2))
def inverse_covariance(self, covariance):
return (covariance[..., np.newaxis]*(self.std**2))
def scaled(self):
return self.mean is not None
@ -87,29 +111,3 @@ class Standardize(_Norm):
if "std" in input_dict:
s.std = np.array(input_dict["std"])
return s
# Inverse variance to be implemented, disabling for now
# If someone in the future want to implement this,
# we need to implement the inverse variance for
# normalization. This means, we need to know the factor
# for the variance to be multiplied to the variance in
# normalized space. This is easy to compute for standardization
# (see above) but gets tricky here.
# class Normalize(_Norm):
# def __init__(self):
# self.ymin = None
# self.ymax = None
# def scale_by(self, Y):
# Y = np.ma.masked_invalid(Y, copy=False)
# self.ymin = Y.min(0).view(np.ndarray)
# self.ymax = Y.max(0).view(np.ndarray)
# def normalize(self, Y):
# super(Normalize, self).normalize(Y)
# return (Y - self.ymin) / (self.ymax - self.ymin) - .5
# def inverse_mean(self, X):
# return (X + .5) * (self.ymax - self.ymin) + self.ymin
# def inverse_variance(self, var):
#
# return (var*(self.std**2))
# def scaled(self):
# return (self.ymin is not None) and (self.ymax is not None)