Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Zhenwen Dai 2015-08-20 17:33:56 +01:00
commit 0e01474aec
14 changed files with 217 additions and 96 deletions

View file

@ -17,7 +17,7 @@ before_install:
- sudo ln -s /run/shm /dev/shm
install:
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.9 scipy=0.16 matplotlib nose sphinx pip nose
#- pip install .
- python setup.py build_ext --inplace
#--use-mirrors

View file

@ -183,7 +183,7 @@ class GP(Model):
"""
return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False, kern=None):
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
For making predictions, does not account for normalization or likelihood
@ -199,24 +199,30 @@ class GP(Model):
if kern is None:
kern = self.kern
Kx = kern.K(_Xnew, self.X).T
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
Kx = kern.K(self.X, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = kern.K(_Xnew)
var = Kxx - np.dot(Kx.T, WiKx)
Kxx = kern.K(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
var[var<0.] = 0.
Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
#add the mean function in
if not self.mean_function is None:
mu += self.mean_function.f(_Xnew)
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -249,7 +255,7 @@ class GP(Model):
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None):
"""
Get the predictive quantiles around the prediction at X
@ -257,10 +263,12 @@ class GP(Model):
:type X: np.ndarray (Xnew x self.input_dim)
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple
:param kern: optional kernel to use for prediction
:type predict_kw: dict
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)

View file

@ -69,7 +69,7 @@ from .expectation_propagation_dtc import EPDTC
from .dtc import DTC
from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch
#from .svgp import SVGP
from .var_gauss import VarGauss
# class FullLatentFunctionData(object):
#

View file

@ -171,7 +171,9 @@ class Laplace(LatentFunctionInference):
#define the objective function (to be maximised)
def obj(Ki_f, f):
ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
print ll
if np.isnan(ll):
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return -np.inf
else:
return ll

View file

@ -0,0 +1,66 @@
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv
from .posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class VarGauss(LatentFunctionInference):
"""
The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
author = {Opper, Manfred and Archambeau, C{\'e}dric},
journal = {Neural Comput.},
year = {2009},
pages = {786--792},
}
"""
def __init__(self, alpha, beta):
"""
:param alpha: GPy.core.Param varational parameter
:param beta: GPy.core.Param varational parameter
"""
self.alpha, self.beta = alpha, beta
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
if mean_function is not None:
raise NotImplementedError
num_data, output_dim = Y.shape
assert output_dim ==1, "Only one output supported"
K = kern.K(X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, m, var, Y_metadata=Y_metadata)
if dF_dthetaL is not None:
dL_dthetaL = dF_dthetaL.sum(1).sum(1)
else:
dL_dthetaL = np.array([])
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
log_marginal = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
return Posterior(mean=m, cov=Sigma ,K=K),\
log_marginal,\
{'dL_dK':dF_dK-dKL_dK, 'dL_dthetaL':dL_dthetaL}

View file

@ -94,7 +94,7 @@ class Coregionalize(Kern):
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dkappa = np.diag(dL_dK_small).copy()
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)

View file

@ -85,6 +85,7 @@ class Bernoulli(Likelihood):
gh_x, gh_w = gh_points
gh_w = gh_w / np.sqrt(np.pi)
shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
Ysign = np.where(Y==1,1,-1)

View file

@ -315,6 +315,9 @@ class Gaussian(Likelihood):
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if not isinstance(self.gp_link, link_functions.Identity):
return super(Gaussian, self).variational_expectations(Y=Y, m=m, v=v, gh_points=gh_points, Y_metadata=Y_metadata)
lik_var = float(self.variance)
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
dF_dmu = (Y - m)/lik_var

View file

@ -2,19 +2,16 @@
# Distributed under the terms of the GNU General public License, see LICENSE.txt
import numpy as np
from scipy import stats
from scipy.special import erf
from ..core.model import Model
from ..core import GP
from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
from ..likelihoods import Gaussian
from ..inference.latent_function_inference import VarGauss
log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
class GPVariationalGaussianApproximation(GP):
"""
The Variational Gaussian Approximation revisited
@ -26,70 +23,15 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792},
}
"""
def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
Model.__init__(self,'Variational GP')
if likelihood is None:
likelihood = Gaussian()
# accept the construction arguments
self.X = ObsAr(X)
self.Y = Y
self.num_data, self.input_dim = self.X.shape
self.Y_metadata = Y_metadata
def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):
self.kern = kernel
self.likelihood = likelihood
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
num_data = Y.shape[0]
self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(num_data))
inf = VarGauss(self.alpha, self.beta)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha)
self.link_parameter(self.beta)
def log_likelihood(self):
return self._log_lik
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
self._log_lik = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def _raw_predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
"""
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
Kux = self.kern.K(self.X, Xnew)
mu = np.dot(Kux.T, self.alpha)
WiKux = np.dot(Wi, Kux)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return mu, var.reshape(-1,1)

View file

@ -110,7 +110,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
else:
Y_metadata['output_index'] = extra_data
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw)
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)
for d in which_data_ycols:

View file

@ -8,7 +8,7 @@ The test cases for various inference algorithms
import unittest, itertools
import numpy as np
import GPy
#np.seterr(invalid='raise')
class InferenceXTestCase(unittest.TestCase):

View file

@ -9,8 +9,7 @@ import inspect
from GPy.likelihoods import link_functions
from GPy.core.parameterization import Param
from functools import partial
#np.random.seed(300)
#np.random.seed(4)
fixed_seed = 7
#np.seterr(divide='raise')
def dparam_partial(inst_func, *args):
@ -105,6 +104,7 @@ class TestNoiseModels(object):
Generic model checker
"""
def setUp(self):
np.random.seed(fixed_seed)
self.N = 15
self.D = 3
self.X = np.random.rand(self.N, self.D)*10
@ -218,7 +218,8 @@ class TestNoiseModels(object):
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True,
"ep": False # FIXME: Should be True when we have it working again
"ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True,
},
"Gaussian_log": {
"model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
@ -227,7 +228,8 @@ class TestNoiseModels(object):
"vals": [self.var],
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True
"laplace": True,
"variational_expectations": True
},
#"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
@ -252,7 +254,8 @@ class TestNoiseModels(object):
"link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again
"ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True
},
"Exponential_default": {
"model": GPy.likelihoods.Exponential(),
@ -347,6 +350,10 @@ class TestNoiseModels(object):
ep = attributes["ep"]
else:
ep = False
if "variational_expectations" in attributes:
var_exp = attributes["variational_expectations"]
else:
var_exp = False
#if len(param_vals) > 1:
#raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@ -377,6 +384,11 @@ class TestNoiseModels(object):
if ep:
#ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
if var_exp:
#Need to specify mu and var!
yield self.t_varexp, model, Y, Y_metadata
yield self.t_dexp_dmu, model, Y, Y_metadata
yield self.t_dexp_dvar, model, Y, Y_metadata
self.tearDown()
@ -603,6 +615,87 @@ class TestNoiseModels(object):
print(m)
assert m.checkgrad(verbose=1, step=step)
################
# variational expectations #
################
@with_setup(setUp, tearDown)
def t_varexp(self, model, Y, Y_metadata):
#Test that the analytic implementation (if it exists) matches the generic gauss
#hermite implementation
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = model.variational_expectations(Y=Y, m=mu, v=var, gh_points=None, Y_metadata=Y_metadata)[0]
#Implementation of gauss hermite integration
shape = mu.shape
gh_x, gh_w= np.polynomial.hermite.hermgauss(50)
m,v,Y = mu.flatten(), var.flatten(), Y.flatten()
#make a grid of points
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
# broadcast needs to be handled carefully.
logp = model.logpdf(X, Y[:,None], Y_metadata=Y_metadata)
#average over the gird to get derivatives of the Gaussian's parameters
#division by pi comes from fact that for each quadrature we need to scale by 1/sqrt(pi)
expectation_gh = np.dot(logp, gh_w)/np.sqrt(np.pi)
expectation_gh = expectation_gh.reshape(*shape)
np.testing.assert_almost_equal(expectation, expectation_gh, decimal=5)
@with_setup(setUp, tearDown)
def t_dexp_dmu(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, v=var, gh_points=None, Y_metadata=Y_metadata)
#Function to get the nth returned value
def F(mu):
return expectation(m=mu)[0]
def dmu(mu):
return expectation(m=mu)[1]
grad = GradientChecker(F, dmu, mu.copy(), 'm')
grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_dexp_dvar(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, m=mu, gh_points=None, Y_metadata=Y_metadata)
#Function to get the nth returned value
def F(var):
return expectation(v=var)[0]
def dvar(var):
return expectation(v=var)[2]
grad = GradientChecker(F, dvar, var.copy(), 'v')
self.constrain_positive('v', grad)
#grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
class LaplaceTests(unittest.TestCase):
"""
@ -610,6 +703,7 @@ class LaplaceTests(unittest.TestCase):
"""
def setUp(self):
np.random.seed(fixed_seed)
self.N = 15
self.D = 1
self.X = np.random.rand(self.N, self.D)*10

View file

@ -13,10 +13,13 @@ class MiscTests(np.testing.TestCase):
def test_safe_exp_upper(self):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
assert np.isfinite(np.exp(self._lim_val_exp))
assert np.isinf(np.exp(self._lim_val_exp + 1))
assert np.isfinite(GPy.util.misc.safe_exp(self._lim_val_exp + 1))
print w
print len(w)
assert len(w)==1 # should have one overflow warning
def test_safe_exp_lower(self):

View file

@ -509,7 +509,8 @@ class GradientTests(np.testing.TestCase):
X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
lik = GPy.likelihoods.Gaussian()
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
self.assertTrue(m.checkgrad())