Added Kronecker and variational Gaussian approximation GPs; the variational Gaussian approximation still needs generalising to any factorizing likelihood

Alan Saul 2014-08-15 18:22:09 +01:00
parent 051a8115a2
commit 0b3f8b3cc7
4 changed files with 268 additions and 0 deletions

GPy/models/__init__.py

@@ -18,3 +18,5 @@ from gp_coregionalized_regression import GPCoregionalizedRegression
from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from gp_heteroscedastic_regression import GPHeteroscedasticRegression
from ss_mrd import SSMRD
from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
from gp_var_gauss import GPVariationalGaussianApproximation
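
Both new models become importable at the package level once this hunk lands; a minimal sketch (assuming the usual top-level GPy package):

    import GPy
    GPy.models.GPKroneckerGaussianRegression       # Kronecker-structured Gaussian regression
    GPy.models.GPVariationalGaussianApproximation  # variational Gaussian approximation (probit)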

GPy/models/gp_kronecker_gaussian_regression.py (new file, 119 lines)

@@ -0,0 +1,119 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General Public License, see LICENSE.txt
import numpy as np
from ..core.model import Model
from ..core.parameterization import ObsAr
from .. import likelihoods
class GPKroneckerGaussianRegression(Model):
"""
Kronecker GP regression
Take two kernels computed on separate spaces K1(X1), K2(X2), and a data
matrix Y which is of size (N1, N2).
The effective covariance is np.kron(K2, K1)
The effective data is vec(Y) = Y.flatten(order='F')
The noise must be iid Gaussian.
See Stegle et al.
@inproceedings{stegle2011efficient,
title={Efficient inference in matrix-variate Gaussian models with iid observation noise},
author={Stegle, Oliver and Lippert, Christoph and Mooij, Joris M and Lawrence, Neil D and Borgwardt, Karsten M},
booktitle={Advances in Neural Information Processing Systems},
pages={630--638},
year={2011}
}
"""
def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
Model.__init__(self, name=name)
# accept the construction arguments
self.X1 = ObsAr(X1)
self.X2 = ObsAr(X2)
self.Y = Y
self.kern1, self.kern2 = kern1, kern2
self.add_parameter(self.kern1)
self.add_parameter(self.kern2)
self.likelihood = likelihoods.Gaussian()
self.likelihood.variance = noise_var
self.add_parameter(self.likelihood)
self.num_data1, self.input_dim1 = self.X1.shape
self.num_data2, self.input_dim2 = self.X2.shape
assert kern1.input_dim == self.input_dim1
assert kern2.input_dim == self.input_dim2
assert Y.shape == (self.num_data1, self.num_data2)
def log_likelihood(self):
return self._log_marginal_likelihood
def parameters_changed(self):
(N1, D1), (N2, D2) = self.X1.shape, self.X2.shape
K1, K2 = self.kern1.K(self.X1), self.kern2.K(self.X2)
        # eigendecomposition
S1, U1 = np.linalg.eigh(K1)
S2, U2 = np.linalg.eigh(K2)
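        # With K1 = U1 diag(S1) U1.T and K2 = U2 diag(S2) U2.T, the full Gram
        # matrix np.kron(K2, K1) + sigma^2 I has eigenvectors np.kron(U2, U1)
        # and eigenvalues np.kron(S2, S1) + sigma^2, so the O((N1*N2)^3) solve
        # collapses to two small eigendecompositions.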
W = np.kron(S2, S1) + self.likelihood.variance
Y_ = U1.T.dot(self.Y).dot(U2)
# store these quantities: needed for prediction
Wi = 1./W
Ytilde = Y_.flatten(order='F')*Wi
self._log_marginal_likelihood = -0.5*self.num_data1*self.num_data2*np.log(2*np.pi)\
-0.5*np.sum(np.log(W))\
-0.5*np.dot(Y_.flatten(order='F'), Ytilde)
# gradients for data fit part
Yt_reshaped = Ytilde.reshape(N1, N2, order='F')
tmp = U1.dot(Yt_reshaped)
dL_dK1 = .5*(tmp*S2).dot(tmp.T)
tmp = U2.dot(Yt_reshaped.T)
dL_dK2 = .5*(tmp*S1).dot(tmp.T)
# gradients for logdet
Wi_reshaped = Wi.reshape(N1, N2, order='F')
tmp = np.dot(Wi_reshaped, S2)
dL_dK1 += -0.5*(U1*tmp).dot(U1.T)
tmp = np.dot(Wi_reshaped.T, S1)
dL_dK2 += -0.5*(U2*tmp).dot(U2.T)
self.kern1.update_gradients_full(dL_dK1, self.X1)
self.kern2.update_gradients_full(dL_dK2, self.X2)
# gradients for noise variance
dL_dsigma2 = -0.5*Wi.sum() + 0.5*np.sum(np.square(Ytilde))
self.likelihood.variance.gradient = dL_dsigma2
# store these quantities for prediction:
self.Wi, self.Ytilde, self.U1, self.U2 = Wi, Ytilde, U1, U2
def predict(self, X1new, X2new):
"""
        Return the predictive mean and variance on the grid formed by crossing
        X1new with X2new: N1new*N2new predictions, flattened in column-major
        order to match vec(Y).
        Only the diagonal of the predictive covariance is returned, for now.
        :param X1new: The points in the first input space at which to predict
        :type X1new: np.ndarray, N1new x self.input_dim1
        :param X2new: The points in the second input space at which to predict
        :type X2new: np.ndarray, N2new x self.input_dim2
"""
k1xf = self.kern1.K(X1new, self.X1)
k2xf = self.kern2.K(X2new, self.X2)
A = k1xf.dot(self.U1)
B = k2xf.dot(self.U2)
mu = A.dot(self.Ytilde.reshape(self.num_data1, self.num_data2, order='F')).dot(B.T).flatten(order='F')
k1xx = self.kern1.Kdiag(X1new)
k2xx = self.kern2.Kdiag(X2new)
BA = np.kron(B, A)
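        # each row of np.kron(B, A) is the cross-covariance between one test
        # grid pair and all N1*N2 training points, rotated into the eigenbasis;
        # summing over axis 1 below gives the diagonal of the predictive
        # covariance without forming the full matrix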
var = np.kron(k2xx, k1xx) - np.sum(BA**2*self.Wi, 1) + self.likelihood.variance
return mu[:, None], var[:, None]
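
The vec-trick identity the model exploits is easy to verify numerically; a standalone sketch in plain NumPy (illustrative only, no GPy required):

    import numpy as np
    N1, N2 = 4, 3
    A1 = np.random.randn(N1, N1); K1 = A1.dot(A1.T)  # two PSD "kernel" matrices
    A2 = np.random.randn(N2, N2); K2 = A2.dot(A2.T)
    sigma2 = 0.1
    S1, U1 = np.linalg.eigh(K1)
    S2, U2 = np.linalg.eigh(K2)
    # the eigenvalues of np.kron(K2, K1) + sigma2*I are np.kron(S2, S1) + sigma2
    full = np.kron(K2, K1) + sigma2*np.eye(N1*N2)
    assert np.allclose(np.sort(np.linalg.eigvalsh(full)),
                       np.sort(np.kron(S2, S1) + sigma2))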

GPy/models/gp_var_gauss.py (new file, 108 lines)

@@ -0,0 +1,108 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General Public License, see LICENSE.txt
import numpy as np
from scipy import stats
from scipy.special import erf
from ..core.model import Model
from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
"""
An implementation of 'The Variational Gaussian Approximation Revisited', here for probit classification
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
author = {Opper, Manfred and Archambeau, C{\'e}dric},
journal = {Neural Comput.},
year = {2009},
pages = {786--792},
}
"""
def __init__(self, X, Y, kernel=None):
        Model.__init__(self, 'Variational GP classification')
# accept the construction arguments
self.X = ObsAr(X)
if kernel is None:
kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
self.kern = kernel
self.add_parameter(self.kern)
self.num_data, self.input_dim = self.X.shape
self.alpha = Param('alpha', np.zeros(self.num_data))
self.beta = Param('beta', np.ones(self.num_data))
self.add_parameter(self.alpha)
self.add_parameter(self.beta)
        self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
        self.gh_w /= np.sqrt(np.pi)  # normalise the weights so the quadrature approximates a Gaussian expectation
self.Ysign = np.where(Y==1, 1, -1).flatten()
def log_likelihood(self):
"""
Marginal log likelihood evaluation
"""
return self._log_lik
def likelihood_quadrature(self, m, v):
"""
Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
"""
# assume probit for now.
X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
p = stats.norm.cdf(X)
N = stats.norm.pdf(X)
F = np.log(p).dot(self.gh_w)
NoverP = N/p
dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
return F, dF_dm, dF_dv
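    # illustrative check of the normalised quadrature above: for m=0, v=1 and
    # y=+1 the exact value is E_{N(f|0,1)}[Phi(f)] = Phi(0) = 0.5, and
    # stats.norm.cdf(self.gh_x*np.sqrt(2.)).dot(self.gh_w) returns ~0.5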
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :]  # posterior covariance: need the full matrix for gradients
var = np.diag(Sigma)
F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
self._log_lik = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def predict(self, Xnew):
"""
        Return the probability of class +1 at the new point(s) Xnew, by pushing
        the Gaussian predictive density through the probit link.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
"""
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
Kux = self.kern.K(self.X, Xnew)
mu = np.dot(Kux.T, self.alpha)
WiKux = np.dot(Wi, Kux)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
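
A minimal usage sketch for the new classifier (hypothetical data; assumes the commit is merged so the class is reachable through GPy.models):

    import numpy as np
    import GPy

    X = np.random.rand(40, 1)*10.
    Y = (np.sin(X) > 0.).astype(int)  # labels in {0, 1}; Ysign maps them to {-1, +1}
    m = GPy.models.GPVariationalGaussianApproximation(X, Y)  # default RBF + white kernel
    m.optimize()
    p = m.predict(np.linspace(0., 10., 50)[:, None])  # probability of class +1 at the test inputs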


@@ -423,6 +423,45 @@ class GradientTests(np.testing.TestCase):
m = GPy.models.GPHeteroscedasticRegression(X,Y,kern)
self.assertTrue(m.checkgrad())
def test_gp_kronecker_gaussian(self):
N1, N2 = 30, 20
X1 = np.random.randn(N1, 1)
X2 = np.random.randn(N2, 1)
X1.sort(0); X2.sort(0)
k1 = GPy.kern.RBF(1) # + GPy.kern.White(1)
k2 = GPy.kern.RBF(1) # + GPy.kern.White(1)
Y = np.random.randn(N1, N2)
m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)
# build the model the dumb way
assert (N1*N2<1000), "too much data for standard GPs!"
yy, xx = np.meshgrid(X2, X1)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
mm = GPy.models.GPRegression(Xgrid, Y.reshape(-1, 1, order='F'), kernel=kg)
m.randomize()
mm[:] = m[:]
assert np.allclose(m.log_likelihood(), mm.log_likelihood())
assert np.allclose(m.gradient, mm.gradient)
X1test = np.random.randn(100, 1)
X2test = np.random.randn(100, 1)
mean1, var1 = m.predict(X1test, X2test)
yy, xx = np.meshgrid(X2test, X1test)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
mean2, var2 = mm.predict(Xgrid)
assert np.allclose(mean1, mean2)
assert np.allclose(var1, var2)
def test_gp_VGPC(self):
num_obs = 25
X = np.random.randint(0,140,num_obs)
X = X[:,None]
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None]
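        # Y is continuous here, so the model's Ysign maps every label to -1;
        # that is fine for checkgrad, which only needs a consistent objective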
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X,Y,kern)
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."