Mirror of https://github.com/SheffieldML/GPy.git
Added Kronecker and variational Gaussian approximation GPs; vargpapprox needs generalising to any factorizing likelihood
This commit is contained in:
parent 051a8115a2
commit 0b3f8b3cc7
4 changed files with 268 additions and 0 deletions
GPy/models/__init__.py

@@ -18,3 +18,5 @@ from gp_coregionalized_regression import GPCoregionalizedRegression
from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from gp_heteroscedastic_regression import GPHeteroscedasticRegression
from ss_mrd import SSMRD
from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
from gp_var_gauss import GPVariationalGaussianApproximation
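With these registry lines in place, both new models are exposed from the top-level GPy.models namespace. A minimal smoke check, assuming a build of this branch is installed:

```python
# Assumes this branch of GPy is installed; only checks that the new
# registry entries resolve to the classes added below.
from GPy.models import GPKroneckerGaussianRegression, GPVariationalGaussianApproximation
```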
GPy/models/gp_kronecker_gaussian_regression.py (new file, 119 lines)

@@ -0,0 +1,119 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General Public License, see LICENSE.txt

import numpy as np
from ..core.model import Model
from ..core.parameterization import ObsAr
from .. import likelihoods

class GPKroneckerGaussianRegression(Model):
    """
    Kronecker GP regression

    Take two kernels computed on separate spaces K1(X1), K2(X2), and a data
    matrix Y of size (N1, N2).

    The effective covariance is np.kron(K2, K1).
    The effective data is vec(Y) = Y.flatten(order='F').

    The noise must be iid Gaussian.

    See Stegle et al.
    @inproceedings{stegle2011efficient,
        title={Efficient inference in matrix-variate {G}aussian models with iid observation noise},
        author={Stegle, Oliver and Lippert, Christoph and Mooij, Joris M and Lawrence, Neil D and Borgwardt, Karsten M},
        booktitle={Advances in Neural Information Processing Systems},
        pages={630--638},
        year={2011}
    }
    """
    def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
        Model.__init__(self, name=name)
        # accept the construction arguments
        self.X1 = ObsAr(X1)
        self.X2 = ObsAr(X2)
        self.Y = Y
        self.kern1, self.kern2 = kern1, kern2
        self.add_parameter(self.kern1)
        self.add_parameter(self.kern2)

        self.likelihood = likelihoods.Gaussian()
        self.likelihood.variance = noise_var
        self.add_parameter(self.likelihood)

        self.num_data1, self.input_dim1 = self.X1.shape
        self.num_data2, self.input_dim2 = self.X2.shape

        assert kern1.input_dim == self.input_dim1
        assert kern2.input_dim == self.input_dim2
        assert Y.shape == (self.num_data1, self.num_data2)

    def log_likelihood(self):
        return self._log_marginal_likelihood

    def parameters_changed(self):
        (N1, D1), (N2, D2) = self.X1.shape, self.X2.shape
        K1, K2 = self.kern1.K(self.X1), self.kern2.K(self.X2)

        # eigendecomposition
        S1, U1 = np.linalg.eigh(K1)
        S2, U2 = np.linalg.eigh(K2)
        W = np.kron(S2, S1) + self.likelihood.variance

        Y_ = U1.T.dot(self.Y).dot(U2)

        # store these quantities: needed for prediction
        Wi = 1./W
        Ytilde = Y_.flatten(order='F')*Wi

        self._log_marginal_likelihood = -0.5*self.num_data1*self.num_data2*np.log(2*np.pi)\
            -0.5*np.sum(np.log(W))\
            -0.5*np.dot(Y_.flatten(order='F'), Ytilde)

        # gradients for data fit part
        Yt_reshaped = Ytilde.reshape(N1, N2, order='F')
        tmp = U1.dot(Yt_reshaped)
        dL_dK1 = .5*(tmp*S2).dot(tmp.T)
        tmp = U2.dot(Yt_reshaped.T)
        dL_dK2 = .5*(tmp*S1).dot(tmp.T)

        # gradients for logdet
        Wi_reshaped = Wi.reshape(N1, N2, order='F')
        tmp = np.dot(Wi_reshaped, S2)
        dL_dK1 += -0.5*(U1*tmp).dot(U1.T)
        tmp = np.dot(Wi_reshaped.T, S1)
        dL_dK2 += -0.5*(U2*tmp).dot(U2.T)

        self.kern1.update_gradients_full(dL_dK1, self.X1)
        self.kern2.update_gradients_full(dL_dK2, self.X2)

        # gradients for noise variance
        dL_dsigma2 = -0.5*Wi.sum() + 0.5*np.sum(np.square(Ytilde))
        self.likelihood.variance.gradient = dL_dsigma2

        # store these quantities for prediction:
        self.Wi, self.Ytilde, self.U1, self.U2 = Wi, Ytilde, U1, U2

    def predict(self, X1new, X2new):
        """
        Return the predictive mean and variance at a series of new points X1new, X2new.
        Only returns the diagonal of the predictive variance, for now.

        :param X1new: The points at which to make a prediction
        :type X1new: np.ndarray, Nnew x self.input_dim1
        :param X2new: The points at which to make a prediction
        :type X2new: np.ndarray, Nnew x self.input_dim2
        """
        k1xf = self.kern1.K(X1new, self.X1)
        k2xf = self.kern2.K(X2new, self.X2)
        A = k1xf.dot(self.U1)
        B = k2xf.dot(self.U2)
        mu = A.dot(self.Ytilde.reshape(self.num_data1, self.num_data2, order='F')).dot(B.T).flatten(order='F')
        k1xx = self.kern1.Kdiag(X1new)
        k2xx = self.kern2.Kdiag(X2new)
        BA = np.kron(B, A)
        var = np.kron(k2xx, k1xx) - np.sum(BA**2*self.Wi, 1) + self.likelihood.variance
        return mu[:, None], var[:, None]
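The O(N1^3 + N2^3) cost above comes from never forming np.kron(K2, K1): the eigenvectors of the full covariance are the Kronecker product of the per-factor eigenvectors, so its eigenvalues are np.kron(S2, S1) plus the noise variance, and rotating Y by U1 and U2 stands in for rotating vec(Y). A standalone numpy sketch (not part of the commit; all names are local to it) checking both the log-determinant and the quadratic form against the dense computation:

```python
# Verifies the identities parameters_changed relies on: with
# K1 = U1 S1 U1^T and K2 = U2 S2 U2^T,
#   np.kron(K2, K1) + sigma2*I has eigenvalues W = np.kron(S2, S1) + sigma2,
#   (U2 kron U1)^T vec(Y) = vec(U1^T Y U2) for column-major vec.
import numpy as np

np.random.seed(0)
N1, N2, sigma2 = 5, 4, 0.1
A1 = np.random.randn(N1, N1); K1 = A1.dot(A1.T)   # random PSD factors
A2 = np.random.randn(N2, N2); K2 = A2.dot(A2.T)
Y = np.random.randn(N1, N2)

S1, U1 = np.linalg.eigh(K1)
S2, U2 = np.linalg.eigh(K2)
W = np.kron(S2, S1) + sigma2

Kbig = np.kron(K2, K1) + sigma2*np.eye(N1*N2)     # never built by the model

# log-determinant in O(N1^3 + N2^3) instead of O((N1*N2)^3)
assert np.allclose(np.linalg.slogdet(Kbig)[1], np.log(W).sum())

# quadratic form vec(Y)^T Kbig^{-1} vec(Y) via the rotated data
y = Y.flatten(order='F')
Y_ = U1.T.dot(Y).dot(U2).flatten(order='F')
assert np.allclose(y.dot(np.linalg.solve(Kbig, y)), Y_.dot(Y_/W))
```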

GPy/models/gp_var_gauss.py (new file, 108 lines)

@@ -0,0 +1,108 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General Public License, see LICENSE.txt

import numpy as np
from scipy import stats
from scipy.special import erf
from ..core.model import Model
from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv

log_2_pi = np.log(2*np.pi)


class GPVariationalGaussianApproximation(Model):
    """
    The Variational Gaussian Approximation revisited, implemented here for
    probit classification

    @article{Opper:2009,
        title = {The Variational Gaussian Approximation Revisited},
        author = {Opper, Manfred and Archambeau, C{\'e}dric},
        journal = {Neural Comput.},
        year = {2009},
        pages = {786--792},
    }
    """
    def __init__(self, X, Y, kernel=None):
        Model.__init__(self, 'Variational GP classification')
        # accept the construction arguments
        self.X = ObsAr(X)
        if kernel is None:
            kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
        self.kern = kernel
        self.add_parameter(self.kern)
        self.num_data, self.input_dim = self.X.shape

        self.alpha = Param('alpha', np.zeros(self.num_data))
        self.beta = Param('beta', np.ones(self.num_data))
        self.add_parameter(self.alpha)
        self.add_parameter(self.beta)

        self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
        self.Ysign = np.where(Y == 1, 1, -1).flatten()

    def log_likelihood(self):
        """
        Marginal log likelihood evaluation
        """
        return self._log_lik

    def likelihood_quadrature(self, m, v):
        """
        Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
        """
        # assume probit for now.
        X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
        p = stats.norm.cdf(X)
        N = stats.norm.pdf(X)
        F = np.log(p).dot(self.gh_w)
        NoverP = N/p
        dF_dm = (NoverP*self.Ysign[:, None]).dot(self.gh_w)
        dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
        return F, dF_dm, dF_dv

    def parameters_changed(self):
        K = self.kern.K(self.X)
        m = K.dot(self.alpha)
        KB = K*self.beta[:, None]
        BKB = KB*self.beta[None, :]
        A = np.eye(self.num_data) + BKB
        Ai, LA, _, Alogdet = pdinv(A)
        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :]  # posterior covariance: need full matrix for gradients
        var = np.diag(Sigma)

        F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
        dF_da = np.dot(K, dF_dm)
        SigmaB = Sigma*self.beta
        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
        KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
        dKL_da = m
        A_A2 = Ai - Ai.dot(Ai)
        dKL_db = np.diag(np.dot(KB.T, A_A2))
        self._log_lik = F.sum() - KL
        self.alpha.gradient = dF_da - dKL_da
        self.beta.gradient = dF_db - dKL_db

        # K-gradients
        dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
        tmp = Ai*self.beta[:, None]/self.beta[None, :]
        dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
        self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)

    def predict(self, Xnew):
        """
        Predict the function(s) at the new point(s) Xnew.

        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.input_dim
        """
        Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
        Kux = self.kern.K(self.X, Xnew)
        mu = np.dot(Kux.T, self.alpha)
        WiKux = np.dot(Wi, Kux)
        Kxx = self.kern.Kdiag(Xnew)
        var = Kxx - np.sum(WiKux*Kux, 0)

        return 0.5*(1 + erf(mu/np.sqrt(2.*(var + 1))))
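Despite sitting next to the regression models, what is committed here is a binary probit classifier (the commit message flags the generalisation to arbitrary factorizing likelihoods as future work). A hedged usage sketch, assuming this branch of GPy is installed; the {0, 1} label encoding follows the np.where(Y == 1, 1, -1) line in the constructor:

```python
# Usage sketch only: the probit likelihood is hard-coded in
# likelihood_quadrature, so Y must hold binary labels.
import numpy as np
import GPy

X = np.random.randn(40, 1)
Y = (X > 0).astype(float)   # labels in {0, 1}, shape (40, 1)

m = GPy.models.GPVariationalGaussianApproximation(X, Y)  # default RBF + White kernel
m.optimize()                # fits alpha, beta and the kernel by gradient ascent
p = m.predict(X)            # predictive probabilities p(y=1 | x)
```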

@@ -423,6 +423,45 @@ class GradientTests(np.testing.TestCase):
        m = GPy.models.GPHeteroscedasticRegression(X, Y, kern)
        self.assertTrue(m.checkgrad())

    def test_gp_kronecker_gaussian(self):
        N1, N2 = 30, 20
        X1 = np.random.randn(N1, 1)
        X2 = np.random.randn(N2, 1)
        X1.sort(0); X2.sort(0)
        k1 = GPy.kern.RBF(1)  # + GPy.kern.White(1)
        k2 = GPy.kern.RBF(1)  # + GPy.kern.White(1)
        Y = np.random.randn(N1, N2)
        m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)

        # build the model the dumb way
        assert (N1*N2 < 1000), "too much data for standard GPs!"
        yy, xx = np.meshgrid(X2, X1)
        Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
        kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
        mm = GPy.models.GPRegression(Xgrid, Y.reshape(-1, 1, order='F'), kernel=kg)

        m.randomize()
        mm[:] = m[:]
        assert np.allclose(m.log_likelihood(), mm.log_likelihood())
        assert np.allclose(m.gradient, mm.gradient)
        X1test = np.random.randn(100, 1)
        X2test = np.random.randn(100, 1)
        mean1, var1 = m.predict(X1test, X2test)
        yy, xx = np.meshgrid(X2test, X1test)
        Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
        mean2, var2 = mm.predict(Xgrid)
        assert np.allclose(mean1, mean2)
        assert np.allclose(var1, var2)

    def test_gp_VGPC(self):
        num_obs = 25
        X = np.random.randint(0, 140, num_obs)
        X = X[:, None]
        Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:, None]
        kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
        m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
        self.assertTrue(m.checkgrad())


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."