diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index 1fb781d5..cce26783 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -18,3 +18,5 @@ from gp_coregionalized_regression import GPCoregionalizedRegression
 from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
 from gp_heteroscedastic_regression import GPHeteroscedasticRegression
 from ss_mrd import SSMRD
+from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
+from gp_var_gauss import GPVariationalGaussianApproximation
diff --git a/GPy/models/gp_kronecker_gaussian_regression.py b/GPy/models/gp_kronecker_gaussian_regression.py
new file mode 100644
index 00000000..0e8dab81
--- /dev/null
+++ b/GPy/models/gp_kronecker_gaussian_regression.py
@@ -0,0 +1,119 @@
+# Copyright (c) 2014, James Hensman, Alan Saul
+# Distributed under the terms of the GNU General Public License, see LICENSE.txt
+
+import numpy as np
+from ..core.model import Model
+from ..core.parameterization import ObsAr
+from .. import likelihoods
+
+class GPKroneckerGaussianRegression(Model):
+    """
+    Kronecker GP regression
+
+    Take two kernels computed on separate spaces K1(X1), K2(X2), and a data
+    matrix Y which is of size (N1, N2).
+
+    The effective covariance is np.kron(K2, K1).
+    The effective data is vec(Y) = Y.flatten(order='F').
+
+    The noise must be iid Gaussian.
+
+    See Stegle et al.
+    @inproceedings{stegle2011efficient,
+        title={Efficient inference in matrix-variate Gaussian models with iid observation noise},
+        author={Stegle, Oliver and Lippert, Christoph and Mooij, Joris M and Lawrence, Neil D and Borgwardt, Karsten M},
+        booktitle={Advances in Neural Information Processing Systems},
+        pages={630--638},
+        year={2011}
+    }
+
+    """
+    def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
+        Model.__init__(self, name=name)
+        # accept the construction arguments
+        self.X1 = ObsAr(X1)
+        self.X2 = ObsAr(X2)
+        self.Y = Y
+        self.kern1, self.kern2 = kern1, kern2
+        self.add_parameter(self.kern1)
+        self.add_parameter(self.kern2)
+
+        self.likelihood = likelihoods.Gaussian()
+        self.likelihood.variance = noise_var
+        self.add_parameter(self.likelihood)
+
+        self.num_data1, self.input_dim1 = self.X1.shape
+        self.num_data2, self.input_dim2 = self.X2.shape
+
+        assert kern1.input_dim == self.input_dim1
+        assert kern2.input_dim == self.input_dim2
+        assert Y.shape == (self.num_data1, self.num_data2)
+
+    def log_likelihood(self):
+        return self._log_marginal_likelihood
+
+    def parameters_changed(self):
+        (N1, D1), (N2, D2) = self.X1.shape, self.X2.shape
+        K1, K2 = self.kern1.K(self.X1), self.kern2.K(self.X2)
+
+        # eigendecomposition of each factor: we never build the N1*N2 matrix
+        S1, U1 = np.linalg.eigh(K1)
+        S2, U2 = np.linalg.eigh(K2)
+        W = np.kron(S2, S1) + self.likelihood.variance  # eigenvalues of kron(K2, K1) + sigma^2 I
+
+        Y_ = U1.T.dot(self.Y).dot(U2)  # rotate the data into the joint eigenbasis
+
+        # store these quantities: needed for prediction
+        Wi = 1./W
+        Ytilde = Y_.flatten(order='F')*Wi
+
+        self._log_marginal_likelihood = -0.5*self.num_data1*self.num_data2*np.log(2*np.pi)\
+                                        -0.5*np.sum(np.log(W))\
+                                        -0.5*np.dot(Y_.flatten(order='F'), Ytilde)
+
+        # gradients for data fit part
+        Yt_reshaped = Ytilde.reshape(N1, N2, order='F')
+        tmp = U1.dot(Yt_reshaped)
+        dL_dK1 = .5*(tmp*S2).dot(tmp.T)
+        tmp = U2.dot(Yt_reshaped.T)
+        dL_dK2 = .5*(tmp*S1).dot(tmp.T)
+
+        # gradients for logdet
+        Wi_reshaped = Wi.reshape(N1, N2, order='F')
+        tmp = np.dot(Wi_reshaped, S2)
+        dL_dK1 += -0.5*(U1*tmp).dot(U1.T)
+        tmp = np.dot(Wi_reshaped.T, S1)
+        dL_dK2 += -0.5*(U2*tmp).dot(U2.T)
+
+        self.kern1.update_gradients_full(dL_dK1, self.X1)
+        self.kern2.update_gradients_full(dL_dK2, self.X2)
+
+        # gradients for noise variance
+        dL_dsigma2 = -0.5*Wi.sum() + 0.5*np.sum(np.square(Ytilde))
+        self.likelihood.variance.gradient = dL_dsigma2
+
+        # store these quantities for prediction:
+        self.Wi, self.Ytilde, self.U1, self.U2 = Wi, Ytilde, U1, U2
+
+    def predict(self, X1new, X2new):
+        """
+        Return the predictive mean and variance on the N1new x N2new grid formed by X1new and X2new, vec'd in column-major ('F') order.
+        Only returns the diagonal of the predictive variance, for now.
+
+        :param X1new: The points at which to make a prediction
+        :type X1new: np.ndarray, N1new x self.input_dim1
+        :param X2new: The points at which to make a prediction
+        :type X2new: np.ndarray, N2new x self.input_dim2
+
+        """
+        k1xf = self.kern1.K(X1new, self.X1)
+        k2xf = self.kern2.K(X2new, self.X2)
+        A = k1xf.dot(self.U1)
+        B = k2xf.dot(self.U2)
+        mu = A.dot(self.Ytilde.reshape(self.num_data1, self.num_data2, order='F')).dot(B.T).flatten(order='F')
+        k1xx = self.kern1.Kdiag(X1new)
+        k2xx = self.kern2.Kdiag(X2new)
+        BA = np.kron(B, A)
+        var = np.kron(k2xx, k1xx) - np.sum(BA**2*self.Wi, 1) + self.likelihood.variance
+
+        return mu[:, None], var[:, None]
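Note: a minimal usage sketch for the new Kronecker model. The random data and kernel choices are illustrative only, and optimize() is the standard GPy Model method rather than anything added by this patch:

    import numpy as np
    import GPy

    X1, X2 = np.random.randn(10, 1), np.random.randn(8, 1)  # row and column inputs
    Y = np.random.randn(10, 8)                               # one observation per grid point
    m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y,
                                                 GPy.kern.RBF(1), GPy.kern.RBF(1))
    m.optimize()
    # predict returns mean/variance for all 5*5 = 25 test-grid pairs,
    # vec'd in column-major ('F') order
    mu, var = m.predict(np.random.randn(5, 1), np.random.randn(5, 1))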
diff --git a/GPy/models/gp_var_gauss.py b/GPy/models/gp_var_gauss.py
new file mode 100644
index 00000000..68b62443
--- /dev/null
+++ b/GPy/models/gp_var_gauss.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2014, James Hensman, Alan Saul
+# Distributed under the terms of the GNU General Public License, see LICENSE.txt
+
+import numpy as np
+from scipy import stats
+from scipy.special import erf
+from ..core.model import Model
+from ..core.parameterization import ObsAr
+from .. import kern
+from ..core.parameterization.param import Param
+from ..util.linalg import pdinv

+
+log_2_pi = np.log(2*np.pi)
+
+
+class GPVariationalGaussianApproximation(Model):
+    """
+    An implementation of 'The Variational Gaussian Approximation Revisited' for binary classification (probit likelihood)
+
+    @article{Opper:2009,
+        title = {The Variational Gaussian Approximation Revisited},
+        author = {Opper, Manfred and Archambeau, C{\'e}dric},
+        journal = {Neural Computation},
+        year = {2009},
+        pages = {786--792},
+    }
+    """
+    def __init__(self, X, Y, kernel=None):
+        Model.__init__(self, 'Variational GP classification')
+        # accept the construction arguments
+        self.X = ObsAr(X)
+        if kernel is None:
+            kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
+        self.kern = kernel
+        self.add_parameter(self.kern)
+        self.num_data, self.input_dim = self.X.shape
+
+        self.alpha = Param('alpha', np.zeros(self.num_data))
+        self.beta = Param('beta', np.ones(self.num_data))
+        self.add_parameter(self.alpha)
+        self.add_parameter(self.beta)
+
+        self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
+        self.Ysign = np.where(Y==1, 1, -1).flatten()  # labels mapped to {-1, +1}
+
+    def log_likelihood(self):
+        """
+        Evaluation of the variational lower bound on the marginal log likelihood
+        """
+        return self._log_lik
+
+    def likelihood_quadrature(self, m, v):
+        """
+        Gauss-Hermite quadrature of the log likelihood and its derivatives w.r.t. the mean m and variance v, with fixed nodes and weights
+        """
+        # assume a probit likelihood for now
+        X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
+        p = stats.norm.cdf(X)
+        N = stats.norm.pdf(X)
+        F = np.log(p).dot(self.gh_w)
+        NoverP = N/p
+        dF_dm = (NoverP*self.Ysign[:, None]).dot(self.gh_w)
+        dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
+        return F, dF_dm, dF_dv
+
+    def parameters_changed(self):
+        K = self.kern.K(self.X)
+        m = K.dot(self.alpha)
+        KB = K*self.beta[:, None]
+        BKB = KB*self.beta[None, :]
+        A = np.eye(self.num_data) + BKB
+        Ai, LA, _, Alogdet = pdinv(A)
+        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :]  # posterior covariance: need full matrix for gradients
+        var = np.diag(Sigma)
+
+        F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
+        dF_da = np.dot(K, dF_dm)
+        SigmaB = Sigma*self.beta
+        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
+        KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
+        dKL_da = m
+        A_A2 = Ai - Ai.dot(Ai)
+        dKL_db = np.diag(np.dot(KB.T, A_A2))
+        self._log_lik = F.sum() - KL
+        self.alpha.gradient = dF_da - dKL_da
+        self.beta.gradient = dF_db - dKL_db
+
+        # K-gradients
+        dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
+        tmp = Ai*self.beta[:, None]/self.beta[None, :]
+        dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
+        self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
+
+    def predict(self, Xnew):
+        """
+        Predict the probability of class 1 at the new point(s) Xnew.
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.input_dim
+        """
+        Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
+        Kux = self.kern.K(self.X, Xnew)
+        mu = np.dot(Kux.T, self.alpha)
+        WiKux = np.dot(Wi, Kux)
+        Kxx = self.kern.Kdiag(Xnew)
+        var = Kxx - np.sum(WiKux*Kux, 0)
+
+        return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
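Note: a minimal usage sketch for the variational classifier. The synthetic labels are illustrative; per Ysign in __init__ above, labels equal to 1 are the positive class and anything else is negative, and optimize() is the standard GPy Model method:

    import numpy as np
    import GPy

    X = np.random.randn(40, 1)
    Y = (X[:, 0] > 0).astype(int)[:, None]  # binary labels in {0, 1}
    m = GPy.models.GPVariationalGaussianApproximation(X, Y)  # default kernel: RBF + White
    m.optimize()
    p = m.predict(np.linspace(-2., 2., 50)[:, None])  # probit probability of class 1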
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index 7b19538b..451ab757 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -423,6 +423,45 @@ class GradientTests(np.testing.TestCase):
         m = GPy.models.GPHeteroscedasticRegression(X,Y,kern)
         self.assertTrue(m.checkgrad())
 
+    def test_gp_kronecker_gaussian(self):
+        N1, N2 = 30, 20
+        X1 = np.random.randn(N1, 1)
+        X2 = np.random.randn(N2, 1)
+        X1.sort(0); X2.sort(0)
+        k1 = GPy.kern.RBF(1)  # + GPy.kern.White(1)
+        k2 = GPy.kern.RBF(1)  # + GPy.kern.White(1)
+        Y = np.random.randn(N1, N2)
+        m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)
+
+        # build the same model the naive way, on the full N1*N2 grid
+        assert (N1*N2 < 1000), "too much data for standard GPs!"
+        yy, xx = np.meshgrid(X2, X1)
+        Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
+        kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
+        mm = GPy.models.GPRegression(Xgrid, Y.reshape(-1, 1, order='F'), kernel=kg)
+
+        m.randomize()
+        mm[:] = m[:]  # share parameters; the two models should now agree exactly
+        assert np.allclose(m.log_likelihood(), mm.log_likelihood())
+        assert np.allclose(m.gradient, mm.gradient)
+        X1test = np.random.randn(100, 1)
+        X2test = np.random.randn(100, 1)
+        mean1, var1 = m.predict(X1test, X2test)
+        yy, xx = np.meshgrid(X2test, X1test)
+        Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
+        mean2, var2 = mm.predict(Xgrid)
+        assert np.allclose(mean1, mean2)
+        assert np.allclose(var1, var2)
+
+    def test_gp_VGPC(self):
+        num_obs = 25
+        X = np.random.randint(0, 140, num_obs)
+        X = X[:, None]
+        Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:, None]
+        kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
+        m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
+        self.assertTrue(m.checkgrad())
+
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
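Note: the identity that both the Kronecker model and the equivalence test above rely on is that the eigendecomposition of a Kronecker product factorizes, so adding iid noise only shifts eigenvalues and parameters_changed can work with N1- and N2-sized decompositions instead of an N1*N2-sized one. A standalone numpy check (not part of the patch; sizes are arbitrary):

    import numpy as np

    N1, N2 = 5, 4
    A, B = np.random.randn(N1, N1), np.random.randn(N2, N2)
    K1, K2 = A.dot(A.T) + np.eye(N1), B.dot(B.T) + np.eye(N2)  # two SPD 'kernel' matrices
    S1, U1 = np.linalg.eigh(K1)
    S2, U2 = np.linalg.eigh(K2)
    U = np.kron(U2, U1)

    # kron(K2, K1) has eigenvectors kron(U2, U1) and eigenvalues kron(S2, S1)
    assert np.allclose(np.kron(K2, K1), (U*np.kron(S2, S1)).dot(U.T))

    # the rotation Y_ = U1.T Y U2 is the vec identity (U2 kron U1).T vec(Y)
    Y = np.random.randn(N1, N2)
    assert np.allclose(U1.T.dot(Y).dot(U2).flatten(order='F'),
                       U.T.dot(Y.flatten(order='F')))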