mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-04 01:02:39 +02:00
Added kernels for GpGrid and GpSsm regression
This commit is contained in:
parent
9a2670b98e
commit
3d346cbdd6
9 changed files with 641 additions and 1 deletion
@@ -0,0 +1,113 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

# This implementation of GP inference for inputs on a Cartesian grid is based on the article:

#@article{Gilboa:2015,
#    title={Scaling multidimensional inference for structured Gaussian processes},
#    author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
#    journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
#    volume={37},
#    number={2},
#    pages={424--436},
#    year={2015},
#    publisher={IEEE}
#}

from .grid_posterior import GridPosterior
from . import LatentFunctionInference
import numpy as np

log_2_pi = np.log(2*np.pi)


class GaussianGridInference(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian and the inputs lie on a grid.

    The function self.inference returns a GridPosterior object, which summarizes
    the posterior.
    """
    def __init__(self):
        pass

    def kron_mvprod(self, A, b):
        """
        Compute (A[0] kron A[1] kron ... kron A[D-1]) b without ever
        forming the full Kronecker product.
        """
        x = b
        D = len(A)
        G = np.zeros(D, dtype=int)   # size of each Kronecker factor
        for d in range(D):
            G[d] = len(A[d])
        N = np.prod(G)
        for d in range(D-1, -1, -1):
            # fold x into a G[d] x (N/G[d]) matrix, apply A[d], then unfold
            X = np.reshape(x, (G[d], N // G[d]), order='F')
            Z = np.dot(A[d], X).T
            x = np.reshape(Z, (-1, 1), order='F')
        return x
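
    # Example (illustrative): for A = [A0, A1] with A0 of size 3x3 and A1 of
    # size 4x4, and any 12-element column vector b, kron_mvprod(A, b) equals
    # np.dot(np.kron(A0, A1), b) but never builds the 12 x 12 matrix.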

    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
        """
        Returns a GridPosterior class containing essential quantities of the posterior
        """
        N = X.shape[0]  # number of training points
        D = X.shape[1]  # number of dimensions

        Kds = np.zeros(D, dtype=object)   # covariance matrix per dimension
        Qs = np.zeros(D, dtype=object)    # eigenvectors of covariance per dimension
        QTs = np.zeros(D, dtype=object)   # transposed eigenvectors of covariance per dimension
        V_kron = 1                        # Kronecker product of eigenvalues

        # retrieve the one-dimensional variation of the designated kernel
        oneDkernel = kern.getOneDimensionalKernel(D)

        for d in range(D):
            xg = np.unique(X[:, d])       # sorted unique grid values along dimension d
            xg = xg.reshape(len(xg), 1)
            oneDkernel.lengthscale = kern.lengthscale[d]
            Kds[d] = oneDkernel.K(xg)
            # Kds[d] is symmetric, so Kds[d] = Q diag(V) Q^T with orthogonal Q
            V, Q = np.linalg.eigh(Kds[d])
            V_kron = np.kron(V_kron, V)
            Qs[d] = Q
            QTs[d] = Q.T

        noise = likelihood.variance + 1e-8

        # alpha = (K + noise*I)^{-1} Y, computed via the eigendecomposition
        alpha_kron = self.kron_mvprod(QTs, Y)
        V_kron = V_kron.reshape(-1, 1)
        alpha_kron = alpha_kron / (V_kron + noise)
        alpha_kron = self.kron_mvprod(Qs, alpha_kron)

        log_likelihood = -0.5 * (np.dot(Y.T, alpha_kron)
                                 + np.sum(np.log(V_kron + noise))
                                 + N*log_2_pi)

        # compute derivatives wrt the parameters Theta
        derivs = np.zeros(D+2, dtype=object)
        for t in range(len(derivs)):
            dKd_dTheta = np.zeros(D, dtype=object)
            gamma = np.zeros(D, dtype=object)
            gam = 1
            for d in range(D):
                xg = np.unique(X[:, d])
                xg = xg.reshape(len(xg), 1)
                oneDkernel.lengthscale = kern.lengthscale[d]
                if t < D:
                    dKd_dTheta[d] = oneDkernel.dKd_dLen(xg, (t == d), lengthscale=kern.lengthscale[t])  # derivative wrt lengthscale
                elif t == D:
                    dKd_dTheta[d] = oneDkernel.dKd_dVar(xg)  # derivative wrt variance
                else:
                    dKd_dTheta[d] = np.identity(len(xg))     # derivative wrt noise
                gamma[d] = np.diag(np.dot(np.dot(QTs[d], dKd_dTheta[d].T), Qs[d]))
                gam = np.kron(gam, gamma[d])

            gam = gam.reshape(-1, 1)
            kappa = self.kron_mvprod(dKd_dTheta, alpha_kron)
            derivs[t] = 0.5*np.dot(alpha_kron.T, kappa) - 0.5*np.sum(gam / (V_kron + noise))

        # separate derivatives
        dL_dLen = derivs[:D]
        dL_dVar = derivs[D]
        dL_dThetaL = derivs[D+1]

        return (GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron),
                log_likelihood,
                {'dL_dLen': dL_dLen, 'dL_dVar': dL_dVar, 'dL_dthetaL': dL_dThetaL})
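
The inference above rests on two facts: kron_mvprod multiplies by a Kronecker product without materialising it, and the eigendecomposition turns (K + sigma^2 I)^{-1} y into Q (Lambda + sigma^2 I)^{-1} Q^T y. A quick NumPy sketch checking both (illustrative only, not part of the commit; it assumes the GaussianGridInference class above is in scope):

    import numpy as np

    np.random.seed(0)
    X = np.linspace(0, 1, 5)[:, None]
    K = np.exp(-0.5 * (X - X.T)**2 / 0.3**2)    # toy RBF covariance on 5 points
    noise = 0.1
    y = np.random.randn(5, 1)

    V, Q = np.linalg.eigh(K)                    # K = Q diag(V) Q^T
    alpha_eig = Q.dot(Q.T.dot(y) / (V[:, None] + noise))
    alpha_direct = np.linalg.solve(K + noise*np.eye(5), y)
    assert np.allclose(alpha_eig, alpha_direct)

    # kron_mvprod agrees with the explicit Kronecker product:
    A = [np.random.randn(3, 3), np.random.randn(4, 4)]
    b = np.random.randn(12, 1)                  # 3 * 4 = 12 entries
    fast = GaussianGridInference().kron_mvprod(A, b)
    full = np.dot(np.kron(A[0], A[1]), b)
    assert np.allclose(fast, full)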
@@ -0,0 +1,140 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

# This implementation of converting GPs to state space models is based on the article:

#@article{Gilboa:2015,
#    title={Scaling multidimensional inference for structured Gaussian processes},
#    author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
#    journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
#    volume={37},
#    number={2},
#    pages={424--436},
#    year={2015},
#    publisher={IEEE}
#}

from .ssm_posterior import SsmPosterior
from . import LatentFunctionInference
import numpy as np
import math as mt

log_2_pi = np.log(2*np.pi)


class GaussianSSMInference(LatentFunctionInference):
    """
    An object for inference when the likelihood is Gaussian and the GP has
    been reformulated as a state space model.

    The function self.inference returns an SsmPosterior object, which summarizes
    the posterior.
    """
    def __init__(self):
        pass  # self._YYTfactor_cache = caching.cache()

    def get_YYTfactor(self, Y):
        """
        Find a matrix L which satisfies LL^T = YY^T.

        Note that L may have fewer columns than Y, else L = Y.
        """
        N, D = Y.shape
        if N > D:
            return Y
        else:
            # TODO: cache a thin factor L with L L^T = Y Y^T; for now simply return Y
            return Y

    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
        """
        Returns an SsmPosterior class containing essential quantities of the posterior
        """
        order = kern.order
        K = X.shape[0]                   # number of training points
        log_likelihood = 0
        results = np.zeros((K, 4), dtype=object)
        H = np.zeros((1, order))         # observation model: y_t = H x_t + noise
        H[0][0] = 1
        v_0 = kern.Phi_of_r(-1)          # prior (stationary) state covariance
        mu_0 = np.zeros((order, 1))      # prior state mean
        noise_var = likelihood.variance + 1e-8
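
        # Kalman filter recursions applied below; for each step t:
        #   predict:   m = Phi mu,             P = Phi V Phi^T + Q
        #   innovate:  prior_m = H m,          prior_v = H P H^T + noise_var
        #   update:    gain = P H^T / prior_v,
        #              mu = m + gain*(y_t - prior_m),  V = (I - gain H) P
        # The one-step predictive densities N(y_t; prior_m, prior_v)
        # accumulate into the log marginal likelihood.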
        # carry out forward filtering
        for t in range(K):
            if t == 0:
                prior_m = np.dot(H, mu_0)
                prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var

                log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v)

                kalman_gain = np.dot(v_0, H.T) / prior_v
                mu = mu_0 + kalman_gain*(Y[0] - prior_m)

                V = np.dot(np.eye(order) - np.dot(kalman_gain, H), v_0)
                results[0][0] = mu
                results[0][1] = V
            else:
                delta = X[t] - X[t-1]        # spacing between consecutive inputs
                Q = kern.Q_of_r(delta)       # process noise covariance
                Phi = kern.Phi_of_r(delta)   # state transition matrix
                P = np.dot(np.dot(Phi, V), Phi.T) + Q
                PhiMu = np.dot(Phi, mu)
                prior_m = np.dot(H, PhiMu)
                prior_v = np.dot(np.dot(H, P), H.T) + noise_var

                log_likelihood_i = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v)
                log_likelihood += log_likelihood_i

                kalman_gain = np.dot(P, H.T)/prior_v
                mu = PhiMu + kalman_gain*(Y[t] - prior_m)
                V = np.dot(np.eye(order) - np.dot(kalman_gain, H), P)

                results[t-1][2] = Phi
                results[t-1][3] = P
                results[t][0] = mu
                results[t][1] = V

        # carry out backwards (Rauch-Tung-Striebel) smoothing
        # initialise the lag-one covariance for the smoother
        W = np.dot(np.eye(order) - np.dot(kalman_gain, H), np.dot(Phi, results[K-2][1]))

        mu_s = results[K-1][0]
        V_s = results[K-1][1]

        posterior_mean = np.zeros((K, 1))
        posterior_var = np.zeros((K, 1))
        E = np.zeros((K, 4), dtype=object)

        posterior_mean[K-1] = np.dot(H, mu_s)
        posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T)
        E[K-1][0] = mu_s
        E[K-1][1] = V_s

        for t in range(K-2, -1, -1):
            mu = results[t][0]
            V = results[t][1]
            Phi = results[t][2]
            P = results[t][3]

            L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order)))  # smoother gain V Phi^T P^{-1}
            mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
            V_s = V + np.dot(np.dot(L, V_s - P), L.T)
            posterior_mean[t] = np.dot(H, mu_s)
            posterior_var[t] = np.dot(np.dot(H, V_s), H.T)

            if t < K-2:
                W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T))

            E[t][0] = mu_s
            E[t][1] = V_s
            E[t][2] = L
            E[t][3] = W

        return SsmPosterior(mu_f=results[:, 0], V_f=results[:, 1], mu_s=E[:, 0], V_s=E[:, 1], expectations=E), log_likelihood
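
The SSM construction pays off because the filter's one-step predictive densities factorise the GP marginal likelihood exactly. A self-contained scalar-state sketch of that equivalence (illustrative only; it uses a generic stationary AR(1) model rather than GPy's kernel API, and all names are hypothetical):

    import numpy as np
    from scipy.stats import multivariate_normal

    phi, q, r, T = 0.8, 0.5, 0.1, 6
    p0 = q / (1 - phi**2)                      # stationary state variance
    np.random.seed(1)
    y = np.random.randn(T)

    # direct evaluation: y is jointly Gaussian with an exponential covariance
    idx = np.arange(T)
    C = p0 * phi**np.abs(idx[:, None] - idx[None, :])
    ll_direct = multivariate_normal.logpdf(y, mean=np.zeros(T), cov=C + r*np.eye(T))

    # Kalman filter: accumulate one-step predictive densities, as in inference()
    mu, P, ll_filter = 0.0, p0, 0.0
    for t in range(T):
        if t > 0:
            mu, P = phi*mu, phi*P*phi + q      # predict
        v = P + r                              # innovation variance
        ll_filter += -0.5*(np.log(2*np.pi) + np.log(v) + (y[t]-mu)**2/v)
        gain = P / v                           # update
        mu, P = mu + gain*(y[t]-mu), (1-gain)*P

    assert np.isclose(ll_direct, ll_filter)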
62
GPy/inference/latent_function_inference/grid_posterior.py
Normal file
@@ -0,0 +1,62 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

import numpy as np


class GridPosterior(object):
    """
    Specially intended for the grid regression case.
    An object to represent a Gaussian posterior over latent function values, p(f|D).

    The purpose of this class is to serve as an interface between the inference
    schemes and the model classes.
    """
    def __init__(self, alpha_kron=None, QTs=None, Qs=None, V_kron=None):
        """
        alpha_kron : (K + noise*I)^{-1} Y, computed via the Kronecker eigendecomposition
        QTs : transposed eigenvectors resulting from the decomposition of the single-dimension covariance matrices
        Qs : eigenvectors resulting from the decomposition of the single-dimension covariance matrices
        V_kron : Kronecker product of the eigenvalues resulting from the decomposition of the single-dimension covariance matrices
        """
        if alpha_kron is None or QTs is None or Qs is None or V_kron is None:
            raise ValueError("insufficient information for predictions")

        self._alpha_kron = alpha_kron
        self._qTs = QTs
        self._qs = Qs
        self._v_kron = V_kron

    @property
    def alpha(self):
        """
        (K + noise*I)^{-1} Y, computed via the Kronecker eigendecomposition
        """
        return self._alpha_kron

    @property
    def QTs(self):
        """
        array of transposed eigenvectors from the single-dimension covariance decompositions
        """
        return self._qTs

    @property
    def Qs(self):
        """
        array of eigenvectors from the single-dimension covariance decompositions
        """
        return self._qs

    @property
    def V_kron(self):
        """
        Kronecker product of the eigenvalues of the single-dimension covariance matrices
        """
        return self._v_kron
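
A minimal usage sketch for this container (placeholder values; assumes a GPy checkout containing this commit is importable):

    import numpy as np
    from GPy.inference.latent_function_inference.grid_posterior import GridPosterior

    # toy contents for a 3 x 4 grid (12 points in total)
    Qs = np.zeros(2, dtype=object)
    QTs = np.zeros(2, dtype=object)
    Qs[0], Qs[1] = np.eye(3), np.eye(4)
    QTs[0], QTs[1] = Qs[0].T, Qs[1].T

    post = GridPosterior(alpha_kron=np.ones((12, 1)), QTs=QTs, Qs=Qs,
                         V_kron=np.ones((12, 1)))
    print(post.alpha.shape)   # (12, 1)
    # omitting any argument raises ValueError("insufficient information for predictions")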
73
GPy/inference/latent_function_inference/ssm_posterior.py
Normal file
@@ -0,0 +1,73 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# Kurt Cutajar

import numpy as np


class SsmPosterior(object):
    """
    Specially intended for the SSM regression case.
    An object to represent a Gaussian posterior over latent function values, p(f|D).

    The purpose of this class is to serve as an interface between the inference
    schemes and the model classes.
    """
    def __init__(self, mu_f=None, V_f=None, mu_s=None, V_s=None, expectations=None):
        """
        mu_f : mean values predicted during the Kalman filtering step
        V_f : variances predicted during the Kalman filtering step
        mu_s : mean values predicted during the backwards smoothing step
        V_s : variances predicted during the backwards smoothing step
        expectations : posterior expectations
        """
        if (mu_f is None or V_f is None or mu_s is None
                or V_s is None or expectations is None):
            raise ValueError("insufficient information to compute predictions")

        self._mu_f = mu_f
        self._V_f = V_f
        self._mu_s = mu_s
        self._V_s = V_s
        self._expectations = expectations

    @property
    def mu_f(self):
        """
        Mean values predicted during the Kalman filtering step
        """
        return self._mu_f

    @property
    def V_f(self):
        """
        Variances predicted during the Kalman filtering step
        """
        return self._V_f

    @property
    def mu_s(self):
        """
        Mean values predicted during the backwards smoothing step
        """
        return self._mu_s

    @property
    def V_s(self):
        """
        Variances predicted during the backwards smoothing step
        """
        return self._V_s

    @property
    def expectations(self):
        """
        Posterior expectations
        """
        return self._expectations
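
Likewise, a minimal construction of the SSM container (placeholder moments; assumes a GPy checkout containing this commit is importable):

    import numpy as np
    from GPy.inference.latent_function_inference.ssm_posterior import SsmPosterior

    # filtered/smoothed moments for a 2-step, order-1 model (placeholder values)
    mu = np.zeros(2, dtype=object)
    V = np.zeros(2, dtype=object)
    mu[0], mu[1] = np.array([[0.3]]), np.array([[0.1]])
    V[0], V[1] = np.array([[0.5]]), np.array([[0.4]])

    post = SsmPosterior(mu_f=mu, V_f=V, mu_s=mu, V_s=V,
                        expectations=np.zeros((2, 4), dtype=object))
    print(post.mu_f[0], post.V_s[1])   # filtered mean at t=0, smoothed variance at t=1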