Merge branch 'updates' into devel

Max Zwiessele 2015-09-02 11:23:35 +01:00
commit 016b3a9965
15 changed files with 366 additions and 65 deletions


@@ -106,6 +106,13 @@ class GP(Model):
self.link_parameter(self.likelihood)
self.posterior = None
# The predictive variable to be used with the posterior object's
# woodbury_vector and woodbury_inv is stored as self._predictive_variable.
# This is usually just a link to self.X (full GP) or self.Z (sparse GP).
# Make sure to set this variable and the predict functions will "just work",
# as long as the posterior has the right woodbury entries.
self._predictive_variable = self.X
def set_XY(self, X=None, Y=None, trigger_update=True):
"""
@@ -209,6 +216,7 @@ class GP(Model):
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
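# missing data: woodbury_inv stores one Woodbury inverse per output dimension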
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
from ..util.linalg import mdot
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
@@ -304,6 +312,103 @@ class GP(Model):
return dmu_dX, dv_dX
def predict_jacobian(self, Xnew, kern=None, full_cov=True):
"""
Compute the derivatives of the posterior of the GP.
Given a set of points at which to predict X* (size [N*,Q]), compute the
mean and variance of the derivative. The resulting arrays are sized:
dmu_dX* -- [N*, Q, D], where D is the number of outputs of this GP (usually one).
Note that this is the mean and variance of the derivative,
not the derivative of the mean and variance! (See predictive_gradients for that.)
dv_dX* -- [N*, Q], since all outputs share the same variance.
Missing data is not implemented for now; once it is, there will be
one output variance per output dimension.
:param Xnew: The points at which to get the predictive gradients.
:type Xnew: np.ndarray (N* x self.input_dim)
:param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the full covariance of the jacobian.
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q, (D))]
Note: we always return the sum over the input_dim gradients, as the
off-diagonals in input_dim are not needed for further calculations.
This is a compromise made for speed; mathematically the jacobian
would have another dimension of size Q.
"""
if kern is None:
kern = self.kern
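# mean of the derivative: gradient of K(Xnew, X) w.r.t. Xnew,
# weighted by the woodbury vector, one slice per output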
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
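# unweighted gradients of the cross-covariance K(Xnew, X) w.r.t. Xnew,
# kept separately for each row of the predictive variable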
dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
for i in range(self._predictive_variable.shape[0]):
dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])
def compute_cov_inner(wi):
if full_cov:
# full covariance gradients:
dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
else:
dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
return var_jac
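# with missing data the posterior keeps one Woodbury inverse per output,
# so compute one covariance per output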
if self.posterior.woodbury_inv.ndim == 3:
var_jac = []
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac.append(compute_cov_inner(self.posterior.woodbury_inv[:, :, d]))
# stack the per-output results along a trailing output dimension,
# matching the documented rtype (N*, Q, (D))
var_jac = np.stack(var_jac, axis=-1)
else:
var_jac = compute_cov_inner(self.posterior.woodbury_inv)
return mean_jac, var_jac
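For orientation, a minimal usage sketch, assuming a trained model m (e.g. a GPy.models.GPRegression on Q-dimensional inputs) and test points Xtest of shape (N*, Q):

mean_jac, var_jac = m.predict_jacobian(Xtest, full_cov=False)
# mean_jac: (N*, Q, D) mean of df/dX*; var_jac: (N*, Q) shared across outputs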
def predict_wishard_embedding(self, Xnew, kern=None):
"""
Predict the Wishart embedding G of the GP. This is the density of the
input of the GP, defined by the probabilistic function mapping f:
G = J_mean.T*J_mean + output_dim*J_cov.
:param array-like Xnew: The points at which to evaluate the magnification.
:param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification.
Supplying only a part of the learned kernel gives insight into the density
of that specific kernel part of the input function, e.g. one can see how
dense the linear part of a kernel is compared to the non-linear part.
"""
if kern is None:
kern = self.kern
mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
if var_jac.ndim == 3:
Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac)
G = mumuT + Sigma
else:
Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac)
G = mumuT + self.output_dim*Sigma
return G
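A short usage sketch (m and Xtest as in the sketch above); G[n] is the Q x Q expected metric of the mapping at the n-th test point:

G = m.predict_wishard_embedding(Xtest)
# one Q x Q matrix per test point
assert G.shape == (Xtest.shape[0], Xtest.shape[1], Xtest.shape[1])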
def predict_magnification(self, Xnew, kern=None):
"""
Predict the magnification factor as
sqrt(det(G))
for each point n in Xnew.
"""
from ..util.linalg import jitchol
G = self.predict_wishard_embedding(Xnew, kern)
# det(G) = prod(diag(L))**2 for G = L L.T, so sqrt(det(G)) = exp(sum(log(diag(L))))
return np.array([np.exp(np.sum(np.log(np.diag(jitchol(G[n, :, :]))))) for n in range(Xnew.shape[0])])
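The Cholesky route above evaluates sqrt(det(G[n])) stably as exp(sum(log(diag(L)))). A direct cross-check, assuming m and Xtest as in the earlier sketches (np.linalg.det can under- or overflow for badly scaled G, which is what jitchol avoids):

import numpy as np
mag = m.predict_magnification(Xtest)
G = m.predict_wishard_embedding(Xtest)
assert np.allclose(mag, np.sqrt(np.linalg.det(G)))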
def posterior_samples_f(self, X, size=10, full_cov=True):
"""
Samples the posterior GP at the points X.