Merge branch 'updates' into devel

2026-05-27 14:25:16 +02:00 · 2015-09-02 11:23:35 +01:00 · 2015-09-02 11:23:35 +01:00 · 016b3a9965
commit 016b3a9965
parent ce9ee6c758 70a9a26d7e
15 changed files with 366 additions and 65 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -106,6 +106,13 @@ class GP(Model):
        self.link_parameter(self.likelihood)
        self.posterior = None

+        # The variable used for prediction together with the posterior object's
+        # woodbury_vector and woodbury_inv is stored as self._predictive_variable.
+        # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
+        # Make sure to set this variable and the predict functions will "just work"
+        # as long as the posterior has the right woodbury entries.
+        self._predictive_variable = self.X
+

    def set_XY(self, X=None, Y=None, trigger_update=True):
        """
@ -209,6 +216,7 @@ class GP(Model):
                var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
            elif self.posterior.woodbury_inv.ndim == 3:
                var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
+                from ..util.linalg import mdot
                for i in range(var.shape[2]):
                    var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
            var = var
@ -304,6 +312,103 @@ class GP(Model):
        return dmu_dX, dv_dX


+    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
+        """
+        Compute the derivatives of the posterior of the GP.
+
+        Given a set of points at which to predict X* (size [N*,Q]), compute the
+        mean and variance of the derivative. Resulting arrays are sized:
+
+         dmu_dX* -- [N*, Q ,D], where D is the number of outputs in this GP (usually one).
+          Note that this is the mean and variance of the derivative,
+          not the derivative of the mean and variance! (See predictive_gradients for that)
+
+         dv_dX*  -- [N*, Q],    (since all outputs have the same variance)
+          If the posterior holds one woodbury_inv per output (missing data),
+          the per-output variances are stacked along a trailing axis of size D.
+
+        :param Xnew: The points at which to get the predictive gradients.
+        :type Xnew: np.ndarray (N* x self.input_dim)
+        :param kern: The kernel to compute the jacobian for (defaults to self.kern).
+        :param boolean full_cov: whether to return the full covariance of the jacobian.
+
+        :returns: dmu_dX, dv_dX
+        :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
+
+        Note: We always return sum in input_dim gradients, as the off-diagonals
+        in the input_dim are not needed for further calculations.
+        This is a compromise for increase in speed. Mathematically the jacobian would
+        have another dimension in Q.
+        """
+        if kern is None:
+            kern = self.kern
+
+        mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
+
+        for i in range(self.output_dim):
+            mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
+
+        dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
+        for i in range(self._predictive_variable.shape[0]):
+            dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])
+
+        def compute_cov_inner(wi):
+            if full_cov:
+                # full covariance gradients:
+                dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
+                var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+            else:
+                dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
+                var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
+            return var_jac
+
+        if self.posterior.woodbury_inv.ndim == 3:
+            # One covariance per output: stack on a new LAST axis (not axis 0) to get (..., Q, D).
+            var_jac = [compute_cov_inner(self.posterior.woodbury_inv[:, :, d])[..., np.newaxis]
+                       for d in range(self.posterior.woodbury_inv.shape[2])]
+            var_jac = np.concatenate(var_jac, axis=-1)
+        else:
+            var_jac = compute_cov_inner(self.posterior.woodbury_inv)
+        return mean_jac, var_jac
+
+    def predict_wishard_embedding(self, Xnew, kern=None):
+        """
+        Predict the Wishart embedding G of the GP. This is the density of the
+        input of the GP defined by the probabilistic function mapping f.
+        G = J_mean.T*J_mean + output_dim*J_cov.
+
+        :param array-like Xnew: The points at which to evaluate the magnification.
+        :param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification.
+
+        Supplying only a part of the learning kernel gives insights into the density
+        of the specific kernel part of the input function. E.g. one can see how dense the
+        linear part of a kernel is compared to the non-linear part etc.
+        """
+        if kern is None:
+            kern = self.kern
+
+        mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)  # diagonal jacobian variance only
+        mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)  # per point: sum over outputs d of outer products in q
+        if var_jac.ndim == 3:  # presumably one variance per output, (N*, Q, D) -- TODO confirm against predict_jacobian
+            Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac)
+            G = mumuT + Sigma
+        else:  # 2-d var_jac: a single variance shared by all outputs, hence the output_dim factor below
+            Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac)
+            G = mumuT + self.output_dim*Sigma
+        return G
+
+    def predict_magnification(self, Xnew, kern=None):
+        """
+        Predict the magnification factor as
+
+        sqrt(det(G)) = sqrt(exp(2*sum(log(diag(L))))), where G = L L^T (Cholesky),
+
+        for each point n in Xnew; returns a 1-d array with one value per row of Xnew.
+        """
+        from ..util.linalg import jitchol
+        G = self.predict_wishard_embedding(Xnew, kern)
+        return np.array([np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])])
+
    def posterior_samples_f(self,X,size=10, full_cov=True):
        """
        Samples the posterior GP at the points X.
--- a/GPy/core/mapping.py
+++ b/GPy/core/mapping.py
@ -32,7 +32,7 @@ class Bijective_mapping(Mapping):
    also back from f to X. The inverse mapping is called g().
    """
    def __init__(self, input_dim, output_dim, name='bijective_mapping'):
-        super(Bijective_apping, self).__init__(name=name)
+        super(Bijective_mapping, self).__init__(name=name)

    def g(self, f):
        """Inverse mapping from output domain of the function to the inputs."""
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -59,6 +59,8 @@ class SparseGP(GP):
        logger.info("Adding Z as parameter")
        self.link_parameter(self.Z, index=0)
        self.posterior = None
+        self._predictive_variable = self.Z
+

    def has_uncertain_inputs(self):
        return isinstance(self.X, VariationalPosterior)
@ -114,10 +116,10 @@ class SparseGP(GP):
        Make a prediction for the latent function values.

        For certain inputs we give back a full_cov of shape NxN,
-        if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, 
+        if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is off,
        we take only the diagonal elements across N.
-        
-        For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we 
+
+        For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
        return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
        This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.
        """
@ -125,7 +127,7 @@ class SparseGP(GP):
        if kern is None: kern = self.kern

        if not isinstance(Xnew, VariationalPosterior):
-            Kx = kern.K(self.Z, Xnew)
+            Kx = kern.K(self._predictive_variable, Xnew)
            mu = np.dot(Kx.T, self.posterior.woodbury_vector)
            if full_cov:
                Kxx = kern.K(Xnew)
@ -149,28 +151,28 @@ class SparseGP(GP):
            if self.mean_function is not None:
                mu += self.mean_function.f(Xnew)
        else:
-            psi0_star = kern.psi0(self.Z, Xnew)
-            psi1_star = kern.psi1(self.Z, Xnew)
+            psi0_star = kern.psi0(self._predictive_variable, Xnew)
+            psi1_star = kern.psi1(self._predictive_variable, Xnew)
            #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
            la = self.posterior.woodbury_vector
            mu = np.dot(psi1_star, la) # TODO: dimensions?
-            
-            if full_cov: 
+
+            if full_cov:
                var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
                di = np.diag_indices(la.shape[1])
-            else: 
+            else:
                var = np.empty((Xnew.shape[0], la.shape[1]))
-                
+
            for i in range(Xnew.shape[0]):
                _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
-                psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
+                psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
                tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))

                var_ = mdot(la.T, tmp, la)
                p0 = psi0_star[i]
                t = np.atleast_3d(self.posterior.woodbury_inv)
                t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
-                
+
                if full_cov:
                    var_[di] += p0
                    var_[di] += -t2
--- a/GPy/core/verbose_optimization.py
+++ b/GPy/core/verbose_optimization.py
@ -146,7 +146,7 @@ class VerboseOptimization(object):
        seconds = time.time()-self.start
        #sys.stdout.write(" "*len(self.message))
        self.deltat += seconds
-        if self.deltat > .2:
+        if self.deltat > .3 or seconds < .3:
            self.print_out(seconds)
            self.deltat = 0