Added sampling for predictive quantiles and also mean and variance

where necessary
2026-07-08 16:12:15 +02:00 · 2013-10-28 15:22:06 +00:00 · 2013-10-28 15:22:06 +00:00 · 336f8e11c4
commit 336f8e11c4
parent 5a924ff5cb
4 changed files with 53 additions and 39 deletions
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -61,6 +61,7 @@ def toy_linear_1d_classification(seed=default_seed):
    #m.update_likelihood_approximation()
    # Parameters optimization:
    #m.optimize()
+    #m.update_likelihood_approximation()
    m.pseudo_EM()

    # Plot
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -272,11 +272,10 @@ def toy_rbf_1d_50(max_iters=100):

 def toy_poisson_rbf_1d(optimizer='bfgs', max_nb_eval_optim=100):
    """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
-    X = np.linspace(0,10)[:, None]
-    F = np.round(X*3-4)
-    F = np.where(F > 0, F, 0)
-    eps = np.random.randint(0,4, F.shape[0])[:, None]
-    Y = F + eps
+    x_len = 400
+    X = np.linspace(0, 10, x_len)[:, None]
+    f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
+    Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]

    noise_model = GPy.likelihoods.poisson()
    likelihood = GPy.likelihoods.EP(Y,noise_model)
@ -293,11 +292,10 @@ def toy_poisson_rbf_1d(optimizer='bfgs', max_nb_eval_optim=100):

 def toy_poisson_rbf_1d_laplace(optimizer='bfgs', max_nb_eval_optim=100):
    """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
-    X = np.linspace(0,10)[:, None]
-    F = np.round(X*3-4)
-    F = np.where(F > 0, F, 0)
-    eps = np.random.randint(0,4, F.shape[0])[:, None]
-    Y = F + eps
+    x_len = 30
+    X = np.linspace(0, 10, x_len)[:, None]
+    f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
+    Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]

    noise_model = GPy.likelihoods.poisson()
    likelihood = GPy.likelihoods.Laplace(Y,noise_model)
@ -309,6 +307,8 @@ def toy_poisson_rbf_1d_laplace(optimizer='bfgs', max_nb_eval_optim=100):
    m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
    # plot
    m.plot()
+    # plot the real underlying rate function
+    pb.plot(X, np.exp(f_true), '--k', linewidth=2)
    print(m)
    return m

--- a/GPy/likelihoods/laplace.py
+++ b/GPy/likelihoods/laplace.py
@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 #
 #Parts of this file were influenced by the Matlab GPML framework written by
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@ -150,6 +150,8 @@ class NoiseDistribution(object):
        :param sigma: standard deviation of posterior

        """
+        #FIXME: Quadrature does not work!
+        raise NotImplementedError
        sigma2 = sigma**2
        #Compute first moment
        def int_mean(f):
@ -193,19 +195,6 @@ class NoiseDistribution(object):
        # V(Y_star | f_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
        return exp_var + var_exp

-    def _predictive_percentiles(self,p,mu,sigma):
-        """
-        Percentiles of the predictive distribution
-
-        :parm p: lower tail probability
-        :param mu: cavity distribution mean
-        :param sigma: cavity distribution standard deviation
-        :predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
-
-        """
-        qf = stats.norm.ppf(p,mu,sigma)
-        return self.gp_link.transf(qf)
-
    def pdf_link(self, link_f, y, extra_data=None):
        raise NotImplementedError

@ -386,26 +375,50 @@ class NoiseDistribution(object):
        assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
        return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

-    def predictive_values(self,mu,var):
+    def predictive_values(self, mu, var, full_cov=False, num_samples=5000,
+                          sampling=False):
        """
        Compute  mean, variance and conficence interval (percentiles 5 and 95) of the  prediction.

-        :param mu: mean of the latent variable, f
-        :param var: variance of the latent variable, f
+        :param mu: mean of the latent variable, f, of posterior
+        :param var: variance of the latent variable, f, of posterior
+        :param full_cov: whether to use the full covariance or just the diagonal
+        :type full_cov: Boolean
+        :param num_samples: number of samples to use in computing quantiles and
+                            possibly mean variance
+        :type num_samples: integer
+        :param sampling: Whether to use samples for mean and variances anyway
+        :type sampling: Boolean

        """
-        if isinstance(mu,float) or isinstance(mu,int):
-            mu = [mu]
-            var = [var]
-        pred_mean = []
-        pred_var = []
-        q1 = []
-        q3 = []
-        for m,s in zip(mu,np.sqrt(var)):
-            pred_mean.append(self.predictive_mean(m,s))
-            pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
-            q1.append(self._predictive_percentiles(.025,m,s))
-            q3.append(self._predictive_percentiles(.975,m,s))
+
+        #Get gp_samples f* using posterior mean and variance
+        if not full_cov:
+            gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
+                                                        size=num_samples).T
+        else:
+            gp_samples = np.random.multivariate_normal(mu.flatten(), var,
+                                                           size=num_samples).T
+
+        #Push gp samples (f*) through likelihood to give p(y*|f*)
+        samples = self.samples(gp_samples)
+        axis=-1
+
+        if self.analytical_mean and not sampling:
+            pred_mean = self.predictive_mean(mu, np.sqrt(var))
+        else:
+            pred_mean = np.mean(samples, axis=axis)
+
+        if self.analytical_variance and not sampling:
+            pred_var = self.predictive_variance(mu, np.sqrt(var), pred_mean)
+        else:
+            pred_var = np.var(samples, axis=axis)
+
+        #Calculate quantiles from samples
+        q1 = np.percentile(samples, 2.5, axis=axis)
+        q3 = np.percentile(samples, 97.5, axis=axis)
+        print "WARNING: Using sampling to calculate predictive quantiles"
+
        pred_mean = np.vstack(pred_mean)
        pred_var = np.vstack(pred_var)
        q1 = np.vstack(q1)