Sped up sampling a lot for student t, bernoulli and poisson,

added sampling for gaussian and exponential (untested)
2026-06-11 15:15:15 +02:00 · 2013-10-28 16:47:17 +00:00 · 2013-10-28 16:47:17 +00:00 · 11ee480cbf
commit 11ee480cbf
parent 9a32c5edda
7 changed files with 31 additions and 27 deletions
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@ -123,25 +123,6 @@ def student_t_approx():

    return m

-    #with a student t distribution, since it has heavy tails it should work well
-    #likelihood_function = student_t(deg_free=deg_free, sigma2=real_var)
-    #lap = Laplace(Y, likelihood_function)
-    #cov = kernel.K(X)
-    #lap.fit_full(cov)
-
-    #test_range = np.arange(0, 10, 0.1)
-    #plt.plot(test_range, t_rv.pdf(test_range))
-    #for i in xrange(X.shape[0]):
-        #mode = lap.f_hat[i]
-        #covariance = lap.hess_hat_i[i,i]
-        #scaling = np.exp(lap.ln_z_hat)
-        #normalised_approx = norm(loc=mode, scale=covariance)
-        #print "Normal with mode %f, and variance %f" % (mode, covariance)
-        #plt.plot(test_range, scaling*normalised_approx.pdf(test_range))
-    #plt.show()
-
-    return m
-
 def boston_example():
    import sklearn
    from sklearn.cross_validation import KFold
--- a/GPy/likelihoods/noise_models/bernoulli_noise.py
+++ b/GPy/likelihoods/noise_models/bernoulli_noise.py
@ -207,10 +207,10 @@ class Bernoulli(NoiseDistribution):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

-        :param size: number of samples to compute
        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
-        Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
+        ns = np.ones_like(gp, dtype=int)
+        Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
        return Ysim.reshape(orig_shape)
--- a/GPy/likelihoods/noise_models/exponential_noise.py
+++ b/GPy/likelihoods/noise_models/exponential_noise.py
@ -143,3 +143,14 @@ class Exponential(NoiseDistribution):
        Mass (or density) function
        """
        return self.gp_link.transf(gp)**2
+
+    def samples(self, gp):
+        """
+        Returns a set of samples of observations based on a given value of the latent variable.
+
+        :param gp: latent variable
+        """
+        orig_shape = gp.shape
+        gp = gp.flatten()
+        Ysim = np.random.exponential(1.0/self.gp_link.transf(gp))
+        return Ysim.reshape(orig_shape)
--- a/GPy/likelihoods/noise_models/gaussian_noise.py
+++ b/GPy/likelihoods/noise_models/gaussian_noise.py
@ -285,3 +285,14 @@ class Gaussian(NoiseDistribution):
            Var_{p(y|f)}[y]
        """
        return self.variance
+
+    def samples(self, gp):
+        """
+        Returns a set of samples of observations based on a given value of the latent variable.
+
+        :param gp: latent variable
+        """
+        orig_shape = gp.shape
+        gp = gp.flatten()
+        Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
+        return Ysim.reshape(orig_shape)
--- a/GPy/likelihoods/noise_models/noise_distributions.py
+++ b/GPy/likelihoods/noise_models/noise_distributions.py
@ -375,7 +375,7 @@ class NoiseDistribution(object):
        assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
        return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

-    def predictive_values(self, mu, var, full_cov=False, num_samples=5000,
+    def predictive_values(self, mu, var, full_cov=False, num_samples=30000,
                          sampling=False):
        """
        Compute  mean, variance and conficence interval (percentiles 5 and 95) of the  prediction.
--- a/GPy/likelihoods/noise_models/poisson_noise.py
+++ b/GPy/likelihoods/noise_models/poisson_noise.py
@ -144,10 +144,9 @@ class Poisson(NoiseDistribution):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

-        :param size: number of samples to compute
        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
-        Ysim = np.array([np.random.poisson(self.gp_link.transf(gpj),size=1) for gpj in gp])
+        Ysim = np.random.poisson(self.gp_link.transf(gp))
        return Ysim.reshape(orig_shape)
--- a/GPy/likelihoods/noise_models/student_t_noise.py
+++ b/GPy/likelihoods/noise_models/student_t_noise.py
@ -269,7 +269,9 @@ class StudentT(NoiseDistribution):
        gp = gp.flatten()
        #FIXME: Very slow as we are computing a new random variable per input!
        #Can't get it to sample all at the same time
-        student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
-        #student_t_samples = stats.t.rvs(self.v, loc=self.gp_link.transf(gp),
-                                        #scale=np.sqrt(self.sigma2))
+        #student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
+        dfs = np.ones_like(gp)*self.v
+        scales = np.ones_like(gp)*np.sqrt(self.sigma2)
+        student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
+                                        scale=scales)
        return student_t_samples.reshape(orig_shape)