very weird merge conflict, including in files that I did not change

2026-05-30 14:35:15 +02:00 · 2014-03-18 16:46:37 +00:00 · 2014-03-18 16:46:37 +00:00 · 601175de2d
commit 601175de2d
parent a6eae08934 a3287c38ea
73 changed files with 2234 additions and 1567 deletions
--- a/GPy/likelihoods/__init__.py
+++ b/GPy/likelihoods/__init__.py
@ -5,3 +5,4 @@ from gamma import Gamma
 from poisson import Poisson
 from student_t import StudentT
 from likelihood import Likelihood
+from mixed_noise import MixedNoise
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@ -5,6 +5,7 @@ import numpy as np
 from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
 import link_functions
 from likelihood import Likelihood
+from scipy import stats

 class Bernoulli(Likelihood):
    """
@ -43,7 +44,7 @@ class Bernoulli(Likelihood):
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep

-    def moments_match_ep(self, data_i, tau_i, v_i):
+    def moments_match_ep(self, Y_i, tau_i, v_i):
        """
        Moments match of the marginal approximation in EP algorithm

@ -51,9 +52,9 @@ class Bernoulli(Likelihood):
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
-        if data_i == 1:
+        if Y_i == 1:
            sign = 1.
-        elif data_i == 0:
+        elif Y_i == 0:
            sign = -1
        else:
            raise ValueError("bad value for Bernouilli observation (0, 1)")
@ -76,7 +77,7 @@ class Bernoulli(Likelihood):

        return Z_hat, mu_hat, sigma2_hat

-    def predictive_mean(self, mu, variance):
+    def predictive_mean(self, mu, variance, Y_metadata=None):

        if isinstance(self.gp_link, link_functions.Probit):
            return stats.norm.cdf(mu/np.sqrt(1+variance))
@ -87,13 +88,12 @@ class Bernoulli(Likelihood):
        else:
            raise NotImplementedError

-    def predictive_variance(self, mu, variance, pred_mean):
+    def predictive_variance(self, mu, variance, pred_mean, Y_metadata=None):

        if isinstance(self.gp_link, link_functions.Heaviside):
            return 0.
        else:
            return np.nan
-            #raise NotImplementedError

    def pdf_link(self, link_f, y, Y_metadata=None):
        """
@ -212,7 +212,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return d3logpdf_dlink3

-    def samples(self, gp):
+    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 #TODO
 """
-A lot of this code assumes that the link function is the identity. 
+A lot of this code assumes that the link function is the identity.

 I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.

@ -18,6 +18,7 @@ import link_functions
 from likelihood import Likelihood
 from ..core.parameterization import Param
 from ..core.parameterization.transformations import Logexp
+from scipy import stats

 class Gaussian(Likelihood):
    """
@ -49,11 +50,18 @@ class Gaussian(Likelihood):
        if isinstance(gp_link, link_functions.Identity):
            self.log_concave = True

-    def covariance_matrix(self, Y, Y_metadata=None):
-        return np.eye(Y.shape[0]) * self.variance
+    def betaY(self,Y,Y_metadata=None):
+        #TODO: ~Ricardo this does not live here
+        return Y/self.gaussian_variance(Y_metadata)

-    def update_gradients(self, partial):
-        self.variance.gradient = np.sum(partial)
+    def gaussian_variance(self, Y_metadata=None):
+        return self.variance
+
+    def update_gradients(self, grad):
+        self.variance.gradient = grad
+
+    def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
+        return dL_dKdiag.sum()

    def _preprocess_values(self, Y):
        """
@ -76,16 +84,12 @@ class Gaussian(Likelihood):
        Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
        return Z_hat, mu_hat, sigma2_hat

-    def predictive_values(self, mu, var, full_cov=False):
+    def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
        if full_cov:
            var += np.eye(var.shape[0])*self.variance
-            d = 2*np.sqrt(np.diag(var))
-            low, up = mu - d, mu + d
        else:
            var += self.variance
-            d = 2*np.sqrt(var)
-            low, up = mu - d, mu + d
-        return mu, var, low, up
+        return mu, var

    def predictive_mean(self, mu, sigma):
        return mu
@ -93,7 +97,14 @@ class Gaussian(Likelihood):
    def predictive_variance(self, mu, sigma, predictive_mean=None):
        return self.variance + sigma**2

+<<<<<<< HEAD
    def pdf_link(self, link_f, y, Y_metadata=None):
+=======
+    def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
+        return  [stats.norm.ppf(q/100.)*np.sqrt(var) + mu for q in quantiles]
+
+    def pdf_link(self, link_f, y, extra_data=None):
+>>>>>>> a3287c38ea775155df4e90f7fe1883d12ffb54b9
        """
        Likelihood function given link(f)

@ -292,7 +303,7 @@ class Gaussian(Likelihood):
        """
        return self.variance

-    def samples(self, gp):
+    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

@ -300,6 +311,8 @@ class Gaussian(Likelihood):
        """
        orig_shape = gp.shape
        gp = gp.flatten()
+        #orig_shape = gp.shape
+        gp = gp.flatten()
        Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
        return Ysim.reshape(orig_shape)

--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@ -58,6 +58,18 @@ class Likelihood(Parameterized):
        """
        return Y

+    def conditional_mean(self, gp):
+        """
+        The mean of the random variable conditioned on one value of the GP
+        """
+        raise NotImplementedError
+
+    def conditional_variance(self, gp):
+        """
+        The variance of the random variable conditioned on one value of the GP
+        """
+        raise NotImplementedError
+
    def log_predictive_density(self, y_test, mu_star, var_star):
        """
        Calculation of the log predictive density
@ -120,7 +132,7 @@ class Likelihood(Parameterized):

        return z, mean, variance

-    def _predictive_mean(self, mu, variance):
+    def predictive_mean(self, mu, variance, Y_metadata=None):
        """
        Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )

@ -128,8 +140,14 @@ class Likelihood(Parameterized):
        :param sigma: standard deviation of posterior

        """
+        #conditional_mean: the expected value of y given some f, under this likelihood
        def int_mean(f,m,v):
-            return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_mean will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_mean(f)*p
        scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
        mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))

@ -139,7 +157,7 @@ class Likelihood(Parameterized):
        """Quadrature calculation of the conditional mean: E(Y_star|f)"""
        raise NotImplementedError, "implement this function to make predictions"

-    def _predictive_variance(self,mu,variance,predictive_mean=None):
+    def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
        """
        Numerical approximation to the predictive variance: V(Y_star)

@ -156,7 +174,12 @@ class Likelihood(Parameterized):

        # E( V(Y_star|f_star) )
        def int_var(f,m,v):
-            return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_variance will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_variance(f)*p
        scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
        exp_var = np.array(scaled_exp_variance)[:,None] / normalizer

@ -169,13 +192,20 @@ class Likelihood(Parameterized):

        #E( E(Y_star|f_star)**2 )
        def int_pred_mean_sq(f,m,v,predictive_mean_sq):
-            return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
+            p = np.exp(-(0.5/v)*np.square(f - m))
+            #If p is zero then conditional_mean**2 will overflow
+            if p < 1e-10:
+                return 0.
+            else:
+                return self.conditional_mean(f)**2*p
+
        scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
        exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer

        var_exp = exp_exp2 - predictive_mean_sq

-        # V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
+        # V(Y_star) = E[ V(Y_star|f_star) ] + V[ E(Y_star|f_star) ]
+        # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
        return exp_var + var_exp

    def pdf_link(self, link_f, y, Y_metadata=None):
@ -362,18 +392,33 @@ class Likelihood(Parameterized):

        return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

-    def predictive_values(self, mu, var):
+    def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
        """
        Compute  mean, variance of the  predictive distibution.

        :param mu: mean of the latent variable, f, of posterior
        :param var: variance of the latent variable, f, of posterior
+        :param full_cov: whether to use the full covariance or just the diagonal
+        :type full_cov: Boolean
        """
-        pred_mean = self.predictive_mean(mu, var)
-        pred_var = self.predictive_variance(mu, var, pred_mean)
+
+        pred_mean = self.predictive_mean(mu, var, Y_metadata)
+        pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
+
        return pred_mean, pred_var

-    def samples(self, gp):
+    def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
+        #compute the quantiles by sampling!!!
+        N_samp = 1000
+        s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
+        #ss_f = s.flatten()
+        #ss_y = self.samples(ss_f, Y_metadata)
+        ss_y = self.samples(s, Y_metadata)
+        #ss_y = ss_y.reshape(mu.shape[0], N_samp)
+
+        return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
+
+    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@ -6,6 +6,9 @@ from scipy import stats
 import scipy as sp
 from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf

+_exp_lim_val = np.finfo(np.float64).max
+_lim_val = np.log(_exp_lim_val)
+
 class GPTransformation(object):
    """
    Link function class for doing non-Gaussian likelihoods approximation
@ -92,16 +95,16 @@ class Log(GPTransformation):

    """
    def transf(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

    def dtransf_df(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

    def d2transf_df2(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

    def d3transf_df3(self,f):
-        return np.exp(f)
+        return np.exp(np.clip(f, -_lim_val, _lim_val))

 class Log_ex_1(GPTransformation):
    """
--- a/GPy/likelihoods/mixed_noise.py
+++ b/GPy/likelihoods/mixed_noise.py
@ -0,0 +1,87 @@
+import numpy as np
+from scipy import stats, special
+from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
+import link_functions
+from likelihood import Likelihood
+from gaussian import Gaussian
+from ..core.parameterization import Param
+from ..core.parameterization.transformations import Logexp
+from ..core.parameterization import Parameterized
+import itertools
+
+class MixedNoise(Likelihood):
+    def __init__(self, likelihoods_list, name='mixed_noise'):
+
+        super(Likelihood, self).__init__(name=name)
+
+        self.add_parameters(*likelihoods_list)
+        self.likelihoods_list = likelihoods_list
+        self.log_concave = False
+
+    def gaussian_variance(self, Y_metadata):
+        assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
+        ind = Y_metadata['output_index'].flatten()
+        variance = np.zeros(ind.size)
+        for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
+            variance[ind==j] = lik.variance
+        return variance[:,None]
+
+    def betaY(self,Y,Y_metadata):
+        return Y/self.gaussian_variance(Y_metadata=Y_metadata)
+
+    def update_gradients(self, gradients):
+        self.gradient = gradients
+
+    def exact_inference_gradients(self, dL_dKdiag, Y_metadata):
+        assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
+        ind = Y_metadata['output_index'].flatten()
+        return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
+
+    def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
+        if all([isinstance(l, Gaussian) for l in self.likelihoods_list]):
+            ind = Y_metadata['output_index'].flatten()
+            _variance = np.array([self.likelihoods_list[j].variance for j in ind ])
+            if full_cov:
+                var += np.eye(var.shape[0])*_variance
+            else:
+                var += _variance
+            return mu, var
+        else:
+            raise NotImplementedError
+
+    def predictive_variance(self, mu, sigma, **other_shit):
+        if isinstance(noise_index,int):
+            _variance = self.variance[noise_index]
+        else:
+            _variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
+        return _variance + sigma**2
+
+
+    def covariance_matrix(self, Y, Y_metadata):
+        #assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
+        #ind = Y_metadata['output_index'].flatten()
+        #variance = np.zeros(Y.shape[0])
+        #for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
+        #    variance[ind==j] = lik.variance
+        #return np.diag(variance)
+        return np.diag(self.gaussian_variance(Y_metadata).flatten())
+
+
+    def samples(self, gp, Y_metadata):
+        """
+        Returns a set of samples of observations based on a given value of the latent variable.
+
+        :param gp: latent variable
+        """
+        N1, N2 = gp.shape
+        Ysim = np.zeros((N1,N2))
+        ind = Y_metadata['output_index'].flatten()
+        for j in np.unique(ind):
+            flt = ind==j
+            gp_filtered = gp[flt,:]
+            n1 = gp_filtered.shape[0]
+            lik = self.likelihoods_list[j]
+            _ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()])
+            Ysim[flt,:] = _ysim.reshape(n1,N2)
+        return Ysim
+
--- a/GPy/likelihoods/poisson.py
+++ b/GPy/likelihoods/poisson.py
@ -21,7 +21,7 @@ class Poisson(Likelihood):
    """
    def __init__(self, gp_link=None):
        if gp_link is None:
-            gp_link = link_functions.Log_ex_1()
+            gp_link = link_functions.Log()

        super(Poisson, self).__init__(gp_link, name='Poisson')

@ -134,7 +134,19 @@ class Poisson(Likelihood):
        d3lik_dlink3 = 2*y/(link_f)**3
        return d3lik_dlink3

-    def samples(self, gp):
+    def conditional_mean(self,gp):
+        """
+        The mean of the random variable conditioned on one value of the GP
+        """
+        return self.gp_link.transf(gp)
+
+    def conditional_variance(self,gp):
+        """
+        The variance of the random variable conditioned on one value of the GP
+        """
+        return self.gp_link.transf(gp)
+
+    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

--- a/GPy/likelihoods/student_t.py
+++ b/GPy/likelihoods/student_t.py
@ -9,6 +9,7 @@ from scipy import stats, integrate
 from scipy.special import gammaln, gamma
 from likelihood import Likelihood
 from ..core.parameterization import Param
+from ..core.parameterization.transformations import Logexp

 class StudentT(Likelihood):
    """
@ -26,7 +27,7 @@ class StudentT(Likelihood):

        super(StudentT, self).__init__(gp_link, name='Student_T')

-        self.sigma2 = Param('t_noise', float(sigma2))
+        self.sigma2 = Param('t_noise', float(sigma2), Logexp())
        self.v = Param('deg_free', float(deg_free))
        self.add_parameter(self.sigma2)
        self.add_parameter(self.v)
@ -37,7 +38,7 @@ class StudentT(Likelihood):
    def parameters_changed(self):
        self.variance = (self.v / float(self.v - 2)) * self.sigma2

-    def update_gradients(self, derivatives):
+    def update_gradients(self, grads):
        """
        Pull out the gradients, be careful as the order must match the order
        in which the parameters are added
@ -244,33 +245,33 @@ class StudentT(Likelihood):
        d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
        return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))

-    def predictive_variance(self, mu, sigma, predictive_mean=None):
+    def predictive_mean(self, mu, sigma, Y_metadata=None):
        """
-        Compute predictive variance of student_t*normal p(y*|f*)p(f*)
-
-        Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
-        (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
-        *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
+        Compute mean of the prediction
        """
+        return self.gp_link.transf(mu) # only true if link is monotonic, which it is.

-        #FIXME: Not correct
-        #We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
-        #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
-        #Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
-        #Which was also given to us as (var)
-        #We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
-        #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
-        true_var = 1/(1/sigma**2 + 1/self.variance)
+    def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
+        if self.deg_free <2.:
+            return np.empty(mu.shape)*np.nan #not defined for small degrees of freedom
+        else:
+            return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)

-        return true_var
+    def conditional_mean(self, gp):
+        return self.gp_link.transf(gp)

+<<<<<<< HEAD
    def predictive_mean(self, mu, sigma):
        """
        Compute mean of the prediction
        """
        return mu
+=======
+    def conditional_variance(self, gp):
+        return self.deg_free/(self.deg_free - 2.)
+>>>>>>> a3287c38ea775155df4e90f7fe1883d12ffb54b9

-    def samples(self, gp):
+    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.