Merge branch 'newGP' of github.com:SheffieldML/GPy into newGP

Conflicts: GPy/likelihoods/EP.py
2026-05-06 18:42:39 +02:00 · 2013-02-01 13:56:28 +00:00 · 2013-02-01 13:56:28 +00:00 · 64280d7eb6
commit 64280d7eb6
parent c025e8b68b f941d629e6
7 changed files with 136 additions and 136 deletions
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -10,6 +10,7 @@ from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
+from .. import likelihoods

 class model(parameterised):
    def __init__(self):
@ -401,7 +402,7 @@ class model(parameterised):
        :type optimzer: string TODO: valid strings?

        """
-        assert self.EP, "EM is not available for gaussian likelihood"
+        assert isinstance(self.likelihood,likelihoods.EP), "EM is not available for Gaussian likelihoods"
        log_change = epsilon + 1.
        self.log_likelihood_record = []
        self.gp_params_record = []
@ -410,18 +411,18 @@ class model(parameterised):
        last_value = -np.exp(1000)
        while log_change > epsilon or not iteration:
            print 'EM iteration %s' %iteration
-            self.approximate_likelihood()
+            self.update_likelihood_approximation()
            self.optimize(**kwargs)
            new_value = self.log_likelihood()
            log_change = new_value - last_value
            if log_change > epsilon:
                self.log_likelihood_record.append(new_value)
                self.gp_params_record.append(self._get_params())
-                self.ep_params_record.append((self.beta,self.Y,self.Z_ep))
+                #self.ep_params_record.append((self.beta,self.Y,self.Z_ep))
                last_value = new_value
            else:
                convergence = False
-                self.beta, self.Y,  self.Z_ep = self.ep_params_record[-1]
+                #self.beta, self.Y,  self.Z_ep = self.ep_params_record[-1]
                self._set_params(self.gp_params_record[-1])
                print "Log-likelihood decrement: %s \nLast iteration discarded." %log_change
            iteration += 1
--- a/GPy/examples/ep_fix.py
+++ b/GPy/examples/ep_fix.py
@ -3,7 +3,8 @@


 """
-Simple Gaussian Processes classification
+Simple Gaussian Processes classification 1D
+probit likelihood
 """
 import pylab as pb
 import numpy as np
@ -12,28 +13,31 @@ pb.ion()

 pb.close('all')

-model_type='Full'
-inducing=4
-"""Simple 1D classification example.
-:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
-:param seed : seed value for data generation (default is 4).
-:type seed: int
-:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-:type inducing: int
-"""
-data = GPy.util.datasets.toy_linear_1d_classification(seed=0)
-likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
+# Inputs
+N = 30
+X1 = np.random.normal(5,2,N/2)
+X2 = np.random.normal(10,2,N/2)
+X = np.hstack([X1,X2])[:,None]

-m = GPy.models.GP(data['X'],likelihood=likelihood)
-#m = GPy.models.GP(data['X'],likelihood.Y)
+# Outputs
+Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
+
+# Kernel object
+kernel = GPy.kern.rbf(1)
+
+# Define likelihood
+distribution = GPy.likelihoods.likelihood_functions.probit()
+likelihood_object = GPy.likelihoods.EP(Y,distribution)
+
+# Model definition
+m = GPy.models.GP(X,kernel,likelihood=likelihood_object)
 m.ensure_default_constraints()
+m.update_likelihood_approximation()
+#m.checkgrad(verbose=1)
+m.optimize()
+print "Round 2"
+#rm.update_likelihood_approximation()

-# Optimize and plot
-#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
-#    m.approximate_likelihood()
-#m.optimize()
-m.EM()
-
-print m.log_likelihood()
-m.plot(samples=3)
-print(m)
+#m.EPEM()
+m.plot()
+#print(m)
--- a/GPy/likelihoods/EP.py
+++ b/GPy/likelihoods/EP.py
@ -16,6 +16,8 @@ class EP(likelihood):
        self.likelihood_function = likelihood_function
        self.epsilon = epsilon
        self.eta, self.delta = power_ep
+        self.data = data
+        self.N = self.data.size
        self.is_heteroscedastic = True

        #Initial values - Likelihood approximation parameters:
@ -23,6 +25,24 @@ class EP(likelihood):
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)

+        #initial values for the GP variables
+        self.Y = np.zeros((self.N,1))
+        self.covariance_matrix = np.eye(self.N)
+        self.Z = 0
+        self.YYT = None
+
+    def predictive_values(self,mu,var):
+        return self.likelihood_function.predictive_values(mu,var)
+
+    def _get_params(self):
+        return np.zeros(0)
+    def _get_param_names(self):
+        return []
+    def _set_params(self,p):
+        pass # TODO: the EP likelihood might want to take some parameters...
+    def _gradients(self,partial):
+        return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
+
    def _compute_GP_variables(self):
        #Variables to be called from GP
        mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
@ -31,7 +51,8 @@ class EP(likelihood):
        self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep

        self.Y =  mu_tilde[:,None]
-        self.precision = self.tau_tilde[:,None]
+        self.YYT = np.dot(self.Y,self.Y.T)
+        self.precision = self.tau_tilde
        self.covariance_matrix = np.diag(1./self.precision)

    def fit_full(self,K):
@ -41,6 +62,8 @@ class EP(likelihood):
        """
        #Prior distribution parameters: p(f|X) = N(f|0,K)

+        self.tau_tilde = np.zeros(self.N)
+        self.v_tilde = np.zeros(self.N)
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        self.mu = np.zeros(self.N)
        self.Sigma = K.copy()
@ -73,8 +96,9 @@ class EP(likelihood):
                #Cavity distribution parameters
                self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
                self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
+                print 1./self.Sigma[i,i],self.tau_tilde[i]
                #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
@ -85,6 +109,7 @@ class EP(likelihood):
                self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
                self.mu = np.dot(self.Sigma,self.v_tilde)
                self.iterations += 1
+                print self.tau_tilde[i]
            #Sigma recomptutation with Cholesky decompositon
            Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
            B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
@ -105,7 +130,7 @@ class EP(likelihood):
        For nomenclature see ... 2013.
        """

-        #TODO: this doesn;t work with uncertain inputs! 
+        #TODO: this doesn;t work with uncertain inputs!

        """
        Prior approximation parameters:
@ -246,7 +271,7 @@ class EP(likelihood):
                self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
                self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
                #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
--- a/GPy/likelihoods/Gaussian.py
+++ b/GPy/likelihoods/Gaussian.py
@ -33,7 +33,17 @@ class Gaussian(likelihood):
        self.covariance_matrix = np.eye(self.N)*self._variance
        self.precision = 1./self._variance

-    def fit(self):
+    def predictive_values(self,mu,var):
+        """
+        Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval
+        """
+        mean = mu*self._std + self._mean
+        true_var = (var + self._variance)*self._std**2
+        _5pc = mean + mean - 2.*np.sqrt(var)
+        _95pc = mean + 2.*np.sqrt(var)
+        return mean, _5pc, _95pc
+
+    def fit_full(self):
        """
        No approximations needed
        """
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -7,6 +7,7 @@ from scipy import stats
 import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
+#from . import EP

 class likelihood_function:
    """
@ -28,8 +29,6 @@ class probit(likelihood_function):
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
-    def __init__(self,location=0,scale=1):
-        likelihood_function.__init__(self,Y,location,scale)

    def moments_match(self,data_i,tau_i,v_i):
        """
@ -47,24 +46,18 @@ class probit(likelihood_function):
        sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
        return Z_hat, mu_hat, sigma2_hat

-    def predictive_values(self,mu,var,all=False):
+    def predictive_values(self,mu,var):
        """
-        Compute  mean, variance, and conficence interval (percentiles 5 and 95) of the  prediction
+        Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
        """
        mu = mu.flatten()
        var = var.flatten()
        mean = stats.norm.cdf(mu/np.sqrt(1+var))
-        if all:
-            p_05 = np.zeros([mu.size])
-            p_95 = np.ones([mu.size])
-            return mean, mean*(1-mean),p_05,p_95
-        else:
-            return mean
+        p_05 = np.zeros([mu.size])
+        p_95 = np.ones([mu.size])
+        return mean, p_05, p_95

-    def _log_likelihood_gradients():
-        return np.zeros(0) # there are no parameters of whcih to compute the gradients
-
-class poisson(likelihood_function):
+class Poisson(likelihood_function):
    """
    Poisson likelihood
    Y is expected to take values in {0,1,2,...}
@ -73,10 +66,6 @@ class poisson(likelihood_function):
    L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
    $$
    """
-    def __init__(self,Y,location=0,scale=1):
-        assert len(Y[Y<0]) == 0, "Output cannot have negative values"
-        likelihood_function.__init__(self,Y,location,scale)
-
    def moments_match(self,i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm
@ -134,52 +123,12 @@ class poisson(likelihood_function):
        sigma2_hat = m2 - mu_hat**2 # Second central moment
        return float(Z_hat), float(mu_hat), float(sigma2_hat)

-    def predictive_values(self,mu,var,all=False):
+    def predictive_values(self,mu,var):
        """
-        Compute  mean, variance, and conficence interval (percentiles 5 and 95) of the  prediction
+        Compute  mean, and conficence interval (percentiles 5 and 95) of the  prediction
        """
        mean = np.exp(mu*self.scale + self.location)
-        if all:
-            tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
-            p_05 = tmp[:,0]
-            p_95 = tmp[:,1]
-            return mean,mean,p_05,p_95
-        else:
-            return mean
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
-
-    def plot(self,X,mu,var,phi,X_obs,Z=None,samples=0):
-        assert X_obs.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X,phi,phi.flatten())
-        pb.plot(X_obs,self.Y,'kx',mew=1.5)
-        if samples:
-            phi_samples = np.vstack([np.random.poisson(phi.flatten(),phi.size) for s in range(samples)])
-            pb.plot(X,phi_samples.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
-        if Z is not None:
-            pb.plot(Z,Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
-
-class gaussian(likelihood_function):
-    """
-    Gaussian likelihood
-    Y is expected to take values in (-inf,inf)
-    """
-    def moments_match(self,i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        mu = v_i/tau_i
-        sigma = np.sqrt(1./tau_i)
-        s = 1. if self.Y[i] == 0 else 1./self.Y[i]
-        sigma2_hat = 1./(1./sigma**2 + 1./s**2)
-        mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
-        Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
-        return Z_hat, mu_hat, sigma2_hat
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
+        tmp = stats.poisson.ppf(np.array([.05,.95]),mu)
+        p_05 = tmp[:,0]
+        p_95 = tmp[:,1]
+        return mean,p_05,p_95
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@ -8,6 +8,7 @@ from .. import kern
 from ..core import model
 from ..util.linalg import pdinv,mdot
 from ..util.plot import gpplot, Tango
+from ..likelihoods import EP

 class GP(model):
    """
@ -52,8 +53,10 @@ class GP(model):
            self._Xstd = np.ones((1,self.X.shape[1]))

        self.likelihood = likelihood
-        assert self.X.shape[0] == self.likelihood.Y.shape[0]
-        self.N, self.D = self.likelihood.Y.shape
+        #assert self.X.shape[0] == self.likelihood.Y.shape[0]
+        #self.N, self.D = self.likelihood.Y.shape
+        assert self.X.shape[0] == self.likelihood.data.shape[0]
+        self.N, self.D = self.likelihood.data.shape

        model.__init__(self)

@ -62,7 +65,7 @@ class GP(model):
        self.likelihood._set_params(p[self.kern.Nparam:])

        self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.K += self.likelihood.variance
+        self.K += self.likelihood.covariance_matrix

        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)

@ -87,7 +90,8 @@ class GP(model):
        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
        this function does nothing
        """
-        self.likelihood.fit(self.K)
+        self.likelihood.fit_full(self.kern.K(self.X))
+        self._set_params(self._get_params()) # update the GP

    def _model_fit_term(self):
        """
@ -119,7 +123,7 @@ class GP(model):
        """
        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=self.dL_dK)))

-    def _raw_predict(self,_Xnew,slices, full_cov=False):
+    def _raw_predict(self,_Xnew,slices=None, full_cov=False):
        """
        Internal helper function for making predictions, does not account
        for normalisation or likelihood
@ -129,11 +133,11 @@ class GP(model):
        KiKx = np.dot(self.Ki,Kx)
        if full_cov:
            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
-            var = Kxx - np.dot(KiKx.T,Kx)
+            var = Kxx - np.dot(KiKx.T,Kx) #NOTE is the shape of v right?
        else:
            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
-        return mu, var
+        return mu, var[:,None]


    def predict(self,Xnew, slices=None, full_cov=False):
@ -166,29 +170,15 @@ class GP(model):
        mu, var = self._raw_predict(Xnew, slices, full_cov=full_cov)

        #now push through likelihood TODO
+        mean, _5pc, _95pc = self.likelihood.predictive_values(mu, var)

        return mean, _5pc, _95pc

-    def raw_plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
+
+    def _x_frame(self,plot_limits=None,which_data='all',which_functions='all',resolution=None):
        """
-        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
-
-        :param samples: the number of a posteriori samples to plot
-        :param which_data: which if the training data to plot (default all)
-        :type which_data: 'all' or a slice object to slice self.X, self.Y
-        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-        :param which_functions: which of the kernel functions to plot (additively)
-        :type which_functions: list of bools
-        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-
-        Plot the posterior of the GP.
-          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-          - In two dimsensions, a contour-plot shows the mean predicted function
-          - In higher dimensions, we've no implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions using which_data and which_functions
+        Internal helper function for making plots, return a set of new input values to plot as well as lower and upper limits
        """
-
        if which_functions=='all':
            which_functions = [True]*self.kern.Nparts
        if which_data=='all':
@ -207,28 +197,47 @@ class GP(model):

        if self.X.shape[1]==1:
            Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-            m,v = self._raw_predict(Xnew,slices=which_functions,full_cov=False)
-            lower, upper = m.flatten() - 2.*np.sqrt(v) , m.flatten()+ 2.*np.sqrt(v)
-            gpplot(Xnew,m,lower,upper)
-            pb.plot(X,Y,'kx',mew=1.5)
-            pb.xlim(xmin,xmax)
        elif self.X.shape[1]==2:
            resolution = resolution or 50
            xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
-            Xtest = np.vstack((xx.flatten(),yy.flatten())).T
-            zz,vv = self._raw_predict(Xtest,slices=which_functions,full_cov=False)
-            zz = zz.reshape(resolution,resolution)
-            pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
-            pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
-            pb.xlim(xmin[0],xmax[0])
-            pb.ylim(xmin[1],xmax[1])
-
+            Xnew = np.vstack((xx.flatten(),yy.flatten())).T
        else:
            raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
+        return Xnew, xmin, xmax

-    def plot(self):
+    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+        """
+        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
+
+        :param samples: the number of a posteriori samples to plot
+        :param which_data: which if the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
+        :param which_functions: which of the kernel functions to plot (additively)
+        :type which_functions: list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+
+        Plot the posterior of the GP.
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimsensions, a contour-plot shows the mean predicted function
+          - In higher dimensions, we've no implemented this yet !TODO!
+
+        Can plot only part of the data and part of the posterior functions using which_data and which_functions
+        """
        """
        Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
        """
-        pass# TODO!!!!!
+        Xnew, xmin, xmax = self._x_frame()
+        m,v = self._raw_predict(Xnew)
+        if isinstance(self.likelihood,EP):
+            pb.subplot(211)
+        gpplot(Xnew,m,m-np.sqrt(v),m+np.sqrt(v))
+        pb.plot(self.X,self.likelihood.Y,'kx',mew=1.5)
+        pb.xlim(xmin,xmax)

+        if isinstance(self.likelihood,EP):
+            pb.subplot(212)
+            phi_m,phi_l,phi_u = self.likelihood.predictive_values(m,v)
+            gpplot(Xnew,phi_m,phi_l,phi_u)
+            pb.plot(self.X,self.likelihood.data,'kx',mew=1.5)
+            pb.xlim(xmin,xmax)
--- a/GPy/util/plot.py
+++ b/GPy/util/plot.py
@ -11,6 +11,8 @@ def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.c
        axes = pb.gca()
    mu = mu.flatten()
    x = x.flatten()
+    lower = lower.flatten()
+    upper = upper.flatten()

    #here's the mean
    axes.plot(x,mu,color=edgecol,linewidth=2)