Merge branch 'newGP' of github.com:SheffieldML/GPy into newGP

James Hensman 2013-02-01 17:13:10 +00:00
commit f7d2fc6ca4
7 changed files with 147 additions and 184 deletions

View file

@@ -3,16 +3,15 @@
"""
Simple Gaussian Processes classification
Gaussian Processes classification
"""
import pylab as pb
import numpy as np
import GPy
default_seed=10000
######################################
## 2 dimensional example
def crescent_data(model_type='Full', inducing=10, seed=default_seed):
def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@@ -30,7 +29,7 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m.approximate_likelihood()
m.update_likelihood_approximation()
print(m)
# optimize
@@ -42,53 +41,66 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
return m
def oil():
"""Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood."""
"""
Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
"""
data = GPy.util.datasets.oil()
likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
# Kernel object
kernel = GPy.kern.rbf(12)
# create simple GP model
m = GPy.models.GP_EP(data['X'],likelihood)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution)
# constrain all parameters to be positive
# Create GP model
m = GPy.models.GP(data['X'],kernel,likelihood=likelihood)
# Constrain all parameters to be positive
m.constrain_positive('')
m.tie_param('lengthscale')
m.approximate_likelihood()
m.update_likelihood_approximation()
# optimize
# Optimize
m.optimize()
# plot
#m.plot()
print(m)
return m
def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed):
"""Simple 1D classification example.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
def toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed: seed value for data generation (default is default_seed, i.e. 10000).
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
assert model_type in ('Full','DTC','FITC')
# create simple GP model
if model_type=='Full':
m = GPy.models.GP_EP(data['X'],likelihood)
else:
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
# Kernel object
kernel = GPy.kern.rbf(1)
m.constrain_positive('var')
m.constrain_positive('len')
m.tie_param('lengthscale')
m.approximate_likelihood()
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution)
# Optimize and plot
m.em(plot_all=False) # EM algorithm
m.plot()
# Model definition
m = GPy.models.GP(data['X'],kernel,likelihood=likelihood)
# Optimize
"""
EPEM runs a loop that consists of two steps:
1) EP likelihood approximation:
m.update_likelihood_approximation()
2) Parameter optimization:
m.optimize()
"""
m.EPEM()
# Plot
pb.subplot(211)
m.plot_GP()
pb.subplot(212)
m.plot_output()
print(m)
return m
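
For readers following the API change in this demo file, here is a minimal sketch of the new classification workflow. It assumes the GPy snapshot in this commit; every call (dataset helper, probit/EP likelihood objects, GP constructor, EPEM, plotting) is taken from the demo code above, and the commented-out manual loop mirrors the EPEM description in toy_linear_1d_classification.

import pylab as pb
import GPy

# Build the model exactly as in toy_linear_1d_classification above.
data = GPy.util.datasets.toy_linear_1d_classification(seed=10000)
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
m = GPy.models.GP(data['X'], kernel, likelihood=likelihood)
m.constrain_positive('')

# Either let EPEM alternate the two steps ...
m.EPEM()
# ... or run the loop by hand, as the comment block above describes:
# for it in range(5):
#     m.update_likelihood_approximation()  # 1) EP likelihood approximation
#     m.optimize()                         # 2) parameter optimization

pb.subplot(211); m.plot_GP()      # latent GP view
pb.subplot(212); m.plot_output()  # predictions pushed through the likelihood
print(m)
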

View file

@@ -1,43 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Simple Gaussian Processes classification 1D
probit likelihood
"""
import pylab as pb
import numpy as np
import GPy
pb.ion()
pb.close('all')
# Inputs
N = 30
X1 = np.random.normal(5,2,N/2)
X2 = np.random.normal(10,2,N/2)
X = np.hstack([X1,X2])[:,None]
# Outputs
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
# Kernel object
kernel = GPy.kern.rbf(1)
# Define likelihood
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood_object = GPy.likelihoods.EP(Y,distribution)
# Model definition
m = GPy.models.GP(X,kernel,likelihood=likelihood_object)
m.ensure_default_constraints()
m.update_likelihood_approximation()
#m.checkgrad(verbose=1)
m.optimize()
print "Round 2"
#rm.update_likelihood_approximation()
#m.EPEM()
m.plot()
#print(m)

View file

@@ -67,8 +67,8 @@ class EP(likelihood):
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu = np.zeros(self.N)
self.Sigma = K.copy()
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
@@ -96,29 +96,27 @@ class EP(likelihood):
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
print 1./self.Sigma[i,i],self.tau_tilde[i]
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=self.Sigma[:,i].reshape(self.N,1)
self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
self.mu = np.dot(self.Sigma,self.v_tilde)
si=Sigma[:,i].reshape(self.N,1)
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
print self.tau_tilde[i]
#Sigma recomputation with Cholesky decomposition
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
self.Sigma = K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
@@ -141,7 +139,7 @@ class EP(likelihood):
"""
Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
KmnKnm = np.dot(Kmn, Kmn.T)
KmmiKmn = np.dot(Kmmi,self.Kmn)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
@@ -223,13 +221,13 @@ class EP(likelihood):
q(f|X) = int N(f|Kfu Kuu_inv u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.P0 = self.Kmn.T
self.KmnKnm = np.dot(self.P0.T, self.P0)
self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
self.Diag0 = self.Knn_diag - self.Qnn_diag
self.R0 = jitchol(self.Kmmi).T
Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
@@ -238,11 +236,11 @@ class EP(likelihood):
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
self.mu = np.zeros(self.N)
self.P = self.P0.copy()
self.R = self.R0.copy()
self.Diag = self.Diag0.copy()
self.Sigma_diag = self.Knn_diag
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
"""
Initial values - Cavity distribution parameters:
@@ -270,41 +268,41 @@ class EP(likelihood):
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*self.Diag[i] + 1.
dii = self.Diag[i]
self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = self.P[i,:].reshape(1,self.M)
self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(self.R,pi_.T)
RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
self.R = jitchol(RTR).T
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,self.M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.mu = self.w + np.dot(self.P,self.gamma)
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.P0
self.RPT0 = np.dot(self.R0,self.P0.T)
L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.w = self.Diag * self.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
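
For reference, the full (non-sparse) site updates above can be written as a self-contained NumPy sketch. The probit moment-matching formulas below are the standard ones (Rasmussen and Williams, Sec. 3.6) and stand in for likelihood_function.moments_match(); variable names follow the class attributes, and eta/delta are the power-EP and damping factors used above (both 1 by default).

import numpy as np
from scipy.stats import norm

def ep_sweep(K, y, tau_tilde, v_tilde, Sigma, mu, eta=1., delta=1.):
    """One random-order EP sweep for GP classification with a probit likelihood.
    Initialise with tau_tilde = v_tilde = 0, Sigma = K.copy(), mu = 0 and y in {-1, +1};
    the site and posterior arrays are updated in place and also returned."""
    N = y.size
    for i in np.random.permutation(N):
        # Cavity distribution parameters
        tau_ = 1. / Sigma[i, i] - eta * tau_tilde[i]
        v_ = mu[i] / Sigma[i, i] - eta * v_tilde[i]
        # Marginal moments of cavity x probit (moment matching)
        mu_cav, s2_cav = v_ / tau_, 1. / tau_
        z = y[i] * mu_cav / np.sqrt(1. + s2_cav)
        ratio = norm.pdf(z) / norm.cdf(z)
        mu_hat = mu_cav + y[i] * s2_cav * ratio / np.sqrt(1. + s2_cav)
        s2_hat = s2_cav - s2_cav ** 2 * ratio * (z + ratio) / (1. + s2_cav)
        # Site parameter updates
        Delta_tau = delta / eta * (1. / s2_hat - 1. / Sigma[i, i])
        Delta_v = delta / eta * (mu_hat / s2_hat - mu[i] / Sigma[i, i])
        tau_tilde[i] += Delta_tau
        v_tilde[i] += Delta_v
        # Rank-one posterior update, as in the loop above
        si = Sigma[:, i:i + 1].copy()
        Sigma -= Delta_tau / (1. + Delta_tau * Sigma[i, i]) * np.dot(si, si.T)
        mu = np.dot(Sigma, v_tilde)
    return tau_tilde, v_tilde, Sigma, mu

Repeating the sweep and monitoring the change in tau_tilde and v_tilde reproduces the epsilon_np1 / epsilon_np2 convergence check above; the class additionally recomputes Sigma and mu from a Cholesky factorisation after each sweep for numerical stability.
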

View file

@@ -7,7 +7,6 @@ from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
#from . import EP
class likelihood_function:
"""

View file

@@ -7,7 +7,7 @@ import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot, Tango
from ..util.plot import gpplot,x_frame, Tango
from ..likelihoods import EP
class GP(model):
@@ -175,37 +175,7 @@ class GP(model):
return mean, _5pc, _95pc
def _x_frame(self,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
Internal helper function for making plots, return a set of new input values to plot as well as lower and upper limits
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.likelihood.Y[which_data,:]
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
elif self.X.shape[1]==2:
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
return Xnew, xmin, xmax
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
def plot_GP(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
"""
Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
@@ -223,21 +193,29 @@ class GP(model):
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
"""
Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
"""
Xnew, xmin, xmax = self._x_frame()
m,v = self._raw_predict(Xnew)
if isinstance(self.likelihood,EP):
pb.subplot(211)
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
m,v = self._raw_predict(Xnew, slices=which_functions)
gpplot(Xnew,m,m-np.sqrt(v),m+np.sqrt(v))
pb.plot(self.X,self.likelihood.Y,'kx',mew=1.5)
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
pb.xlim(xmin,xmax)
if isinstance(self.likelihood,EP):
pb.subplot(212)
phi_m,phi_l,phi_u = self.likelihood.predictive_values(m,v)
gpplot(Xnew,phi_m,phi_l,phi_u)
pb.plot(self.X,self.likelihood.data,'kx',mew=1.5)
pb.xlim(xmin,xmax)
def plot_output(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
Xnew, xmin, xmax = x_frame(self.X, plot_limits=plot_limits)
m, lower, upper = self.predict(Xnew, slices=which_functions)
gpplot(Xnew,m, lower, upper)
pb.plot(self.X[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
ymin,ymax = self.likelihood.data.min()*1.2,self.likelihood.data.max()*1.2
pb.xlim(xmin,xmax)
pb.ylim(ymin,ymax)
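
The split of the old plot() into plot_GP and plot_output corresponds to the two prediction paths in the class. A rough sketch of the distinction, assuming a fitted classification model m such as the one built in the demo sketch earlier (the method names and the x_frame import are taken from the diff above):

from GPy.util.plot import x_frame

# m: a fitted GP classification model, e.g. the one returned by the demos above.
Xnew, xmin, xmax = x_frame(m.X)        # frame of test inputs spanning the data
mu, var = m._raw_predict(Xnew)         # latent GP; plot_GP shades mu +/- sqrt(var)
mean, lower, upper = m.predict(Xnew)   # pushed through the EP likelihood; drawn by plot_output
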

View file

@@ -154,17 +154,16 @@ class GradientTests(unittest.TestCase):
m.constrain_positive('(linear|bias|white)')
self.assertTrue(m.checkgrad())
def test_GP_EP(self):
return # Disabled TODO
def test_GP_EP_probit(self):
N = 20
X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None]
k = GPy.kern.rbf(1) + GPy.kern.white(1)
Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
likelihood = GPy.inference.likelihoods.probit(Y)
m = GPy.models.GP_EP(X,likelihood,k)
m.constrain_positive('(var|len)')
m.approximate_likelihood()
self.assertTrue(m.checkgrad())
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y,distribution)
m = GPy.models.GP(X,kernel,likelihood=likelihood)
m.ensure_default_constraints()
self.assertTrue(m.EPEM)
@unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self):

View file

@@ -70,4 +70,24 @@ def align_subplots(N,M,xlim=None, ylim=None):
else:
removeUpperTicks()
def x_frame(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
elif X.shape[1]==2:
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
return Xnew, xmin, xmax
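
A short usage sketch of the new helper; it assumes x_frame is exposed as GPy.util.plot.x_frame, as the import added to the GP model above indicates.

import numpy as np
from GPy.util.plot import x_frame

X = np.random.randn(30, 1)
Xnew, xmin, xmax = x_frame(X)                        # 200 test points spanning the data range +/- 20%
Xnew, xmin, xmax = x_frame(X, plot_limits=(-3, 3))   # or with explicit limits

X2d = np.random.randn(30, 2)
Xgrid, xmin, xmax = x_frame(X2d, resolution=50)      # 50x50 grid flattened to a (2500, 2) array
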