From 02dc5c7b482c23788ca70373c9a31d06e7a3c2bb Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Thu, 7 Feb 2013 11:35:29 +0000 Subject: [PATCH 1/3] Example is working --- GPy/examples/poisson.py | 63 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/GPy/examples/poisson.py b/GPy/examples/poisson.py index 934637f1..ce68e921 100644 --- a/GPy/examples/poisson.py +++ b/GPy/examples/poisson.py @@ -3,46 +3,45 @@ """ -Simple Gaussian Processes classification +Gaussian Processes + Expectation Propagation - Poisson Likelihood """ import pylab as pb import numpy as np import GPy -pb.ion() -pb.close('all') default_seed=10000 -model_type='Full' -inducing=4 -seed=default_seed -"""Simple 1D classification example. -:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. -:param seed : seed value for data generation (default is 4). -:type seed: int -:param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). -:type inducing: int -""" +def toy_1d(seed=default_seed): + """ + Simple 1D classification example + :param seed : seed value for data generation (default is 4). + :type seed: int + """ -X = np.arange(0,100,5)[:,None] -F = np.round(np.sin(X/18.) + .1*X) + np.arange(5,25)[:,None] -E = np.random.randint(-5,5,20)[:,None] -Y = F + E -pb.figure() -likelihood = GPy.inference.likelihoods.poisson(Y,scale=1.) + X = np.arange(0,100,5)[:,None] + F = np.round(np.sin(X/18.) + .1*X) + np.arange(5,25)[:,None] + E = np.random.randint(-5,5,20)[:,None] + Y = F + E -m = GPy.models.GP(X,likelihood=likelihood) -#m = GPy.models.GP(X,Y=likelihood.Y) + kernel = GPy.kern.rbf(1) + distribution = GPy.likelihoods.likelihood_functions.Poisson() + likelihood = GPy.likelihoods.EP(Y,distribution) -m.constrain_positive('var') -m.constrain_positive('len') -m.tie_param('lengthscale') -if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian): - m.approximate_likelihood() -print m.checkgrad() -# Optimize and plot -m.optimize() -#m.em(plot_all=False) # EM algorithm -m.plot(samples=4) + m = GPy.models.GP(X,likelihood,kernel) + m.ensure_default_constraints() -print(m) + # Approximate likelihood + m.update_likelihood_approximation() + + # Optimize and plot + m.optimize() + #m.EPEM FIXME + print m + + # Plot + pb.subplot(211) + m.plot_f() #GP plot + pb.subplot(212) + m.plot() #Output plot + + return m From cf3e5220697230b780a2a1cdcda9b81aaa002823 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Thu, 7 Feb 2013 11:36:22 +0000 Subject: [PATCH 2/3] Change in plot() y-limits --- GPy/models/GP.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/models/GP.py b/GPy/models/GP.py index f8bd27bf..403f1597 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -243,7 +243,7 @@ class GP(model): m, var, lower, upper = self.predict(Xnew, slices=which_functions) gpplot(Xnew,m, lower, upper) pb.plot(self.X[which_data],self.likelihood.data[which_data],'kx',mew=1.5) - ymin,ymax = lower.min(),upper.max() + ymin,ymax = self.likelihood.data.min() -.1*(upper.max()-lower.min()), self.likelihood.data.max()+.1*(upper.max()-lower.min()) pb.xlim(xmin,xmax) pb.ylim(ymin,ymax) From 4563a5f8a68345170e33e7b08b762351106a314c Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Thu, 7 Feb 2013 11:36:45 +0000 Subject: [PATCH 3/3] Probit likelihood modified for plotting. --- GPy/examples/classification.py | 23 ++++++++++++++------ GPy/likelihoods/likelihood_functions.py | 29 +++++++++++++------------ 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index c25ea124..592299d8 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -20,11 +20,19 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME :param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). :type inducing: int """ + data = GPy.util.datasets.crescent_data(seed=seed) - likelihood = GPy.inference.likelihoods.probit(data['Y']) + + # Kernel object + kernel = GPy.kern.rbf(data['X'].shape[1]) + + # Likelihood object + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(data['Y'],distribution) + if model_type=='Full': - m = GPy.models.GP_EP(data['X'],likelihood) + m = GPy.models.GP(data['X'],likelihood,kernel) else: # create sparse GP EP model m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type) @@ -33,7 +41,7 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME print(m) # optimize - m.em() + m.optimize() print(m) # plot @@ -53,7 +61,7 @@ def oil(): likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution) # Create GP model - m = GPy.models.GP(data['X'],kernel,likelihood=likelihood) + m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) # Contrain all parameters to be positive m.constrain_positive('') @@ -71,17 +79,18 @@ def toy_linear_1d_classification(seed=default_seed): Simple 1D classification example :param seed : seed value for data generation (default is 4). :type seed: int - :type inducing: int """ data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) + Y = data['Y'][:, 0:1] + Y[Y == -1] = 0 # Kernel object kernel = GPy.kern.rbf(1) # Likelihood object distribution = GPy.likelihoods.likelihood_functions.probit() - likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution) + likelihood = GPy.likelihoods.EP(Y,distribution) # Model definition m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) @@ -98,7 +107,7 @@ def toy_linear_1d_classification(seed=default_seed): # Plot pb.subplot(211) - m.plot_internal() + m.plot_f() pb.subplot(212) m.plot() print(m) diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index de97824a..23881899 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -38,6 +38,7 @@ class probit(likelihood_function): :param v_i: mean/variance of the cavity distribution (float) """ # TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1" + if data_i == 0: data_i = -1 #NOTE Binary classification works better classes {-1,1}, 1D-plotting works better with classes {0,1}. z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) Z_hat = stats.norm.cdf(z) phi = stats.norm.pdf(z) @@ -52,9 +53,9 @@ class probit(likelihood_function): mu = mu.flatten() var = var.flatten() mean = stats.norm.cdf(mu/np.sqrt(1+var)) - p_05 = np.zeros(mu.shape)#np.zeros([mu.size]) - p_95 = np.zeros(mu.shape)#np.ones([mu.size]) - return mean, p_05, p_95 + p_025 = np.zeros(mu.shape) + p_975 = np.ones(mu.shape) + return mean, p_025, p_975 class Poisson(likelihood_function): """ @@ -65,7 +66,7 @@ class Poisson(likelihood_function): L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! $$ """ - def moments_match(self,i,tau_i,v_i): + def moments_match(self,data_i,tau_i,v_i): """ Moments match of the marginal approximation in EP algorithm @@ -81,14 +82,14 @@ class Poisson(likelihood_function): """ pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma) rate = np.exp( (f*self.scale)+self.location) - poisson = stats.poisson.pmf(float(self.Y[i]),rate) + poisson = stats.poisson.pmf(float(data_i),rate) return pdf_norm_f*poisson def log_pnm(f): """ Log of poisson_norm """ - return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i]) + return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i) """ Golden Search and Simpson's Rule @@ -99,17 +100,17 @@ class Poisson(likelihood_function): #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case. #Golden search - golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit - golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit + golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit + golden_B = np.array([np.log(data_i),mu]).max() #Upper limit golden_A = (golden_A - self.location)/self.scale golden_B = (golden_B - self.location)/self.scale opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm # Simpson's approximation - width = 3./np.log(max(self.Y[i],2)) + width = 3./np.log(max(data_i,2)) A = opt - width #Lower limit B = opt + width #Upper limit - K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case + K = 10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case h = (B-A)/K # length of the intervals grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier @@ -127,7 +128,7 @@ class Poisson(likelihood_function): Compute mean, and conficence interval (percentiles 5 and 95) of the prediction """ mean = np.exp(mu*self.scale + self.location) - tmp = stats.poisson.ppf(np.array([.05,.95]),mu) - p_05 = tmp[:,0] - p_95 = tmp[:,1] - return mean,p_05,p_95 + tmp = stats.poisson.ppf(np.array([.025,.975]),mean) + p_025 = tmp[:,0] + p_975 = tmp[:,1] + return mean,p_025,p_975