diff --git a/.travis.yml b/.travis.yml
index af20bec4..e7944d8a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,11 +1,20 @@
 language: python
 python:
   - "2.7"
+
+# Set virtual env with system-site-packages to true
+virtualenv:
+  system_site_packages: true
+
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
-install:
-  - sudo apt-get install python-scipy
-  - pip install sphinx
+before_install:
+  - sudo apt-get install -qq python-scipy python-pip
+  - sudo apt-get install -qq python-matplotlib
+
+install:
+  - pip install sphinx
+  - pip install nose
   - pip install . --use-mirrors
 # command to run tests, e.g. python setup.py test
 script:
-  - nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing
+  - nosetests GPy/testing
\ No newline at end of file
diff --git a/GPy/__init__.py b/GPy/__init__.py
index 381d6232..c0772c27 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -7,5 +7,5 @@ import models
 import inference
 import util
 import examples
-#import examples TODO: discuss!
 from core import priors
+import likelihoods
diff --git a/GPy/core/model.py b/GPy/core/model.py
index 145a607f..a0628c42 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -10,6 +10,7 @@ from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
+from .. import likelihoods
 
 class model(parameterised):
     def __init__(self):
@@ -82,7 +83,7 @@ class model(parameterised):
 
     def get(self,name, return_names=False):
         """
-        Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
+        Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
@@ -107,7 +108,7 @@ class model(parameterised):
 
     def get_gradient(self,name, return_names=False):
         """
-        Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
+        Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
@@ -303,54 +304,62 @@ class model(parameterised):
         return '\n'.join(s)
 
-    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, return_ratio=False, *args):
+    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3):
         """
         Check the gradient of the model by comparing to a numerical estimate.
-        If the overall gradient fails, invividual components are tested.
+        If the verbose flag is passed, individual components are tested (and printed)
+
+        :param verbose: If True, print a "full" checking of each parameter
+        :type verbose: bool
+        :param step: The size of the step around which to linearise the objective
+        :type step: float (default 1e-6)
+        :param tolerance: the tolerance allowed (see note)
+        :type tolerance: float (default 1e-3)
+
+        Note:
+        The gradient is considered correct if the ratio of the analytical
+        and numerical gradients is within tolerance of unity.
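+
+        Example (an illustrative sketch, mirroring GPy/examples/sparse_ep_fix.py;
+        X, Y, kernel and M are assumed to be defined):
+
+        >>> m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+        >>> m.ensure_default_constraints()
+        >>> print m.checkgrad()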
""" x = self._get_params_transformed().copy() - #choose a random direction to step in: - dx = step*np.sign(np.random.uniform(-1,1,x.size)) + if not verbose: + #just check the global ratio + dx = step*np.sign(np.random.uniform(-1,1,x.size)) - #evaulate around the point x - self._set_params_transformed(x+dx) - f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() - self._set_params_transformed(x-dx) - f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() - self._set_params_transformed(x) - gradient = self._log_likelihood_gradients_transformed() + #evaulate around the point x + self._set_params_transformed(x+dx) + f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() + self._set_params_transformed(x-dx) + f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() + self._set_params_transformed(x) + gradient = self._log_likelihood_gradients_transformed() - numerical_gradient = (f1-f2)/(2*dx) - global_ratio = (f1-f2)/(2*np.dot(dx,gradient)) - if verbose: - print "Gradient ratio = ", global_ratio, '\n' - sys.stdout.flush() + numerical_gradient = (f1-f2)/(2*dx) + global_ratio = (f1-f2)/(2*np.dot(dx,gradient)) - if (np.abs(1.-global_ratio) epsilon or not iteration: + print 'EM iteration %s' %iteration + self.update_likelihood_approximation() + self.optimize(**kwargs) + new_value = self.log_likelihood() + log_change = new_value - last_value + if log_change > epsilon: + self.log_likelihood_record.append(new_value) + self.gp_params_record.append(self._get_params()) + #self.ep_params_record.append((self.beta,self.Y,self.Z_ep)) + last_value = new_value else: - return False - - if return_ratio: - return global_ratio - else: - return True + convergence = False + #self.beta, self.Y, self.Z_ep = self.ep_params_record[-1] + self._set_params(self.gp_params_record[-1]) + print "Log-likelihood decrement: %s \nLast iteration discarded." %log_change + iteration += 1 diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index 6e5493ad..ab656f52 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -102,6 +102,11 @@ class parameterised(object): else: return expr + def Nparam_transformed(self): + ties = 0 + for ar in self.tied_indices: + ties += ar.size - 1 + return self.Nparam - len(self.constrained_fixed_indices) - ties def constrain_positive(self, which): """ @@ -149,8 +154,6 @@ class parameterised(object): - - def constrain_negative(self,which): """ Set negative constraints. 
diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py
index 583a66ad..0fd58571 100644
--- a/GPy/examples/BGPLVM_demo.py
+++ b/GPy/examples/BGPLVM_demo.py
@@ -16,8 +16,10 @@
 k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T
 k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
 # k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q, 0.00001)
+# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
 m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
 m.constrain_positive('(rbf|bias|noise|white|S)')
 # m.constrain_fixed('S', 1)
diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 989ed08a..592299d8 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -3,16 +3,15 @@
 
 """
-Simple Gaussian Processes classification
+Gaussian Processes classification
 """
 import pylab as pb
 import numpy as np
 import GPy
 
 default_seed=10000
-######################################
-## 2 dimensional example
-def crescent_data(model_type='Full', inducing=10, seed=default_seed):
+
+def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
     """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
 
     :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@@ -21,20 +20,28 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
     :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
     :type inducing: int
     """
+
     data = GPy.util.datasets.crescent_data(seed=seed)
-    likelihood = GPy.inference.likelihoods.probit(data['Y'])
+
+    # Kernel object
+    kernel = GPy.kern.rbf(data['X'].shape[1])
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
+
     if model_type=='Full':
-        m = GPy.models.GP_EP(data['X'],likelihood)
+        m = GPy.models.GP(data['X'],likelihood,kernel)
     else:
         # create sparse GP EP model
         m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
 
-    m.approximate_likelihood()
+    m.update_likelihood_approximation()
     print(m)
 
     # optimize
-    m.em()
+    m.optimize()
     print(m)
 
     # plot
@@ -42,54 +49,67 @@
     return m
 
 def oil():
-    """Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood."""
+    """
+    Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
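+
+    The likelihood construction used below, in sketch form (Y here stands
+    for the binary label column of the dataset):
+
+    >>> distribution = GPy.likelihoods.likelihood_functions.probit()
+    >>> likelihood = GPy.likelihoods.EP(Y, distribution)
+    >>> m = GPy.models.GP(X, likelihood=likelihood, kernel=GPy.kern.rbf(12))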
+ """ data = GPy.util.datasets.oil() - likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1]) + # Kernel object + kernel = GPy.kern.rbf(12) - # create simple GP model - m = GPy.models.GP_EP(data['X'],likelihood) + # Likelihood object + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution) - # contrain all parameters to be positive + # Create GP model + m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) + + # Contrain all parameters to be positive m.constrain_positive('') m.tie_param('lengthscale') - m.approximate_likelihood() + m.update_likelihood_approximation() - # optimize + # Optimize m.optimize() - # plot - #m.plot() print(m) return m -def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed): - """Simple 1D classification example. - :param model_type: type of model to fit ['Full', 'FITC', 'DTC']. +def toy_linear_1d_classification(seed=default_seed): + """ + Simple 1D classification example :param seed : seed value for data generation (default is 4). :type seed: int - :param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). - :type inducing: int """ + data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) - likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1]) - assert model_type in ('Full','DTC','FITC') + Y = data['Y'][:, 0:1] + Y[Y == -1] = 0 - # create simple GP model - if model_type=='Full': - m = GPy.models.simple_GP_EP(data['X'],likelihood) - else: - # create sparse GP EP model - m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type) - + # Kernel object + kernel = GPy.kern.rbf(1) - m.constrain_positive('var') - m.constrain_positive('len') - m.tie_param('lengthscale') - m.approximate_likelihood() + # Likelihood object + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(Y,distribution) - # Optimize and plot - m.em(plot_all=False) # EM algorithm + # Model definition + m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) + + # Optimize + """ + EPEM runs a loop that consists of two steps: + 1) EP likelihood approximation: + m.update_likelihood_approximation() + 2) Parameters optimization: + m.optimize() + """ + m.EPEM() + + # Plot + pb.subplot(211) + m.plot_f() + pb.subplot(212) m.plot() - print(m) + return m diff --git a/GPy/examples/poisson.py b/GPy/examples/poisson.py new file mode 100644 index 00000000..ce68e921 --- /dev/null +++ b/GPy/examples/poisson.py @@ -0,0 +1,47 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +""" +Gaussian Processes + Expectation Propagation - Poisson Likelihood +""" +import pylab as pb +import numpy as np +import GPy + +default_seed=10000 + +def toy_1d(seed=default_seed): + """ + Simple 1D classification example + :param seed : seed value for data generation (default is 4). + :type seed: int + """ + + X = np.arange(0,100,5)[:,None] + F = np.round(np.sin(X/18.) 
+ .1*X) + np.arange(5,25)[:,None] + E = np.random.randint(-5,5,20)[:,None] + Y = F + E + + kernel = GPy.kern.rbf(1) + distribution = GPy.likelihoods.likelihood_functions.Poisson() + likelihood = GPy.likelihoods.EP(Y,distribution) + + m = GPy.models.GP(X,likelihood,kernel) + m.ensure_default_constraints() + + # Approximate likelihood + m.update_likelihood_approximation() + + # Optimize and plot + m.optimize() + #m.EPEM FIXME + print m + + # Plot + pb.subplot(211) + m.plot_f() #GP plot + pb.subplot(212) + m.plot() #Output plot + + return m diff --git a/GPy/examples/sparse_ep_fix.py b/GPy/examples/sparse_ep_fix.py new file mode 100644 index 00000000..defcb4eb --- /dev/null +++ b/GPy/examples/sparse_ep_fix.py @@ -0,0 +1,60 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +""" +Sparse Gaussian Processes regression with an RBF kernel +""" +import pylab as pb +import numpy as np +import GPy +np.random.seed(2) +pb.ion() +N = 500 +M = 5 + +pb.close('all') +###################################### +## 1 dimensional example + +# sample inputs and outputs +X = np.random.uniform(-3.,3.,(N,1)) +#Y = np.sin(X)+np.random.randn(N,1)*0.05 +F = np.sin(X)+np.random.randn(N,1)*0.05 +Y = np.ones([F.shape[0],1]) +Y[F<0] = -1 +likelihood = GPy.inference.likelihoods.probit(Y) + +# construct kernel +rbf = GPy.kern.rbf(1) +noise = GPy.kern.white(1) +kernel = rbf + noise + +# create simple GP model +#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood) + +# contrain all parameters to be positive +#m.constrain_fixed('prec',100.) +m = GPy.models.sparse_GP(X, Y, kernel, M=M) +m.ensure_default_constraints() +#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian): +# m.approximate_likelihood() +print m.checkgrad() +m.optimize('tnc', messages = 1) +m.plot(samples=3) +print m + +n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood) +n.ensure_default_constraints() +if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian): + n.approximate_likelihood() +print n.checkgrad() +pb.figure() +n.plot() + +""" +m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) +m.ensure_default_constraints() +print m.checkgrad() +""" diff --git a/GPy/inference/Expectation_Propagation.py b/GPy/inference/Expectation_Propagation.py deleted file mode 100644 index 05453f1d..00000000 --- a/GPy/inference/Expectation_Propagation.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import random -from scipy import stats, linalg -from .likelihoods import likelihood -from ..core import model -from ..util.linalg import pdinv,mdot,jitchol -from ..util.plot import gpplot -from .. import kern - -class EP_base: - """ - Expectation Propagation. - - This is just the base class for expectation propagation. We'll extend it for full and sparse EP. - """ - def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]): - self.likelihood = likelihood - self.epsilon = epsilon - self.eta, self.delta = powerep - self.jitter = 1e-12 - - #Initial values - Likelihood approximation parameters: - #p(y|f) = t(f|tau_tilde,v_tilde) - self.restart_EP() - - def restart_EP(self): - """ - Set the EP approximation to initial state - """ - self.tau_tilde = np.zeros(self.N) - self.v_tilde = np.zeros(self.N) - self.mu = np.zeros(self.N) - -class Full(EP_base): - """ - :param likelihood: Output's likelihood (e.g. 
probit) - :type likelihood: GPy.inference.likelihood instance - :param K: prior covariance matrix - :type K: np.ndarray (N x N) - :param likelihood: Output's likelihood (e.g. probit) - :type likelihood: GPy.inference.likelihood instance - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) - """ - def __init__(self,K,likelihood,*args,**kwargs): - assert K.shape[0] == K.shape[1] - self.K = K - self.N = self.K.shape[0] - EP_base.__init__(self,likelihood,*args,**kwargs) - def fit_EP(self,messages=False): - """ - The expectation-propagation algorithm. - For nomenclature see Rasmussen & Williams 2006 (pag. 52-60) - """ - #Prior distribution parameters: p(f|X) = N(f|0,K) - #self.K = self.kernel.K(self.X,self.X) - - #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) - self.mu=np.zeros(self.N) - self.Sigma=self.K.copy() - - """ - Initial values - Cavity distribution parameters: - q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - - self.tau_ = np.empty(self.N,dtype=np.float64) - self.v_ = np.empty(self.N,dtype=np.float64) - - #Initial values - Marginal moments - z = np.empty(self.N,dtype=np.float64) - self.Z_hat = np.empty(self.N,dtype=np.float64) - phi = np.empty(self.N,dtype=np.float64) - mu_hat = np.empty(self.N,dtype=np.float64) - sigma2_hat = np.empty(self.N,dtype=np.float64) - - #Approximation - epsilon_np1 = self.epsilon + 1. - epsilon_np2 = self.epsilon + 1. - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.N) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i] - self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i]) - self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau - self.v_tilde[i] = self.v_tilde[i] + Delta_v - #Posterior distribution parameters update - si=self.Sigma[:,i].reshape(self.N,1) - self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T) - self.mu = np.dot(self.Sigma,self.v_tilde) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*(self.K) - B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K - L = jitchol(B) - V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1) - self.Sigma = self.K - np.dot(V.T,V) - self.mu = np.dot(self.Sigma,self.v_tilde) - epsilon_np1 = np.mean(self.tau_tilde-self.np1[-1]**2) - epsilon_np2 = np.mean(self.v_tilde-self.np2[-1]**2) - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) - if messages: - print "EP iteration %i, epsiolon %d"%(self.iterations,epsilon_np1) - -class FITC(EP_base): - """ - :param likelihood: Output's likelihood (e.g. 
probit) - :type likelihood: GPy.inference.likelihood instance - :param Knn_diag: The diagonal elements of Knn is a 1D vector - :param Kmn: The 'cross' variance between inducing inputs and data - :param Kmm: the covariance matrix of the inducing inputs - :param likelihood: Output's likelihood (e.g. probit) - :type likelihood: GPy.inference.likelihood instance - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) - """ - def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs): - self.Knn_diag = Knn_diag - self.Kmn = Kmn - self.Kmm = Kmm - self.M = self.Kmn.shape[0] - self.N = self.Kmn.shape[1] - assert self.M <= self.N, 'The number of inducing inputs must be smaller than the number of observations' - assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N' - EP_base.__init__(self,likelihood,*args,**kwargs) - - def fit_EP(self): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see Naish-Guzman and Holden, 2008. - """ - - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn - """ - self.Kmmi, self.Kmm_hld = pdinv(self.Kmm) - self.P0 = self.Kmn.T - self.KmnKnm = np.dot(self.P0.T, self.P0) - self.KmmiKmn = np.dot(self.Kmmi,self.P0.T) - self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2) - self.Diag0 = self.Knn_diag - self.Qnn_diag - self.R0 = jitchol(self.Kmmi).T - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*gamma - """ - self.w = np.zeros(self.N) - self.gamma = np.zeros(self.M) - self.mu = np.zeros(self.N) - self.P = self.P0.copy() - self.R = self.R0.copy() - self.Diag = self.Diag0.copy() - self.Sigma_diag = self.Knn_diag - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.N,dtype=np.float64) - self.v_ = np.empty(self.N,dtype=np.float64) - - #Initial values - Marginal moments - z = np.empty(self.N,dtype=np.float64) - self.Z_hat = np.empty(self.N,dtype=np.float64) - phi = np.empty(self.N,dtype=np.float64) - mu_hat = np.empty(self.N,dtype=np.float64) - sigma2_hat = np.empty(self.N,dtype=np.float64) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.arange(self.N) - random.shuffle(update_order) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i]) - self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau - self.v_tilde[i] = self.v_tilde[i] + Delta_v - #Posterior distribution parameters update - dtd1 = Delta_tau*self.Diag[i] + 1. 
- dii = self.Diag[i] - self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 - pi_ = self.P[i,:].reshape(1,self.M) - self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ - Rp_i = np.dot(self.R,pi_.T) - RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R)) - self.R = jitchol(RTR).T - self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1 - self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1) - self.mu = self.w + np.dot(self.P,self.gamma) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde) - self.P = (self.Diag / self.Diag0)[:,None] * self.P0 - self.RPT0 = np.dot(self.R0,self.P0.T) - L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1) - self.w = self.Diag * self.v_tilde - self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde)) - self.mu = self.w + np.dot(self.P,self.gamma) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) diff --git a/GPy/inference/likelihoods.py b/GPy/inference/likelihoods.py deleted file mode 100644 index c9b36e10..00000000 --- a/GPy/inference/likelihoods.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import stats -import scipy as sp -import pylab as pb -from ..util.plot import gpplot - -class likelihood: - def __init__(self,Y,location=0,scale=1): - """ - Likelihood class for doing Expectation propagation - - :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood used - """ - self.Y = Y - self.N = self.Y.shape[0] - self.location = location - self.scale = scale - - def plot1Da(self,X_new,Mean_new,Var_new,X_u,Mean_u,Var_u): - """ - Plot the predictive distribution of the GP model for 1-dimensional inputs - - :param X_new: The points at which to make a prediction - :param Mean_new: mean values at X_new - :param Var_new: variance values at X_new - :param X_u: input (inducing) points used to train the model - :param Mean_u: mean values at X_u - :param Var_new: variance values at X_u - """ - assert X_new.shape[1] == 1, 'Number of dimensions must be 1' - gpplot(X_new,Mean_new,Var_new) - pb.errorbar(X_u,Mean_u,2*np.sqrt(Var_u),fmt='r+') - pb.plot(X_u,Mean_u,'ro') - - def plot2D(self,X,X_new,F_new,U=None): - """ - Predictive distribution of the fitted GP model for 2-dimensional inputs - - :param X_new: The points at which to make a prediction - :param Mean_new: mean values at X_new - :param Var_new: variance values at X_new - :param X_u: input points used to train the model - :param Mean_u: mean values at X_u - :param Var_new: variance values at X_u - """ - N,D = X_new.shape - assert D == 2, 'Number of dimensions must be 2' - n = np.sqrt(N) - x1min = X_new[:,0].min() - x1max = X_new[:,0].max() - x2min = X_new[:,1].min() - x2max = X_new[:,1].max() - pb.imshow(F_new.reshape(n,n),extent=(x1min,x1max,x2max,x2min),vmin=0,vmax=1) - pb.colorbar() - C1 = 
np.arange(self.N)[self.Y.flatten()==1] - C2 = np.arange(self.N)[self.Y.flatten()==-1] - [pb.plot(X[i,0],X[i,1],'ro') for i in C1] - [pb.plot(X[i,0],X[i,1],'bo') for i in C2] - pb.xlim(x1min,x1max) - pb.ylim(x2min,x2max) - if U is not None: - [pb.plot(a,b,'wo') for a,b in U] - -class probit(likelihood): - """ - Probit likelihood - Y is expected to take values in {-1,1} - ----- - $$ - L(x) = \\Phi (Y_i*f_i) - $$ - """ - def moments_match(self,i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - z = self.Y[i]*v_i/np.sqrt(tau_i**2 + tau_i) - Z_hat = stats.norm.cdf(z) - phi = stats.norm.pdf(z) - mu_hat = v_i/tau_i + self.Y[i]*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) - sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) - return Z_hat, mu_hat, sigma2_hat - - def plot1Db(self,X,X_new,F_new,U=None): - assert X.shape[1] == 1, 'Number of dimensions must be 1' - gpplot(X_new,F_new,np.zeros(X_new.shape[0])) - pb.plot(X,(self.Y+1)/2,'kx',mew=1.5) - pb.ylim(-0.2,1.2) - if U is not None: - pb.plot(U,U*0+.5,'r|',mew=1.5,markersize=12) - - def predictive_mean(self,mu,variance): - return stats.norm.cdf(mu/np.sqrt(1+variance)) - - def _log_likelihood_gradients(): - raise NotImplementedError - -class poisson(likelihood): - """ - Poisson likelihood - Y is expected to take values in {0,1,2,...} - ----- - $$ - L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! - $$ - """ - def moments_match(self,i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - mu = v_i/tau_i - sigma = np.sqrt(1./tau_i) - def poisson_norm(f): - """ - Product of the likelihood and the cavity distribution - """ - pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma) - rate = np.exp( (f*self.scale)+self.location) - poisson = stats.poisson.pmf(float(self.Y[i]),rate) - return pdf_norm_f*poisson - - def log_pnm(f): - """ - Log of poisson_norm - """ - return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i]) - - """ - Golden Search and Simpson's Rule - -------------------------------- - Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input. - Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule - """ - #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case. 
- - #Golden search - golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit - golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit - golden_A = (golden_A - self.location)/self.scale - golden_B = (golden_B - self.location)/self.scale - opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm - - # Simpson's approximation - width = 3./np.log(max(self.Y[i],2)) - A = opt - width #Lower limit - B = opt + width #Upper limit - K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case - h = (B-A)/K # length of the intervals - grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis) - x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier - zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x - first = zeroth*x - second = first*x - Z_hat = sum(zeroth)*h/3 # Zero-th moment - mu_hat = sum(first)*h/(3*Z_hat) # First moment - m2 = sum(second)*h/(3*Z_hat) # Second moment - sigma2_hat = m2 - mu_hat**2 # Second central moment - return float(Z_hat), float(mu_hat), float(sigma2_hat) - - def plot1Db(self,X,X_new,F_new,F2_new=None,U=None): - pb.subplot(212) - #gpplot(X_new,F_new,np.sqrt(F2_new)) - pb.plot(X_new,F_new)#,np.sqrt(F2_new)) #FIXME - pb.plot(X,self.Y,'kx',mew=1.5) - if U is not None: - pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12) - def predictive_mean(self,mu,variance): - return np.exp(mu*self.scale + self.location) - def predictive_variance(self,mu,variance): - return mu - def _log_likelihood_gradients(): - raise NotImplementedError - -class gaussian(likelihood): - """ - Gaussian likelihood - Y is expected to take values in (-inf,inf) - """ - def moments_match(self,i,tau_i,v_i): - """ - Moments match of the marginal approximation in EP algorithm - - :param i: number of observation (int) - :param tau_i: precision of the cavity distribution (float) - :param v_i: mean/variance of the cavity distribution (float) - """ - mu = v_i/tau_i - sigma = np.sqrt(1./tau_i) - s = 1. if self.Y[i] == 0 else 1./self.Y[i] - sigma2_hat = 1./(1./sigma**2 + 1./s**2) - mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2) - Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2)) - return Z_hat, mu_hat, sigma2_hat - - def plot1Db(self,X,X_new,F_new,U=None): - assert X.shape[1] == 1, 'Number of dimensions must be 1' - gpplot(X_new,F_new,np.zeros(X_new.shape[0])) - pb.plot(X,self.Y,'kx',mew=1.5) - if U is not None: - pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12) - - def predictive_mean(self,mu,Sigma): - return mu - - def _log_likelihood_gradients(): - raise NotImplementedError diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index cfad17c9..9831ae40 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -14,14 +14,14 @@ class Matern32(kernpart): .. 
math:: - k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } :param D: the number of input dimensions :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. :type ARD: Boolean :rtype: kernel object @@ -35,17 +35,19 @@ class Matern32(kernpart): self.Nparam = 2 self.name = 'Mat32' if lengthscale is not None: - assert lengthscale.shape == (1,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: self.Nparam = self.D + 1 - self.name = 'Mat32_ARD' + self.name = 'Mat32' if lengthscale is not None: - assert lengthscale.shape == (self.D,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == self.D, "bad number of lengthscales" else: lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" @@ -116,9 +118,9 @@ class Matern32(kernpart): :param F1: vector of derivatives of F :type F1: np.array :param F2: vector of second derivatives of F - :type F2: np.array + :type F2: np.array :param lower,upper: boundaries of the input domain - :type lower,upper: floats + :type lower,upper: floats """ assert self.D == 1 def L(x,i): @@ -133,4 +135,3 @@ class Matern32(kernpart): #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" #return(G) return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) - diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py index cbe02c83..2994fc45 100644 --- a/GPy/kern/Matern52.py +++ b/GPy/kern/Matern52.py @@ -13,14 +13,14 @@ class Matern52(kernpart): .. math:: - k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } :param D: the number of input dimensions :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
:type ARD: Boolean :rtype: kernel object @@ -33,17 +33,19 @@ class Matern52(kernpart): self.Nparam = 2 self.name = 'Mat52' if lengthscale is not None: - assert lengthscale.shape == (1,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: self.Nparam = self.D + 1 - self.name = 'Mat52_ARD' + self.name = 'Mat52' if lengthscale is not None: - assert lengthscale.shape == (self.D,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == self.D, "bad number of lengthscales" else: lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py index 6c463a63..3c9cb192 100644 --- a/GPy/kern/exponential.py +++ b/GPy/kern/exponential.py @@ -13,14 +13,14 @@ class exponential(kernpart): .. math:: - k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } :param D: the number of input dimensions :type D: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
:type ARD: Boolean :rtype: kernel object @@ -33,17 +33,19 @@ class exponential(kernpart): self.Nparam = 2 self.name = 'exp' if lengthscale is not None: - assert lengthscale.shape == (1,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: self.Nparam = self.D + 1 - self.name = 'exp_ARD' + self.name = 'exp' if lengthscale is not None: - assert lengthscale.shape == (self.D,) + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == self.D, "bad number of lengthscales" else: lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale))) + self._set_params(np.hstack((variance,lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" @@ -87,7 +89,7 @@ class exponential(kernpart): dl = self.variance*dvar*dist2M.sum(-1)*invdist target[1] += np.sum(dl*partial) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,partial,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" #NB: derivative of diagonal elements wrt lengthscale is 0 target[0] += np.sum(partial) @@ -110,9 +112,9 @@ class exponential(kernpart): :param F: vector of functions :type F: np.array :param F1: vector of derivatives of F - :type F1: np.array + :type F1: np.array :param lower,upper: boundaries of the input domain - :type lower,upper: floats + :type lower,upper: floats """ assert self.D == 1 def L(x,i): @@ -124,8 +126,3 @@ class exponential(kernpart): G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] Flower = np.array([f(lower) for f in F])[:,None] return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T)) - - - - - diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index d8881579..c01ba815 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -6,7 +6,7 @@ import numpy as np from ..core.parameterised import parameterised from kernpart import kernpart import itertools -from product_orthogonal import product_orthogonal +from product_orthogonal import product_orthogonal class kern(parameterised): def __init__(self,D,parts=[], input_slices=None): @@ -155,7 +155,7 @@ class kern(parameterised): D = K1.D + K2.D - newkernparts = [product_orthogonal(k1,k2).parts[0] for k1, k2 in itertools.product(K1.parts,K2.parts)] + newkernparts = [product_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)] slices = [] for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices): @@ -235,6 +235,8 @@ class kern(parameterised): X2 = X target = np.zeros(self.Nparam) [p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] + + #TODO: transform the gradients here! return target def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None): @@ -324,6 +326,7 @@ class kern(parameterised): [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] + # "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's # not really expensive, since it's just a matrix of zeroes. 
# psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts] diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 8fb2c867..d7869f0a 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -15,34 +15,33 @@ class linear(kernpart): :param D: the number of input dimensions :type D: int :param variances: the vector of variances :math:`\sigma^2_i` - :type variances: np.ndarray of size (1,) or (D,) depending on ARD - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single variance parameter \sigma^2), otherwise there is one variance parameter per dimension. + :type variances: array or list of the appropriate size (or float if there is only one variance parameter) + :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. :type ARD: Boolean :rtype: kernel object """ - def __init__(self,D,variances=None,ARD=True): + def __init__(self,D,variances=None,ARD=False): self.D = D self.ARD = ARD if ARD == False: self.Nparam = 1 self.name = 'linear' if variances is not None: - if isinstance(variances, float): - variances = np.array([variances]) - - assert variances.shape == (1,) + variances = np.asarray(variances) + assert variances.size == 1, "Only one variance needed for non-ARD kernel" else: variances = np.ones(1) self._Xcache, self._X2cache = np.empty(shape=(2,)) else: self.Nparam = self.D - self.name = 'linear_ARD' + self.name = 'linear' if variances is not None: - assert variances.shape == (self.D,) + variances = np.asarray(variances) + assert variances.size == self.D, "bad number of lengthscales" else: variances = np.ones(self.D) - self._set_params(variances) + self._set_params(variances.flatten()) #initialize cache self._Z, self._mu, self._S = np.empty(shape=(3,1)) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 1506b323..5babfa4f 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -12,7 +12,7 @@ class rbf(kernpart): .. math:: - k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \qquad \qquad \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}} + k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}} where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. @@ -21,7 +21,7 @@ class rbf(kernpart): :param variance: the variance of the kernel :type variance: float :param lengthscale: the vector of lengthscale of the kernel - :type lengthscale: np.ndarray od size (1,) or (D,) depending on ARD + :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
:type ARD: Boolean :rtype: kernel object @@ -75,6 +75,7 @@ class rbf(kernpart): def K(self,X,X2,target): if X2 is None: X2 = X + self._K_computations(X,X2) np.add(self.variance*self._K_dvar, target,target) diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py new file mode 100644 index 00000000..efd887ae --- /dev/null +++ b/GPy/likelihoods/EP.py @@ -0,0 +1,311 @@ +import numpy as np +from scipy import stats, linalg +from ..util.linalg import pdinv,mdot,jitchol +from likelihood import likelihood + +class EP(likelihood): + def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]): + """ + Expectation Propagation + + Arguments + --------- + epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) + likelihood_function : a likelihood function (see likelihood_functions.py) + """ + self.likelihood_function = likelihood_function + self.epsilon = epsilon + self.eta, self.delta = power_ep + self.data = data + self.N = self.data.size + self.is_heteroscedastic = True + self.Nparams = 0 + + #Initial values - Likelihood approximation parameters: + #p(y|f) = t(f|tau_tilde,v_tilde) + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + + #initial values for the GP variables + self.Y = np.zeros((self.N,1)) + self.covariance_matrix = np.eye(self.N) + self.precision = np.ones(self.N) + self.Z = 0 + self.YYT = None + + def predictive_values(self,mu,var): + return self.likelihood_function.predictive_values(mu,var) + + def _get_params(self): + return np.zeros(0) + def _get_param_names(self): + return [] + def _set_params(self,p): + pass # TODO: the EP likelihood might want to take some parameters... + def _gradients(self,partial): + return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... + + def _compute_GP_variables(self): + #Variables to be called from GP + mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model + sigma_sum = 1./self.tau_ + 1./self.tau_tilde + mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 + self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep + + self.Y = mu_tilde[:,None] + self.YYT = np.dot(self.Y,self.Y.T) + self.precision = self.tau_tilde + self.covariance_matrix = np.diag(1./self.precision) + + def fit_full(self,K): + """ + The expectation-propagation algorithm. + For nomenclature see Rasmussen & Williams 2006. + """ + #Prior distribution parameters: p(f|X) = N(f|0,K) + + self.tau_tilde = np.zeros(self.N) + self.v_tilde = np.zeros(self.N) + #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) + mu = np.zeros(self.N) + Sigma = K.copy() + + """ + Initial values - Cavity distribution parameters: + q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = self.epsilon + 1. + epsilon_np2 = self.epsilon + 1. 
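+        # Convergence bookkeeping for the sweep below: iterate until both site
+        # parameter vectors (tau_tilde, v_tilde) change by less than epsilon in
+        # mean squared difference between sweeps; initialising the epsilons to
+        # epsilon + 1 guarantees at least one pass through the loop.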
+ self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) + self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau + self.v_tilde[i] = self.v_tilde[i] + Delta_v + #Posterior distribution parameters update + si=Sigma[:,i].reshape(self.N,1) + Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T) + mu = np.dot(Sigma,self.v_tilde) + self.iterations += 1 + #Sigma recomptutation with Cholesky decompositon + Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K + B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K + L = jitchol(B) + V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1) + Sigma = K - np.dot(V.T,V) + mu = np.dot(Sigma,self.v_tilde) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() + + def fit_DTC(self, Knn_diag, Kmn, Kmm): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see ... 2013. + """ + + #TODO: this doesn;t work with uncertain inputs! + + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = Qnn = Knm*Kmmi*Kmn + """ + Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) + KmnKnm = np.dot(Kmn, Kmn.T) + KmmiKmn = np.dot(Kmmi,Kmn) + Qnn_diag = np.sum(Kmn*KmmiKmn,-2) + LLT0 = Kmm.copy() + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*gamma + """ + mu = np.zeros(self.N) + LLT = Kmm.copy() + Sigma_diag = Qnn_diag.copy() + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + tau_ = np.empty(self.N,dtype=float) + v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + np1 = [tau_tilde.copy()] + np2 = [v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i] + v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i] + #Marginal moments + Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i]) + #Site parameters update + Delta_tau = delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + tau_tilde[i] = tau_tilde[i] + Delta_tau + v_tilde[i] = v_tilde[i] + Delta_v + #Posterior distribution parameters update + LLT 
= LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau + L = jitchol(LLT) + V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1) + Sigma_diag = np.sum(V*V,-2) + si = np.sum(V.T*V[:,i],-1) + mu = mu + (Delta_v-Delta_tau*mu[i])*si + self.iterations += 1 + #Sigma recomputation with Cholesky decompositon + LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T) + L = jitchol(LLT) + V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1) + V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0) + Sigma_diag = np.sum(V*V,-2) + Knmv_tilde = np.dot(Kmn,v_tilde) + mu = np.dot(V2.T,Knmv_tilde) + epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N + epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N + np1.append(tau_tilde.copy()) + np2.append(v_tilde.copy()) + + self._compute_GP_variables() + + def fit_FITC(self, Knn_diag, Kmn): + """ + The expectation-propagation algorithm with sparse pseudo-input. + For nomenclature see Naish-Guzman and Holden, 2008. + """ + + """ + Prior approximation parameters: + q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) + Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn + """ + Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm) + P0 = Kmn.T + KmnKnm = np.dot(P0.T, P0) + KmmiKmn = np.dot(Kmmi,P0.T) + Qnn_diag = np.sum(P0.T*KmmiKmn,-2) + Diag0 = Knn_diag - Qnn_diag + R0 = jitchol(Kmmi).T + + """ + Posterior approximation: q(f|y) = N(f| mu, Sigma) + Sigma = Diag + P*R.T*R*P.T + K + mu = w + P*gamma + """ + self.w = np.zeros(self.N) + self.gamma = np.zeros(self.M) + mu = np.zeros(self.N) + P = P0.copy() + R = R0.copy() + Diag = Diag0.copy() + Sigma_diag = Knn_diag + + """ + Initial values - Cavity distribution parameters: + q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} + sigma_ = 1./tau_ + mu_ = v_/tau_ + """ + self.tau_ = np.empty(self.N,dtype=float) + self.v_ = np.empty(self.N,dtype=float) + + #Initial values - Marginal moments + z = np.empty(self.N,dtype=float) + self.Z_hat = np.empty(self.N,dtype=float) + phi = np.empty(self.N,dtype=float) + mu_hat = np.empty(self.N,dtype=float) + sigma2_hat = np.empty(self.N,dtype=float) + + #Approximation + epsilon_np1 = 1 + epsilon_np2 = 1 + self.iterations = 0 + self.np1 = [self.tau_tilde.copy()] + self.np2 = [self.v_tilde.copy()] + while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: + update_order = np.random.permutation(self.N) + for i in update_order: + #Cavity distribution parameters + self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] + self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] + #Marginal moments + self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i]) + #Site parameters update + Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) + Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) + self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau + self.v_tilde[i] = self.v_tilde[i] + Delta_v + #Posterior distribution parameters update + dtd1 = Delta_tau*Diag[i] + 1. 
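+                    # Rank-one refresh of the factored posterior for site i
+                    # (cf. Naish-Guzman and Holden, 2008): update the diagonal
+                    # term, the projection P and the factor R, then the mean mu;
+                    # dtd1 = 1 + Delta_tau*Diag[i] is the shared denominator.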
+ dii = Diag[i] + Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 + pi_ = P[i,:].reshape(1,self.M) + P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ + Rp_i = np.dot(R,pi_.T) + RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) + R = jitchol(RTR).T + self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1 + self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + mu = self.w + np.dot(P,self.gamma) + self.iterations += 1 + #Sigma recomptutation with Cholesky decompositon + Diag = Diag0/(1.+ Diag0 * self.tau_tilde) + P = (Diag / Diag0)[:,None] * P0 + RPT0 = np.dot(R0,P0.T) + L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T)) + R,info = linalg.flapack.dtrtrs(L,R0,lower=1) + RPT = np.dot(R,P.T) + Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) + self.w = Diag * self.v_tilde + self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) + mu = self.w + np.dot(P,self.gamma) + epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N + epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N + self.np1.append(self.tau_tilde.copy()) + self.np2.append(self.v_tilde.copy()) + + return self._compute_GP_variables() diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py new file mode 100644 index 00000000..a34b3e6c --- /dev/null +++ b/GPy/likelihoods/Gaussian.py @@ -0,0 +1,56 @@ +import numpy as np +from likelihood import likelihood + +class Gaussian(likelihood): + def __init__(self,data,variance=1.,normalize=False): + self.is_heteroscedastic = False + self.Nparams = 1 + self.data = data + self.N,D = data.shape + self.Z = 0. # a correction factor which accounts for the approximation made + + #normalisation + if normalize: + self._mean = data.mean(0)[None,:] + self._std = data.std(0)[None,:] + self.Y = (self.data - self._mean)/self._std + else: + self._mean = np.zeros((1,D)) + self._std = np.ones((1,D)) + self.Y = self.data + + #TODO: make this work efficiently (only compute YYT if D>>N) + self.YYT = np.dot(self.Y,self.Y.T) + self.trYYT = np.trace(self.YYT) + self._set_params(np.asarray(variance)) + + + def _get_params(self): + return np.asarray(self._variance) + + def _get_param_names(self): + return ["noise variance"] + + def _set_params(self,x): + self._variance = float(x) + self.covariance_matrix = np.eye(self.N)*self._variance + self.precision = 1./self._variance + + def predictive_values(self,mu,var): + """ + Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval + """ + mean = mu*self._std + self._mean + true_var = (var + self._variance)*self._std**2 + _5pc = mean + - 2.*np.sqrt(true_var) + _95pc = mean + 2.*np.sqrt(true_var) + return mean, _5pc, _95pc + + def fit_full(self): + """ + No approximations needed + """ + pass + + def _gradients(self,partial): + return np.sum(partial) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py new file mode 100644 index 00000000..83413255 --- /dev/null +++ b/GPy/likelihoods/__init__.py @@ -0,0 +1,4 @@ +from EP import EP +from Gaussian import Gaussian +# TODO: from Laplace import Laplace +import likelihood_functions as functions diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py new file mode 100644 index 00000000..6ec57c07 --- /dev/null +++ b/GPy/likelihoods/likelihood.py @@ -0,0 +1,35 @@ +import numpy as np + +class likelihood: + """ + The atom for a likelihood class + + This object interfaces the GP and 
the data. The most basic likelihood + (Gaussian) inherits directly from this, as does the EP algorithm + + Some things must be defined for this to work properly: + self.Y : the effective Gaussian target of the GP + self.N, self.D : Y.shape + self.covariance_matrix : the effective (noise) covariance of the GP targets + self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP) + self.is_heteroscedastic : enables significant computational savings in GP + self.precision : a scalar or vector representation of the effective target precision + self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N + """ + def __init__(self,data): + raise ValueError, "this class is not to be instantiated" + + def _get_params(self): + raise NotImplementedError + + def _get_param_names(self): + raise NotImplementedError + + def _set_params(self,x): + raise NotImplementedError + + def fit(self): + raise NotImplementedError + + def _gradients(self,partial): + raise NotImplementedError diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py new file mode 100644 index 00000000..23881899 --- /dev/null +++ b/GPy/likelihoods/likelihood_functions.py @@ -0,0 +1,134 @@ +# Copyright (c) 2012, 2013 Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from scipy import stats +import scipy as sp +import pylab as pb +from ..util.plot import gpplot + +class likelihood_function: + """ + Likelihood class for doing Expectation propagation + + :param Y: observed output (Nx1 numpy.darray) + ..Note:: Y values allowed depend on the likelihood_function used + """ + def __init__(self,location=0,scale=1): + self.location = location + self.scale = scale + +class probit(likelihood_function): + """ + Probit likelihood + Y is expected to take values in {-1,1} + ----- + $$ + L(x) = \\Phi (Y_i*f_i) + $$ + """ + + def moments_match(self,data_i,tau_i,v_i): + """ + Moments match of the marginal approximation in EP algorithm + + :param i: number of observation (int) + :param tau_i: precision of the cavity distribution (float) + :param v_i: mean/variance of the cavity distribution (float) + """ + # TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1" + if data_i == 0: data_i = -1 #NOTE Binary classification works better classes {-1,1}, 1D-plotting works better with classes {0,1}. + z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) + Z_hat = stats.norm.cdf(z) + phi = stats.norm.pdf(z) + mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) + sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) + return Z_hat, mu_hat, sigma2_hat + + def predictive_values(self,mu,var): + """ + Compute mean, and conficence interval (percentiles 5 and 95) of the prediction + """ + mu = mu.flatten() + var = var.flatten() + mean = stats.norm.cdf(mu/np.sqrt(1+var)) + p_025 = np.zeros(mu.shape) + p_975 = np.ones(mu.shape) + return mean, p_025, p_975 + +class Poisson(likelihood_function): + """ + Poisson likelihood + Y is expected to take values in {0,1,2,...} + ----- + $$ + L(x) = \exp(\lambda) * \lambda**Y_i / Y_i! 
+class Poisson(likelihood_function):
+    """
+    Poisson likelihood
+    Y is expected to take values in {0,1,2,...}
+    -----
+    $$
+    L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
+    $$
+    """
+    def moments_match(self,data_i,tau_i,v_i):
+        """
+        Moment matching of the marginal approximation in the EP algorithm
+
+        :param data_i: observed output value of the i-th datum (float)
+        :param tau_i: precision of the cavity distribution (float)
+        :param v_i: natural mean (mean times precision) of the cavity distribution (float)
+        """
+        mu = v_i/tau_i
+        sigma = np.sqrt(1./tau_i)
+        def poisson_norm(f):
+            """
+            Product of the likelihood and the cavity distribution
+            """
+            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
+            rate = np.exp( (f*self.scale)+self.location)
+            poisson = stats.poisson.pmf(float(data_i),rate)
+            return pdf_norm_f*poisson
+
+        def log_pnm(f):
+            """
+            Negative log of poisson_norm (up to a constant), minimised to locate the mode
+            """
+            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
+
+        """
+        Golden Search and Simpson's Rule
+        --------------------------------
+        Simpson's Rule is used to calculate the moments numerically, it needs a grid of points as input.
+        Golden Search is used to find the mode of poisson_norm, and the grid for Simpson's Rule is defined around that mode
+        """
+        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
+
+        #Golden search
+        golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
+        golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
+        golden_A = (golden_A - self.location)/self.scale
+        golden_B = (golden_B - self.location)/self.scale
+        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
+
+        # Simpson's approximation
+        width = 3./np.log(max(data_i,2))
+        A = opt - width #Lower limit
+        B = opt + width #Upper limit
+        K = 10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
+        h = (B-A)/K # length of the intervals
+        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
+        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
+        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
+        first = zeroth*x
+        second = first*x
+        Z_hat = sum(zeroth)*h/3 # Zeroth moment
+        mu_hat = sum(first)*h/(3*Z_hat) # First moment
+        m2 = sum(second)*h/(3*Z_hat) # Second moment
+        sigma2_hat = m2 - mu_hat**2 # Second central moment
+        return float(Z_hat), float(mu_hat), float(sigma2_hat)
+
+    def predictive_values(self,mu,var):
+        """
+        Compute the mean and confidence interval (percentiles 2.5 and 97.5) of the prediction
+        """
+        mean = np.exp(mu*self.scale + self.location)
+        p_025 = stats.poisson.ppf(.025,mean)
+        p_975 = stats.poisson.ppf(.975,mean)
+        return mean,p_025,p_975
diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py
index b42e3b98..45304915 100644
--- a/GPy/models/BGPLVM.py
+++ b/GPy/models/BGPLVM.py
@@ -5,10 +5,12 @@ import numpy as np
 import pylab as pb
 import sys, pdb
 from GPLVM import GPLVM
-from sparse_GP_regression import sparse_GP_regression
+from sparse_GP import sparse_GP
 from GPy.util.linalg import pdinv
+from ..likelihoods import Gaussian
+from .. 
import kern -class Bayesian_GPLVM(sparse_GP_regression, GPLVM): +class Bayesian_GPLVM(sparse_GP, GPLVM): """ Bayesian Gaussian Process Latent Variable Model @@ -20,18 +22,24 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, X = None, S = None, init='PCA', **kwargs): - if X == None: - X = self.initialise_latent(init, Q, Y) - if S == None: - S = np.ones_like(X) * 1e-2 + def __init__(self, Y, Q, init='PCA', M=10, Z=None, kernel=None, **kwargs): + X = self.initialise_latent(init, Q, Y) - sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs) + if Z is None: + Z = np.random.permutation(X.copy())[:M] + else: + assert Z.shape[1]==X.shape[1] + + if kernel is None: + kernel = kern.rbf(Q) + kern.white(Q) + + S = np.ones_like(X) * 1e-2# + sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_uncertainty=S, **kwargs) def _get_param_names(self): X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) - return (X_names + S_names + sparse_GP_regression._get_param_names(self)) + return (X_names + S_names + sparse_GP._get_param_names(self)) def _get_params(self): """ @@ -39,17 +47,17 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM): The resulting 1-D array has this structure: =============================================================== - | mu | S | Z | beta | theta | + | mu | S | Z | theta | beta | =============================================================== """ - return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression._get_params(self))) + return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP._get_params(self))) def _set_params(self,x): N, Q = self.N, self.Q self.X = x[:self.X.size].reshape(N,Q).copy() self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy() - sparse_GP_regression._set_params(self, x[(2*N*Q):]) + sparse_GP._set_params(self, x[(2*N*Q):]) def dL_dmuS(self): dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty) @@ -58,17 +66,8 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM): dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 - dKL_dS = (1. - (1./self.X_uncertainty))*0.5 - dKL_dmu = self.X - return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten())) - - def KL_divergence(self): - var_mean = np.square(self.X).sum() - var_S = np.sum(self.X_uncertainty - np.log(self.X_uncertainty)) - return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N - - def log_likelihood(self): - return sparse_GP_regression.log_likelihood(self) - self.KL_divergence() + return np.hstack((dL_dmu.flatten(), dL_dS.flatten())) def _log_likelihood_gradients(self): - return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self))) + return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self))) + diff --git a/GPy/models/GP.py b/GPy/models/GP.py new file mode 100644 index 00000000..c4c37e44 --- /dev/null +++ b/GPy/models/GP.py @@ -0,0 +1,274 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +import pylab as pb +from .. 
import kern +from ..core import model +from ..util.linalg import pdinv,mdot +from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango +from ..likelihoods import EP + +class GP(model): + """ + Gaussian Process model for regression and EP + + :param X: input observations + :param kernel: a GPy kernel, defaults to rbf+white + :parm likelihood: a GPy likelihood + :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_X: False|True + :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_Y: False|True + :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing) + :rtype: model object + :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 + :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] + :type powerep: list + + .. Note:: Multiple independent outputs are allowed using columns of Y + + """ + #FIXME normalize vs normalise + def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None): + + # parse arguments + self.Xslices = Xslices + self.X = X + assert len(self.X.shape)==2 + self.N, self.Q = self.X.shape + assert isinstance(kernel, kern.kern) + self.kern = kernel + + #here's some simple normalisation for the inputs + if normalize_X: + self._Xmean = X.mean(0)[None,:] + self._Xstd = X.std(0)[None,:] + self.X = (X.copy() - self._Xmean) / self._Xstd + if hasattr(self,'Z'): + self.Z = (self.Z - self._Xmean) / self._Xstd + else: + self._Xmean = np.zeros((1,self.X.shape[1])) + self._Xstd = np.ones((1,self.X.shape[1])) + + self.likelihood = likelihood + #assert self.X.shape[0] == self.likelihood.Y.shape[0] + #self.N, self.D = self.likelihood.Y.shape + assert self.X.shape[0] == self.likelihood.data.shape[0] + self.N, self.D = self.likelihood.data.shape + + model.__init__(self) + + def _set_params(self,p): + self.kern._set_params_transformed(p[:self.kern.Nparam]) + #self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas + self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas + + + self.K = self.kern.K(self.X,slices1=self.Xslices) + self.K += self.likelihood.covariance_matrix + + self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) + + #the gradient of the likelihood wrt the covariance matrix + if self.likelihood.YYT is None: + alpha = np.dot(self.Ki,self.likelihood.Y) + self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki) + else: + tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) + self.dL_dK = 0.5*(tmp - self.D*self.Ki) + + def _get_params(self): + return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) + + def _get_param_names(self): + return self.kern._get_param_names_transformed() + self.likelihood._get_param_names() + + def update_likelihood_approximation(self): + """ + Approximates a non-gaussian likelihood using Expectation Propagation + + For a Gaussian (or direct: TODO) likelihood, no iteration is required: + this function does nothing + """ + self.likelihood.fit_full(self.kern.K(self.X)) + self._set_params(self._get_params()) # update the GP + + def _model_fit_term(self): + """ + Computes the model fit using YYT if it's available + """ + if self.likelihood.YYT is None: + return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y))) + else: + return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT)) + + def 
log_likelihood(self):
+        """
+        The log marginal likelihood of the GP.
+
+        For an EP model, this can be written as the log likelihood of a regression
+        model for a new variable Y* = v_tilde/tau_tilde, with a covariance
+        matrix K* = K + diag(1./tau_tilde), plus a normalization term.
+        """
+        return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
+
+
+    def _log_likelihood_gradients(self):
+        """
+        The gradient of all parameters.
+
+        For the kernel parameters, use the chain rule via dL_dK
+
+        For the likelihood parameters, pass in alpha = K^-1 y
+        """
+        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+
+    def _raw_predict(self,_Xnew,slices=None, full_cov=False):
+        """
+        Internal helper function for making predictions, does not account
+        for normalisation or likelihood
+        """
+        Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
+        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
+        KiKx = np.dot(self.Ki,Kx)
+        if full_cov:
+            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
+            var = Kxx - np.dot(KiKx.T,Kx) #NOTE this won't work for plotting
+        else:
+            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
+            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
+            var = var[:,None]
+        return mu, var
+
+
+    def predict(self,Xnew, slices=None, full_cov=False):
+        """
+        Predict the function(s) at the new point(s) Xnew.
+
+        Arguments
+        ---------
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray, Nnew x self.Q
+        :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
+        :type slices: (None, list of slice objects, list of ints)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :rtype: posterior mean, a Numpy array, Nnew x self.D
+        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
+
+        .. Note:: "slices" specifies how the points X_new co-vary with the training points.
+
+          - If None, the new points covary through every kernel part (default)
+          - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
+          - If a list of booleans, specifying which kernel parts are active
+
+        If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
+        This is to allow for different normalisations of the output dimensions.
+
+        """
+        #normalise X values
+        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
+        mu, var = self._raw_predict(Xnew, slices, full_cov)
+
+        #now push through likelihood TODO
+        mean, _025pm, _975pm = self.likelihood.predictive_values(mu, var)
+
+        return mean, var, _025pm, _975pm
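+    # Editor's note (hedged illustration, not in the original patch): for a
+    # fitted model `m`, prediction looks like
+    #
+    #     Xnew = np.linspace(0., 1., 100)[:, None]
+    #     mean, var, lower, upper = m.predict(Xnew)
+    #
+    # `mean`, `lower` and `upper` come back through the likelihood on the
+    # original data scale, while `var` is the latent-function variance from
+    # _raw_predict (it is not un-normalised here).
+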
+    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
+        """
+        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
+
+        :param samples: the number of a posteriori samples to plot
+        :param which_data: which of the training data to plot (default all)
+        :type which_data: 'all' or a slice object to slice self.X, self.Y
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+        :param which_functions: which of the kernel functions to plot (additively)
+        :type which_functions: list of bools
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+
+        Plot the posterior of the GP.
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimensions, a contour-plot shows the mean predicted function
+          - In higher dimensions, we've not implemented this yet !TODO!
+
+        Can plot only part of the data and part of the posterior functions using which_data and which_functions
+        Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
+        """
+        if which_functions=='all':
+            which_functions = [True]*self.kern.Nparts
+        if which_data=='all':
+            which_data = slice(None)
+
+        if self.X.shape[1] == 1:
+            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
+            if samples == 0:
+                m,v = self._raw_predict(Xnew, slices=which_functions)
+                gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
+                pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
+            else:
+                m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
+                Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
+                gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
+                for i in range(samples):
+                    pb.plot(Xnew,Ysim[i,:],Tango.coloursHex['darkBlue'],linewidth=0.25)
+                pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
+            pb.xlim(xmin,xmax)
+            ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
+            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
+            pb.ylim(ymin,ymax)
+            if hasattr(self,'Z'):
+                pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
+
+        elif self.X.shape[1] == 2:
+            resolution = resolution or 50
+            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
+            m,v = self._raw_predict(Xnew, slices=which_functions)
+            m = m.reshape(resolution,resolution).T
+            pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
+            pb.scatter(self.X[:,0],self.X[:,1],40,self.likelihood.Y,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max()) # plot the (normalised) data; `Xorig`/`Yorig` were undefined here
+            pb.xlim(xmin[0],xmax[0])
+            pb.ylim(xmin[1],xmax[1])
+        else:
+            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
+        # TODO include samples
+        if which_functions=='all':
+            which_functions = [True]*self.kern.Nparts
+        if which_data=='all':
+            which_data = slice(None)
+
+        if self.X.shape[1] == 1:
+
+            Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
+
+            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+            m, var, lower, upper = self.predict(Xnew, slices=which_functions)
+            gpplot(Xnew,m, lower, upper)
+            pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
+            ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
+            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
+            pb.xlim(xmin,xmax)
+            pb.ylim(ymin,ymax)
+            if hasattr(self,'Z'):
+                Zu = self.Z*self._Xstd + self._Xmean
+                pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
+
+        elif self.X.shape[1]==2: #FIXME
+            resolution = resolution or 50
+            Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
+            x, y = 
np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution) + m, var, lower, upper = self.predict(Xnew, slices=which_functions) + m = m.reshape(resolution,resolution).T + pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet) + Yf = self.likelihood.Y.flatten() + pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.) + pb.xlim(xmin[0],xmax[0]) + pb.ylim(xmin[1],xmax[1]) + else: + raise NotImplementedError, "Cannot define a frame with more than two input dimensions" diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index a8f6a5b1..73762433 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -8,9 +8,10 @@ import sys, pdb from .. import kern from ..core import model from ..util.linalg import pdinv, PCA -from GP_regression import GP_regression +from GP import GP +from ..likelihoods import Gaussian -class GPLVM(GP_regression): +class GPLVM(GP): """ Gaussian Process Latent Variable Model @@ -22,10 +23,13 @@ class GPLVM(GP_regression): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, init='PCA', X = None, **kwargs): + def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs): if X is None: X = self.initialise_latent(init, Q, Y) - GP_regression.__init__(self, X, Y, **kwargs) + if kernel is None: + kernel = kern.rbf(Q) + kern.bias(Q) + likelihood = Gaussian(Y) + GP.__init__(self, X, likelihood, kernel, **kwargs) def initialise_latent(self, init, Q, Y): if init == 'PCA': @@ -34,23 +38,19 @@ class GPLVM(GP_regression): return np.random.randn(Y.shape[0], Q) def _get_param_names(self): - return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) - + self.kern._get_param_names_transformed()) + return sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) + GP._get_param_names(self) def _get_params(self): - return np.hstack((self.X.flatten(), self.kern._get_params_transformed())) + return np.hstack((self.X.flatten(), GP._get_params(self))) def _set_params(self,x): self.X = x[:self.X.size].reshape(self.N,self.Q).copy() - GP_regression._set_params(self, x[self.X.size:]) + GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): - dL_dK = self.dL_dK() + dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X) - dL_dtheta = self.kern.dK_dtheta(dL_dK,self.X) - dL_dX = 2*self.kern.dK_dX(dL_dK,self.X) - - return np.hstack((dL_dX.flatten(),dL_dtheta)) + return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self))) def plot(self): assert self.Y.shape[1]==2 diff --git a/GPy/models/GP_EP.py b/GPy/models/GP_EP.py deleted file mode 100644 index 51d69d0a..00000000 --- a/GPy/models/GP_EP.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import pylab as pb -from scipy import stats, linalg -from .. 
import kern -from ..inference.Expectation_Propagation import Full -from ..inference.likelihoods import likelihood,probit#,poisson,gaussian -from ..core import model -from ..util.linalg import pdinv,jitchol -from ..util.plot import gpplot - -class GP_EP(model): - def __init__(self,X,likelihood,kernel=None,epsilon_ep=1e-3,epsion_em=.1,powerep=[1.,1.]): - """ - Simple Gaussian Process with Non-Gaussian likelihood - - Arguments - --------- - :param X: input observations (NxD numpy.darray) - :param likelihood: a GPy likelihood (likelihood class) - :param kernel: a GPy kernel (kern class) - :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 (float) - :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list) - :rtype: GPy model class. - """ - if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1]) - - assert isinstance(kernel,kern.kern), 'kernel is not a kern instance' - self.likelihood = likelihood - self.Y = self.likelihood.Y - self.kernel = kernel - self.X = X - self.N, self.D = self.X.shape - self.eta,self.delta = powerep - self.epsilon_ep = epsilon_ep - self.jitter = 1e-12 - self.K = self.kernel.K(self.X) - model.__init__(self) - - def _set_params(self,p): - self.kernel._set_params_transformed(p) - - def _get_params(self): - return self.kernel._get_params_transformed() - - def _get_param_names(self): - return self.kernel._get_param_names_transformed() - - def approximate_likelihood(self): - self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta]) - self.ep_approx.fit_EP() - - def posterior_param(self): - self.K = self.kernel.K(self.X) - self.Sroot_tilde_K = np.sqrt(self.ep_approx.tau_tilde)[:,None]*self.K - B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)[None,:]*self.Sroot_tilde_K - #self.L = np.linalg.cholesky(B) - self.L = jitchol(B) - V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1) - self.Sigma = self.K - np.dot(V.T,V) - self.mu = np.dot(self.Sigma,self.ep_approx.v_tilde) - - def log_likelihood(self): - """ - Returns - ------- - The EP approximation to the log-marginal likelihood - """ - self.posterior_param() - mu_ = self.ep_approx.v_/self.ep_approx.tau_ - L1 =.5*sum(np.log(1+self.ep_approx.tau_tilde*1./self.ep_approx.tau_))-sum(np.log(np.diag(self.L))) - L2A =.5*np.sum((self.Sigma-np.diag(1./(self.ep_approx.tau_+self.ep_approx.tau_tilde))) * np.dot(self.ep_approx.v_tilde[:,None],self.ep_approx.v_tilde[None,:])) - L2B = .5*np.dot(mu_*(self.ep_approx.tau_/(self.ep_approx.tau_tilde+self.ep_approx.tau_)),self.ep_approx.tau_tilde*mu_ - 2*self.ep_approx.v_tilde) - L3 = sum(np.log(self.ep_approx.Z_hat)) - return L1 + L2A + L2B + L3 - - def _log_likelihood_gradients(self): - dK_dp = self.kernel.dK_dtheta(self.X) - self.dK_dp = dK_dp - aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1) - b = self.ep_approx.v_tilde - np.sqrt(self.ep_approx.tau_tilde)*linalg.flapack.dtrtrs(self.L.T,aux1)[0] - U,info_u = linalg.flapack.dtrtrs(self.L,np.diag(np.sqrt(self.ep_approx.tau_tilde)),lower=1) - dL_dK = 0.5*(np.outer(b,b)-np.dot(U.T,U)) - self.dL_dK = dL_dK - return np.array([np.sum(dK_dpi*dL_dK) for dK_dpi in dK_dp.T]) - - def predict(self,X): - #TODO: check output dimensions - self.posterior_param() - K_x = self.kernel.K(self.X,X) - Kxx = self.kernel.K(X) - aux1,info = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1) - aux2,info = 
linalg.flapack.dtrtrs(self.L.T, aux1,lower=0) - zeta = np.sqrt(self.ep_approx.tau_tilde)*aux2 - f = np.dot(K_x.T,self.ep_approx.v_tilde-zeta) - v,info = linalg.flapack.dtrtrs(self.L,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x,lower=1) - variance = Kxx - np.dot(v.T,v) - vdiag = np.diag(variance) - y=self.likelihood.predictive_mean(f,vdiag) - return f,vdiag,y - - def plot(self): - """ - Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals. - """ - if self.X.shape[1]==1: - pb.figure() - xmin,xmax = self.X.min(),self.X.max() - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - Xnew = np.linspace(xmin,xmax,100)[:,None] - mu_f, var_f, mu_phi = self.predict(Xnew) - pb.subplot(211) - self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.X,Mean_u=self.mu,Var_u=np.diag(self.Sigma)) - pb.subplot(212) - self.likelihood.plot1Db(self.X,Xnew,mu_phi) - elif self.X.shape[1]==2: - pb.figure() - x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0) - x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0) - x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min) - x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x1max-x1min) - axis1 = np.linspace(x1min,x1max,50) - axis2 = np.linspace(x2min,x2max,50) - XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)] - Xnew = np.c_[XX1.flatten(),XX2.flatten()] - f,v,p = self.predict(Xnew) - self.likelihood.plot2D(self.X,Xnew,p) - else: - raise NotImplementedError, "Cannot plot GPs with more than two input dimensions" - - def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense - """ - Fits sparse_EP and optimizes the hyperparametes iteratively until convergence is achieved. - """ - self.epsilon_em = epsilon - log_likelihood_change = self.epsilon_em + 1. - self.parameters_path = [self.kernel._get_params()] - self.approximate_likelihood() - self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]] - self.log_likelihood_path = [self.log_likelihood()] - iteration = 0 - while log_likelihood_change > self.epsilon_em: - print 'EM iteration', iteration - self.optimize(max_f_eval = max_f_eval) - log_likelihood_new = self.log_likelihood() - log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1] - if log_likelihood_change < 0: - print 'log_likelihood decrement' - self.kernel._set_params_transformed(self.parameters_path[-1]) - self.kernM._set_params_transformed(self.parameters_path[-1]) - else: - self.approximate_likelihood() - self.log_likelihood_path.append(self.log_likelihood()) - self.parameters_path.append(self.kernel._get_params()) - self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde]) - iteration += 1 diff --git a/GPy/models/GP_regression.py b/GPy/models/GP_regression.py index 72a24307..5f9f9f3e 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/GP_regression.py @@ -1,18 +1,18 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Copyright (c) 2012, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -import pylab as pb +from GP import GP +from .. import likelihoods from .. 
import kern -from ..core import model -from ..util.linalg import pdinv,mdot -from ..util.plot import gpplot, Tango -class GP_regression(model): +class GP_regression(GP): """ Gaussian Process model for regression + This is a thin wrapper around the GP class, with a set of sensible defalts + :param X: input observations :param Y: observed values :param kernel: a GPy kernel, defaults to rbf+white @@ -29,199 +29,8 @@ class GP_regression(model): def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1]) + kernel = kern.rbf(X.shape[1]) - # parse arguments - self.Xslices = Xslices - assert isinstance(kernel, kern.kern) - self.kern = kernel - self.X = X - self.Y = Y - assert len(self.X.shape)==2 - assert len(self.Y.shape)==2 - assert self.X.shape[0] == self.Y.shape[0] - self.N, self.D = self.Y.shape - self.N, self.Q = self.X.shape + likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) - #here's some simple normalisation - if normalize_X: - self._Xmean = X.mean(0)[None,:] - self._Xstd = X.std(0)[None,:] - self.X = (X.copy() - self._Xmean) / self._Xstd - if hasattr(self,'Z'): - self.Z = (self.Z - self._Xmean) / self._Xstd - else: - self._Xmean = np.zeros((1,self.X.shape[1])) - self._Xstd = np.ones((1,self.X.shape[1])) - - if normalize_Y: - self._Ymean = Y.mean(0)[None,:] - self._Ystd = Y.std(0)[None,:] - self.Y = (Y.copy()- self._Ymean) / self._Ystd - else: - self._Ymean = np.zeros((1,self.Y.shape[1])) - self._Ystd = np.ones((1,self.Y.shape[1])) - - if self.D > self.N: - # then it's more efficient to store YYT - self.YYT = np.dot(self.Y, self.Y.T) - else: - self.YYT = None - - model.__init__(self) - - def _set_params(self,p): - self.kern._set_params_transformed(p) - self.K = self.kern.K(self.X,slices1=self.Xslices) - self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) - - def _get_params(self): - return self.kern._get_params_transformed() - - def _get_param_names(self): - return self.kern._get_param_names_transformed() - - def _model_fit_term(self): - """ - Computes the model fit using YYT if it's available - """ - if self.YYT is None: - return -0.5*np.sum(np.square(np.dot(self.Li,self.Y))) - else: - return -0.5*np.sum(np.multiply(self.Ki, self.YYT)) - - def log_likelihood(self): - complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet - return complexity_term + self._model_fit_term() - - def dL_dK(self): - if self.YYT is None: - alpha = np.dot(self.Ki,self.Y) - dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki) - else: - dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki) - - return dL_dK - - def _log_likelihood_gradients(self): - return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X) - - def predict(self,Xnew, slices=None, full_cov=False): - """ - - Predict the function(s) at the new point(s) Xnew. - - Arguments - --------- - :param Xnew: The points at which to make a prediction - :type Xnew: np.ndarray, Nnew x self.Q - :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below) - :type slices: (None, list of slice objects, list of ints) - :param full_cov: whether to return the folll covariance matrix, or just the diagonal - :type full_cov: bool - :rtype: posterior mean, a Numpy array, Nnew x self.D - :rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D) - - .. Note:: "slices" specifies how the the points X_new co-vary wich the training points. 
- - - If None, the new points covary throigh every kernel part (default) - - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part - - If a list of booleans, specifying which kernel parts are active - - If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. - This is to allow for different normalisations of the output dimensions. - - - """ - - #normalise X values - Xnew = (Xnew.copy() - self._Xmean) / self._Xstd - mu, var = self._raw_predict(Xnew, slices, full_cov) - - #un-normalise - mu = mu*self._Ystd + self._Ymean - if full_cov: - if self.D==1: - var *= np.square(self._Ystd) - else: - var = var[:,:,None] * np.square(self._Ystd) - else: - if self.D==1: - var *= np.square(np.squeeze(self._Ystd)) - else: - var = var[:,None] * np.square(self._Ystd) - - return mu,var - - def _raw_predict(self,_Xnew,slices, full_cov=False): - """Internal helper function for making predictions, does not account for normalisation""" - Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices) - mu = np.dot(np.dot(Kx.T,self.Ki),self.Y) - KiKx = np.dot(self.Ki,Kx) - if full_cov: - Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices) - var = Kxx - np.dot(KiKx.T,Kx) - else: - Kxx = self.kern.Kdiag(_Xnew, slices=slices) - var = Kxx - np.sum(np.multiply(KiKx,Kx),0) - return mu, var - - def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None): - """ - :param samples: the number of a posteriori samples to plot - :param which_data: which if the training data to plot (default all) - :type which_data: 'all' or a slice object to slice self.X, self.Y - :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits - :param which_functions: which of the kernel functions to plot (additively) - :type which_functions: list of bools - :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D - - Plot the posterior of the GP. - - In one dimension, the function is plotted with a shaded region identifying two standard deviations. - - In two dimsensions, a contour-plot shows the mean predicted function - - In higher dimensions, we've no implemented this yet !TODO! 
- - Can plot only part of the data and part of the posterior functions using which_data and which_functions - """ - if which_functions=='all': - which_functions = [True]*self.kern.Nparts - if which_data=='all': - which_data = slice(None) - - X = self.X[which_data,:] - Y = self.Y[which_data,:] - - Xorig = X*self._Xstd + self._Xmean - Yorig = Y*self._Ystd + self._Ymean - if plot_limits is None: - xmin,xmax = Xorig.min(0),Xorig.max(0) - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - elif len(plot_limits)==2: - xmin, xmax = plot_limits - else: - raise ValueError, "Bad limits for plotting" - - - if self.X.shape[1]==1: - Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None] - m,v = self.predict(Xnew,slices=which_functions) - gpplot(Xnew,m,v) - if samples: - s = np.random.multivariate_normal(m.flatten(),v,samples) - pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8) - pb.plot(Xorig,Yorig,'kx',mew=1.5) - pb.xlim(xmin,xmax) - - elif self.X.shape[1]==2: - resolution = 50 or resolution - xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] - Xtest = np.vstack((xx.flatten(),yy.flatten())).T - zz,vv = self.predict(Xtest,slices=which_functions) - zz = zz.reshape(resolution,resolution) - pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet) - pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max()) - pb.xlim(xmin[0],xmax[0]) - pb.ylim(xmin[1],xmax[1]) - - else: - raise NotImplementedError, "Cannot plot GPs with more than two input dimensions" + GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 9d5b1d00..8e2c5d84 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -2,12 +2,12 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) +from GP import GP from GP_regression import GP_regression +from sparse_GP import sparse_GP from sparse_GP_regression import sparse_GP_regression from GPLVM import GPLVM from warped_GP import warpedGP -from GP_EP import GP_EP -from generalized_FITC import generalized_FITC from sparse_GPLVM import sparse_GPLVM -from uncollapsed_sparse_GP import uncollapsed_sparse_GP +#from uncollapsed_sparse_GP import uncollapsed_sparse_GP from BGPLVM import Bayesian_GPLVM diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py deleted file mode 100644 index a5ed8d0a..00000000 --- a/GPy/models/generalized_FITC.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import pylab as pb -from scipy import stats, linalg -from .. import kern -from ..core import model -from ..util.linalg import pdinv,mdot -from ..util.plot import gpplot -from ..inference.Expectation_Propagation import FITC -from ..inference.likelihoods import likelihood,probit - -class generalized_FITC(model): - def __init__(self,X,likelihood,kernel=None,inducing=10,epsilon_ep=1e-3,powerep=[1.,1.]): - """ - Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. - - :param X: input observations - :param likelihood: Output's likelihood (likelihood class) - :param kernel: a GPy kernel - :param inducing: Either an array specifying the inducing points location or a scalar defining their number. 
- :param epsilon_ep: EP convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats) - """ - assert isinstance(kernel,kern.kern) - self.likelihood = likelihood - self.Y = self.likelihood.Y - self.kernel = kernel - self.X = X - self.N, self.D = self.X.shape - assert self.Y.shape[0] == self.N - if type(inducing) == int: - self.M = inducing - self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1) - elif type(inducing) == np.ndarray: - self.Z = inducing - self.M = self.Z.shape[0] - self.eta,self.delta = powerep - self.epsilon_ep = epsilon_ep - self.jitter = 1e-12 - model.__init__(self) - - def _set_params(self,p): - self.kernel._set_params_transformed(p[0:-self.Z.size]) - self.Z = p[-self.Z.size:].reshape(self.M,self.D) - - def _get_params(self): - return np.hstack([self.kernel._get_params_transformed(),self.Z.flatten()]) - - def _get_param_names(self): - return self.kernel._get_param_names_transformed()+['iip_%i'%i for i in range(self.Z.size)] - - def approximate_likelihood(self): - self.Kmm = self.kernel.K(self.Z) - self.Knm = self.kernel.K(self.X,self.Z) - self.Knn_diag = self.kernel.Kdiag(self.X) - self.ep_approx = FITC(self.Kmm,self.likelihood,self.Knm.T,self.Knn_diag,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta]) - self.ep_approx.fit_EP() - - def posterior_param(self): - self.Knn_diag = self.kernel.Kdiag(self.X) - self.Kmm = self.kernel.K(self.Z) - self.Kmmi, self.Lmm, self.Lmmi, self.Kmm_logdet = pdinv(self.Kmm) - self.Knm = self.kernel.K(self.X,self.Z) - self.KmmiKmn = np.dot(self.Kmmi,self.Knm.T) - self.Qnn = np.dot(self.Knm,self.KmmiKmn) - self.Diag0 = self.Knn_diag - np.diag(self.Qnn) - self.R0 = np.linalg.cholesky(self.Kmmi).T - - self.Taut = self.ep_approx.tau_tilde/(1.+ self.ep_approx.tau_tilde*self.Diag0) - self.KmnTaut = self.Knm.T*self.Taut[None,:] - self.KmnTautKnm = np.dot(self.KmnTaut, self.Knm) - self.Woodbury_inv, self.Wood_L, self.Wood_Li, self.Woodbury_logdet = pdinv(self.Kmm + self.KmnTautKnm) - self.Qnn_diag = self.Knn_diag - np.diag(self.Qnn) + 1./self.ep_approx.tau_tilde - self.Qi = -np.dot(self.KmnTaut.T, np.dot(self.Woodbury_inv,self.KmnTaut)) + np.diag(self.Taut) - self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - 0.5*self.Kmm_logdet + 0.5*self.Woodbury_logdet - - self.Diag = self.Diag0/(1.+ self.Diag0 * self.ep_approx.tau_tilde) - self.P = (self.Diag / self.Diag0)[:,None] * self.Knm - self.RPT0 = np.dot(self.R0,self.Knm.T) - self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(self.L,self.R0,lower=1) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) - self.w = self.Diag * self.ep_approx.v_tilde - self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.ep_approx.v_tilde)) - self.mu = self.w + np.dot(self.P,self.gamma) - self.mu_tilde = (self.ep_approx.v_tilde/self.ep_approx.tau_tilde)[:,None] - - def log_likelihood(self): - self.posterior_param() - self.YYT = np.dot(self.mu_tilde,self.mu_tilde.T) - A = -self.hld - B = -.5*np.sum(self.Qi*self.YYT) - C = sum(np.log(self.ep_approx.Z_hat)) - D = .5*np.sum(np.log(1./self.ep_approx.tau_tilde + 1./self.ep_approx.tau_)) - E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde)) - 
return A + B + C + D + E - - def _log_likelihood_gradients(self): - dKmm_dtheta = self.kernel.dK_dtheta(self.Z) - dKnn_dtheta = self.kernel.dK_dtheta(self.X) - dKmn_dtheta = self.kernel.dK_dtheta(self.Z,self.X) - dKmm_dZ = -self.kernel.dK_dX(self.Z) - dKnm_dZ = -self.kernel.dK_dX(self.X,self.Z) - tmp = [np.dot(dKmn_dtheta_i,self.KmmiKmn) for dKmn_dtheta_i in dKmn_dtheta.T] - dQnn_dtheta = [tmp_i + tmp_i.T - np.dot(np.dot(self.KmmiKmn.T,dKmm_dtheta_i),self.KmmiKmn) for tmp_i,dKmm_dtheta_i in zip(tmp,dKmm_dtheta.T)] - dDiag0_dtheta = [np.diag(dKnn_dtheta_i) - np.diag(dQnn_dtheta_i) for dKnn_dtheta_i,dQnn_dtheta_i in zip(dKnn_dtheta.T,dQnn_dtheta)] - dQ_dtheta = [np.diag(dDiag0_dtheta_i) + dQnn_dtheta_i for dDiag0_dtheta_i,dQnn_dtheta_i in zip(dDiag0_dtheta,dQnn_dtheta)] - dW_dtheta = [dKmm_dtheta_i + 2*np.dot(self.KmnTaut,dKmn_dtheta_i) - np.dot(self.KmnTaut*dDiag0_dtheta_i,self.KmnTaut.T) for dKmm_dtheta_i,dDiag0_dtheta_i,dKmn_dtheta_i in zip(dKmm_dtheta.T,dDiag0_dtheta,dKmn_dtheta.T)] - - QiY = np.dot(self.Qi, self.mu_tilde) - QiYYQi = np.outer(QiY,QiY) - WiKmnTaut = np.dot(self.Woodbury_inv,self.KmnTaut) - K_Y = np.dot(self.KmmiKmn,QiY) - # gradient - theta - Atheta = [-0.5*np.dot(self.Taut,dDiag0_dtheta_i) + 0.5*np.sum(self.Kmmi*dKmm_dtheta_i) - 0.5*np.sum(self.Woodbury_inv*dW_dtheta_i) for dDiag0_dtheta_i,dKmm_dtheta_i,dW_dtheta_i in zip(dDiag0_dtheta,dKmm_dtheta.T,dW_dtheta)] - Btheta = np.array([0.5*np.sum(QiYYQi*dQ_dtheta_i) for dQ_dtheta_i in dQ_dtheta]) - dL_dtheta = Atheta + Btheta - # gradient - Z - # Az - dQnn_dZ_diag_a2 = (np.array([d[:,:,None]*self.KmmiKmn[:,:,None] for d in dKnm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0) - dQnn_dZ_diag_b2 = (np.array([(self.KmmiKmn*np.sum(d[:,:,None]*self.KmmiKmn,-2))[:,:,None] for d in dKmm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0) - dQnn_dZ_diag = dQnn_dZ_diag_a2 - dQnn_dZ_diag_b2 - d_hld_Diag1_dZ = -np.sum(np.dot(self.KmmiKmn*self.Taut,self.KmmiKmn.T)[:,:,None]*dKmm_dZ,-2) + np.sum((self.KmmiKmn*self.Taut)[:,:,None]*dKnm_dZ,-2) - d_hld_Kmm_dZ = np.sum(self.Kmmi[:,:,None]*dKmm_dZ,-2) - d_hld_W_dZ1 = np.sum(WiKmnTaut[:,:,None]*dKnm_dZ,-2) - d_hld_W_dZ3 = np.sum(self.Woodbury_inv[:,:,None]*dKmm_dZ,-2) - d_hld_W_dZ2 = np.array([np.sum(np.sum(WiKmnTaut.T*d[:,:,None]*self.KmnTaut.T,-2),-1) for d in dQnn_dZ_diag.transpose(2,0,1)]).T - Az = d_hld_Diag1_dZ + d_hld_Kmm_dZ - d_hld_W_dZ1 - d_hld_W_dZ2 - d_hld_W_dZ3 - # Bz - Bz2 = np.sum(np.dot(K_Y,QiY.T)[:,:,None]*dKnm_dZ,-2) - Bz3 = - np.sum(np.dot(K_Y,K_Y.T)[:,:,None]*dKmm_dZ,-2) - Bz1 = -np.array([np.sum((QiY**2)*d[:,:,None],-2) for d in dQnn_dZ_diag.transpose(2,0,1)]).reshape(self.D,self.M).T - Bz = Bz1 + Bz2 + Bz3 - dL_dZ = (Az + Bz).flatten() - return np.hstack([dL_dtheta, dL_dZ]) - - def predict(self,X): - """ - Make a prediction for the vsGP model - - Arguments - --------- - X : Input prediction data - Nx1 numpy array (floats) - """ - #TODO: check output dimensions - K_x = self.kernel.K(self.Z,X) - Kxx = self.kernel.K(X) - #K_x = self.kernM.cross.K(X) - # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) - - # Ci = I + (RPT0)Di(RPT0).T - # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T - # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T - # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T - # = I - V.T * V - U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) - V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) - C = np.eye(self.M) - np.dot(V.T,V) - mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) - #self.C = C - #self.RPT0 = np.dot(self.R0,self.Knm.T) 
P0.T - #self.mu_u = mu_u - #self.U = U - # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) - mu_H = np.dot(mu_u,self.mu) - self.mu_H = mu_H - Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T)) - # q(f_star|y) = N(f_star|mu_star,sigma2_star) - KR0T = np.dot(K_x.T,self.R0.T) - mu_star = np.dot(KR0T,mu_H) - sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) - vdiag = np.diag(sigma2_star) - # q(y_star|y) = non-gaussian posterior probability of class membership - p = self.likelihood.predictive_mean(mu_star,vdiag) - return mu_star,vdiag,p - - def plot(self): - """ - Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals. - """ - if self.X.shape[1]==1: - pb.figure() - xmin,xmax = np.r_[self.X,self.Z].min(),np.r_[self.X,self.Z].max() - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - Xnew = np.linspace(xmin,xmax,100)[:,None] - mu_f, var_f, mu_phi = self.predict(Xnew) - self.mu_inducing,self.var_diag_inducing,self.phi_inducing = self.predict(self.Z) - pb.subplot(211) - self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.Z,Mean_u=self.mu_inducing,Var_u=self.var_diag_inducing) - pb.subplot(212) - self.likelihood.plot1Db(self.X,Xnew,mu_phi,self.Z) - elif self.X.shape[1]==2: - pb.figure() - x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0) - x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0) - x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min) - x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x1max-x1min) - axis1 = np.linspace(x1min,x1max,50) - axis2 = np.linspace(x2min,x2max,50) - XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)] - Xnew = np.c_[XX1.flatten(),XX2.flatten()] - f,v,p = self.predict(Xnew) - self.likelihood.plot2D(self.X,Xnew,p,self.Z) - else: - raise NotImplementedError, "Cannot plot GPs with more than two input dimensions" - - def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense - """ - Fits sparse_EP and optimizes the hyperparametes iteratively until convergence is achieved. - """ - self.epsilon_em = epsilon - log_likelihood_change = self.epsilon_em + 1. 
- self.parameters_path = [self.kernel._get_params()] - self.approximate_likelihood() - self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]] - self.inducing_inputs_path = [self.Z] - self.log_likelihood_path = [self.log_likelihood()] - iteration = 0 - while log_likelihood_change > self.epsilon_em: - print 'EM iteration', iteration - self.optimize(max_f_eval = max_f_eval) - log_likelihood_new = self.log_likelihood() - log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1] - if log_likelihood_change < 0: - print 'log_likelihood decrement' - self.kernel._set_params_transformed(self.parameters_path[-1]) - self.kernM = self.kernel.copy() - slef.kernM.expand_X(self.iducing_inputs_path[-1]) - self.__init__(self.kernel,self.likelihood,kernM=self.kernM,powerep=[self.eta,self.delta],epsilon_ep = self.epsilon_ep, epsilon_em = self.epsilon_em) - - else: - self.approximate_likelihood() - self.log_likelihood_path.append(self.log_likelihood()) - self.parameters_path.append(self.kernel._get_params()) - self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde]) - self.inducing_inputs_path.append(self.Z) - iteration += 1 diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py new file mode 100644 index 00000000..3239d462 --- /dev/null +++ b/GPy/models/sparse_GP.py @@ -0,0 +1,217 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import pylab as pb +from ..util.linalg import mdot, jitchol, chol_inv, pdinv +from ..util.plot import gpplot +from .. import kern +from GP import GP + +#Still TODO: +# make use of slices properly (kernel can now do this) +# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array) + +class sparse_GP(GP): + """ + Variational sparse GP model + + :param X: inputs + :type X: np.ndarray (N x Q) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP) + :param kernel : the kernel/covariance function. See link kernels + :type kernel: a GPy kernel + :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) + :type X_uncertainty: np.ndarray (N x Q) | None + :param Z: inducing inputs (optional, see note) + :type Z: np.ndarray (M x Q) | None + :param Zslices: slices for the inducing inputs (see slicing TODO: link) + :param M : Number of inducing points (optional, default 10. 
Ignored if Z is not None) + :type M: int + :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) + :type normalize_(X|Y): bool + """ + + def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False): + self.scale_factor = 1.0# a scaling factor to help keep the algorithm stable + + self.Z = Z + self.Zslices = Zslices + self.Xslices = Xslices + self.M = Z.shape[0] + self.likelihood = likelihood + + if X_uncertainty is None: + self.has_uncertain_inputs=False + else: + assert X_uncertainty.shape==X.shape + self.has_uncertain_inputs=True + self.X_uncertainty = X_uncertainty + + GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices) + + #normalise X uncertainty also + if self.has_uncertain_inputs: + self.X_uncertainty /= np.square(self._Xstd) + + def _computations(self): + # TODO find routine to multiply triangular matrices + #TODO: slices for psi statistics (easy enough) + + sf = self.scale_factor + sf2 = sf**2 + + # kernel computations, using BGPLVM notation + self.Kmm = self.kern.K(self.Z) + if self.has_uncertain_inputs: + self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty) + self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T + self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) + if self.likelihood.is_heteroscedastic: + self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0) + #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs? + else: + self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0) + else: + self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices) + self.psi1 = self.kern.K(self.Z,self.X) + if self.likelihood.is_heteroscedastic: + tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(self.N,1))/sf) + else: + tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf) + self.psi2_beta_scaled = np.dot(tmp,tmp.T) + self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] # TODO: remove me for efficiency and stability + + self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) + + self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y + self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T) + self.B = np.eye(self.M)/sf2 + self.A + + self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) + + self.psi1V = np.dot(self.psi1, self.V) + self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) + self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) + self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) + + # Compute dL_dpsi # FIXME: this is untested for the het. 
case
+        self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N)
+        self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
+        if self.likelihood.is_heteroscedastic:
+            self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
+        else:
+            self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi[None,:,:] # dB
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C[None,:,:] # dC
+            self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E[None,:,:] # dD
+
+        # Compute dL_dKmm
+        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
+        self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
+
+        #the partial derivative vector for the likelihood
+        if self.likelihood.Nparams ==0:
+            #save computation here.
+            self.partial_for_likelihood = None
+        elif self.likelihood.is_heteroscedastic:
+            raise NotImplementedError, "heteroscedastic derivatives not implemented"
+            #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
+            #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
+            #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
+            #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
+        else:
+            #likelihood is not heteroscedastic
+            beta = self.likelihood.precision
+            dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
+            dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
+            dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
+            dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
+            self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
+
+
+    def _set_params(self, p):
+        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
+        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
+        self._computations()
+
+    def _get_params(self):
+        return np.hstack([self.Z.flatten(),GP._get_params(self)])
+
+    def _get_param_names(self):
+        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + GP._get_param_names(self)
+
+    def log_likelihood(self):
+        """ Compute the (lower bound on the) log marginal likelihood """
+        sf2 = self.scale_factor**2
+        if self.likelihood.is_heteroscedastic:
+            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
+        else:
+            A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
+        B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
+        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
+        D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
+        return A+B+C+D
+
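+    # Editor's annotation (hedged, not in the original patch): in the bound
+    # above, A is the Gaussian normaliser plus the raw data-fit term
+    # -0.5*beta*tr(YY^T), B is the psi0/trace correction of the variational
+    # sparse approximation, C is the log-determinant complexity penalty, and
+    # D adds back the portion of the data fit explained through the inducing
+    # points; the sf2 rescalings keep the factorisations numerically stable.
+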
+    def _log_likelihood_gradients(self):
+        return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
+
+    def dL_dtheta(self):
+        """
+        Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
+        """
+        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
+        if self.has_uncertain_inputs:
+            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
+            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
+            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
+        else:
+            #re-cast computations in psi2 back to psi1:
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
+            dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
+            dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
+
+        return dL_dtheta
+
+    def dL_dZ(self):
+        """
+        The derivative of the bound wrt the inducing inputs Z
+        """
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) #factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
+        if self.has_uncertain_inputs:
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
+        else:
+            #re-cast computations in psi2 back to psi1:
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
+            dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
+        return dL_dZ
+
+    def _raw_predict(self, Xnew, slices, full_cov=False):
+        """Internal helper function for making predictions, does not account for normalisation"""
+
+        Kx = self.kern.K(self.Z, Xnew)
+        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
+        if full_cov:
+            Kxx = self.kern.K(Xnew)
+            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
+        else:
+            Kxx = self.kern.Kdiag(Xnew)
+            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
+
+        return mu,var[:,None]
+
+    def plot(self, *args, **kwargs):
+        """
+        Plot the fitted model: just call the GP plot function and then add inducing inputs
+        """
+        GP.plot(self,*args,**kwargs)
+        if self.Q==1:
+            if self.has_uncertain_inputs:
+                pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
+        if self.Q==2:
+            pb.plot(self.Z[:,0],self.Z[:,1],'wo')
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 6b958dae..75c9eeeb 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -1,205 +1,46 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
+
 import numpy as np
-import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
-from ..util.plot import gpplot
+from sparse_GP import sparse_GP
+from .. import likelihoods
+from .. 
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 6b958dae..75c9eeeb 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -1,205 +1,46 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
+
 import numpy as np
-import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
-from ..util.plot import gpplot
+from sparse_GP import sparse_GP
+from .. import likelihoods
+from .. import kern
-from ..inference.likelihoods import likelihood
-from GP_regression import GP_regression
-
-#Still TODO:
-# make use of slices properly (kernel can now do this)
-# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
-
-class sparse_GP_regression(GP_regression):
+class sparse_GP_regression(sparse_GP):
     """
-    Variational sparse GP model (Regression)
+    Sparse Gaussian Process model for regression
+
+    This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
+
+    :param X: input observations
+    :param Y: observed values
+    :param kernel: a GPy kernel, defaults to rbf+white
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+    :type normalize_Y: False|True
+    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
+    :rtype: model object
+
+    .. Note:: Multiple independent outputs are allowed using columns of Y
-    :param X: inputs
-    :type X: np.ndarray (N x Q)
-    :param Y: observed data
-    :type Y: np.ndarray of observations (N x D)
-    :param kernel : the kernel/covariance function. See link kernels
-    :type kernel: a GPy kernel
-    :param Z: inducing inputs (optional, see note)
-    :type Z: np.ndarray (M x Q) | None
-    :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
-    :type X_uncertainty: np.ndarray (N x Q) | None
-    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
-    :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
-    :type M: int
-    :param beta: noise precision. TODO> ignore beta if doing EP
-    :type beta: float
-    :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
-    :type normalize_(X|Y): bool
     """

-    def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
-        self.scale_factor = 100.0
-        self.beta = beta
+    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
+        # kern defaults to rbf
+        if kernel is None:
+            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
+
+        # Z defaults to a subset of the data
         if Z is None:
-            self.Z = np.random.permutation(X.copy())[:M]
-            self.M = M
+            Z = np.random.permutation(X.copy())[:M]
         else:
             assert Z.shape[1]==X.shape[1]
-            self.Z = Z
-            self.M = Z.shape[0]
-        if X_uncertainty is None:
-            self.has_uncertain_inputs=False
-        else:
-            assert X_uncertainty.shape==X.shape
-            self.has_uncertain_inputs=True
-            self.X_uncertainty = X_uncertainty
-        GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y)
-        self.trYYT = np.sum(np.square(self.Y))
+        # likelihood defaults to Gaussian
+        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)

-        # normalise X uncertainty also
-        if self.has_uncertain_inputs:
-            self.X_uncertainty /= np.square(self._Xstd)
-
-    def _computations(self):
-        # TODO find routine to multiply triangular matrices
-        # TODO: slices for psi statistics (easy enough)
-
-        # kernel computations, using BGPLVM notation
-        self.Kmm = self.kern.K(self.Z)
-        if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
-            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
-            self.psi2 = self.kern.psi2(self.Z,self.X, 
self.X_uncertainty) - self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0) - else: - self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum() - self.psi1 = self.kern.K(self.Z,self.X) - #self.psi2 = np.dot(self.psi1,self.psi1.T) - #self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] - tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta)) - self.psi2_beta_scaled = np.dot(tmp,tmp.T) - - sf = self.scale_factor - sf2 = sf**2 - - self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3) - - self.V = (self.beta/self.scale_factor)*self.Y - self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T) - self.B = np.eye(self.M)/sf2 + self.A - - self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) - - self.psi1V = np.dot(self.psi1, self.V) - self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) - self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) - self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) - - # Compute dL_dpsi - self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N) - self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T - self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB - self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC - self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD - - # Compute dL_dKmm - self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB - self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC - self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD - - - def _set_params(self, p): - self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) - self.beta = p[self.M*self.Q] - self.kern._set_params(p[self.Z.size + 1:]) - self._computations() - - def _get_params(self): - return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()]) - - def _get_param_names(self): - return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed() - - - def log_likelihood(self): - """ Compute the (lower bound on the) log marginal likelihood """ - sf2 = self.scale_factor**2 - A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT - B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2) - C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) - D = +0.5*np.sum(self.psi1VVpsi1 * self.C) - return A+B+C+D - - def _log_likelihood_gradients(self): - return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()]) - - def dL_dbeta(self): - """ - Compute the gradient of the log likelihood wrt beta. 
- """ - #TODO: suport heteroscedatic noise - sf2 = self.scale_factor**2 - dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT - dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2) - dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta - dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta - - return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta) - - def dL_dtheta(self): - """ - Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel - """ - dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) - if self.has_uncertain_inputs: - dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty) - dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty) - dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape - else: - #re-cast computations in psi2 back to psi1: - dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1) - dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X) - dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) - - return dL_dtheta - - def dL_dZ(self): - """ - The derivative of the bound wrt the inducing inputs Z - """ - dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ - if self.has_uncertain_inputs: - dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty) - dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes' - else: - #re-cast computations in psi2 back to psi1: - dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1) - dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X) - return dL_dZ - - def _raw_predict(self, Xnew, slices, full_cov=False): - """Internal helper function for making predictions, does not account for normalisation""" - - Kx = self.kern.K(self.Z, Xnew) - mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) - - if full_cov: - Kxx = self.kern.K(Xnew) - var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case. - else: - Kxx = self.kern.Kdiag(Xnew) - var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case. - - return mu,var - - def plot(self, *args, **kwargs): - """ - Plot the fitted model: just call the GP_regression plot function and then add inducing inputs - """ - GP_regression.plot(self,*args,**kwargs) - if self.Q==1: - pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12) - if self.has_uncertain_inputs: - pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten())) - if self.Q==2: - pb.plot(self.Z[:,0],self.Z[:,1],'wo') + sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices) diff --git a/GPy/models/uncollapsed_sparse_GP.py b/GPy/models/uncollapsed_sparse_GP.py index 3bb72e60..0fccfc71 100644 --- a/GPy/models/uncollapsed_sparse_GP.py +++ b/GPy/models/uncollapsed_sparse_GP.py @@ -6,7 +6,7 @@ import pylab as pb from ..util.linalg import mdot, jitchol, chol_inv, pdinv from ..util.plot import gpplot from .. 
import kern -from ..inference.likelihoods import likelihood +from ..likelihoods import likelihood from sparse_GP_regression import sparse_GP_regression class uncollapsed_sparse_GP(sparse_GP_regression): @@ -136,8 +136,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression): #dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean) dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0] - #dL_dSim = - #dL_dmhSi = + #dL_dSim = + #dL_dmhSi = return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index a302b25f..61fb15bb 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -154,17 +154,16 @@ class GradientTests(unittest.TestCase): m.constrain_positive('(linear|bias|white)') self.assertTrue(m.checkgrad()) - def test_GP_EP(self): - return # Disabled TODO + def test_GP_EP_probit(self): N = 20 - X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] - k = GPy.kern.rbf(1) + GPy.kern.white(1) - Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] - likelihood = GPy.inference.likelihoods.probit(Y) - m = GPy.models.GP_EP(X,likelihood,k) - m.constrain_positive('(var|len)') - m.approximate_likelihood() - self.assertTrue(m.checkgrad()) + X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] + Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None] + kernel = GPy.kern.rbf(1) + distribution = GPy.likelihoods.likelihood_functions.probit() + likelihood = GPy.likelihoods.EP(Y, distribution) + m = GPy.models.GP(X, likelihood, kernel) + m.ensure_default_constraints() + self.assertTrue(m.EPEM) @unittest.skip("FITC will be broken for a while") def test_generalized_FITC(self): diff --git a/GPy/util/Tango.py b/GPy/util/Tango.py index d2a16fdf..8035ffe6 100644 --- a/GPy/util/Tango.py +++ b/GPy/util/Tango.py @@ -3,7 +3,6 @@ import matplotlib as mpl - import pylab as pb import sys #sys.path.append('/home/james/mlprojects/sitran_cluster/') @@ -15,12 +14,12 @@ def removeRightTicks(ax=None): ax = ax or pb.gca() for i, line in enumerate(ax.get_yticklines()): if i%2 == 1: # odd indices - line.set_visible(False) + line.set_visible(False) def removeUpperTicks(ax=None): ax = ax or pb.gca() for i, line in enumerate(ax.get_xticklines()): if i%2 == 1: # odd indices - line.set_visible(False) + line.set_visible(False) def fewerXticks(ax=None,divideby=2): ax = ax or pb.gca() ax.set_xticks(ax.get_xticks()[::divideby]) @@ -126,8 +125,6 @@ cdict_RB = {'red' :((0.,coloursRGB['mediumRed'][0]/256.,coloursRGB['mediumRed'][ 'blue':((0.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.), (.5,coloursRGB['mediumPurple'][2]/256.,coloursRGB['mediumPurple'][2]/256.), (1.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.))} -cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256) - cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.), (.5,coloursRGB['mediumGreen'][0]/256.,coloursRGB['mediumGreen'][0]/256.), @@ -138,7 +135,7 @@ cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue 'blue':((0.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.), (.5,coloursRGB['mediumGreen'][2]/256.,coloursRGB['mediumGreen'][2]/256.), (1.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.))} -cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256) + cdict_Alu = {'red' 
:((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminium1'][0]/256.), (1./5,coloursRGB['Aluminium2'][0]/256.,coloursRGB['Aluminium2'][0]/256.), @@ -158,13 +155,12 @@ cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminiu (3./5,coloursRGB['Aluminium4'][2]/256.,coloursRGB['Aluminium4'][2]/256.), (4./5,coloursRGB['Aluminium5'][2]/256.,coloursRGB['Aluminium5'][2]/256.), (5./5,coloursRGB['Aluminium6'][2]/256.,coloursRGB['Aluminium6'][2]/256.))} -cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256) - +# cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256) +# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256) +# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256) if __name__=='__main__': import pylab as pb pb.figure() pb.pcolor(pb.rand(10,10),cmap=cmap_RB) pb.colorbar() pb.show() - - diff --git a/GPy/util/plot.py b/GPy/util/plot.py index 8c06633e..8e71764d 100644 --- a/GPy/util/plot.py +++ b/GPy/util/plot.py @@ -6,30 +6,26 @@ import Tango import pylab as pb import numpy as np -def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): +def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): if axes is None: axes = pb.gca() mu = mu.flatten() x = x.flatten() + lower = lower.flatten() + upper = upper.flatten() #here's the mean axes.plot(x,mu,color=edgecol,linewidth=2) - #ensure variance is a vector - if len(var.shape)>1: - err = 2*np.sqrt(np.diag(var)) - else: - err = 2*np.sqrt(var) - - #here's the 2*std box + #here's the box kwargs['linewidth']=0.5 if not 'alpha' in kwargs.keys(): kwargs['alpha'] = 0.3 - axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs) + axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs) #this is the edge: - axes.plot(x,mu+err,color=edgecol,linewidth=0.2) - axes.plot(x,mu-err,color=edgecol,linewidth=0.2) + axes.plot(x,upper,color=edgecol,linewidth=0.2) + axes.plot(x,lower,color=edgecol,linewidth=0.2) def removeRightTicks(ax=None): ax = ax or pb.gca() @@ -74,4 +70,36 @@ def align_subplots(N,M,xlim=None, ylim=None): else: removeUpperTicks() +def x_frame1D(X,plot_limits=None,resolution=None): + """ + Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits + """ + assert X.shape[1] ==1, "x_frame1D is defined for one-dimensional inputs" + if plot_limits is None: + xmin,xmax = X.min(0),X.max(0) + xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) + elif len(plot_limits)==2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None] + return Xnew, xmin, xmax + +def x_frame2D(X,plot_limits=None,resolution=None): + """ + Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits + """ + assert X.shape[1] ==2, "x_frame2D is defined for two-dimensional inputs" + if plot_limits is None: + xmin,xmax = X.min(0),X.max(0) + xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) + elif len(plot_limits)==2: + xmin, xmax = plot_limits + else: + raise ValueError, "Bad limits for plotting" + + resolution = resolution or 50 + xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] + Xnew = np.vstack((xx.flatten(),yy.flatten())).T + return Xnew, xx, yy, 
xmin, xmax diff --git a/README.md b/README.md index c69352b8..0b5d00ce 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ GPy === -A Gaussian processes framework in python. +A Gaussian processes framework in python * [Online documentation](https://gpy.readthedocs.org/en/latest/) * [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy) \ No newline at end of file diff --git a/doc/Figures/kern-def.png b/doc/Figures/kern-def.png new file mode 100644 index 00000000..bad43b09 Binary files /dev/null and b/doc/Figures/kern-def.png differ diff --git a/doc/Figures/tuto_GP_regression_m1.png b/doc/Figures/tuto_GP_regression_m1.png index c78d8a04..e1a11fb1 100644 Binary files a/doc/Figures/tuto_GP_regression_m1.png and b/doc/Figures/tuto_GP_regression_m1.png differ diff --git a/doc/Figures/tuto_GP_regression_m2.png b/doc/Figures/tuto_GP_regression_m2.png index b976a69c..7e54e919 100644 Binary files a/doc/Figures/tuto_GP_regression_m2.png and b/doc/Figures/tuto_GP_regression_m2.png differ diff --git a/doc/Figures/tuto_GP_regression_m3.png b/doc/Figures/tuto_GP_regression_m3.png index a675a463..5b2b227c 100644 Binary files a/doc/Figures/tuto_GP_regression_m3.png and b/doc/Figures/tuto_GP_regression_m3.png differ diff --git a/doc/Figures/tuto_kern_overview_add_orth.png b/doc/Figures/tuto_kern_overview_add_orth.png new file mode 100644 index 00000000..0d4f1c4e Binary files /dev/null and b/doc/Figures/tuto_kern_overview_add_orth.png differ diff --git a/doc/Figures/tuto_kern_overview_allkern.png b/doc/Figures/tuto_kern_overview_allkern.png new file mode 100644 index 00000000..f3406b07 Binary files /dev/null and b/doc/Figures/tuto_kern_overview_allkern.png differ diff --git a/doc/Figures/tuto_kern_overview_basicdef.png b/doc/Figures/tuto_kern_overview_basicdef.png new file mode 100644 index 00000000..bad43b09 Binary files /dev/null and b/doc/Figures/tuto_kern_overview_basicdef.png differ diff --git a/doc/Figures/tuto_kern_overview_mANOVA.png b/doc/Figures/tuto_kern_overview_mANOVA.png new file mode 100644 index 00000000..db49e3bd Binary files /dev/null and b/doc/Figures/tuto_kern_overview_mANOVA.png differ diff --git a/doc/Figures/tuto_kern_overview_mANOVAdec.png b/doc/Figures/tuto_kern_overview_mANOVAdec.png new file mode 100644 index 00000000..ef154263 Binary files /dev/null and b/doc/Figures/tuto_kern_overview_mANOVAdec.png differ diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst index 244e3012..59ffd43d 100644 --- a/doc/GPy.examples.rst +++ b/doc/GPy.examples.rst @@ -33,6 +33,14 @@ examples Package :undoc-members: :show-inheritance: +:mod:`poisson` Module +--------------------- + +.. automodule:: GPy.examples.poisson + :members: + :undoc-members: + :show-inheritance: + :mod:`regression` Module ------------------------ @@ -57,6 +65,14 @@ examples Package :undoc-members: :show-inheritance: +:mod:`sparse_ep_fix` Module +--------------------------- + +.. automodule:: GPy.examples.sparse_ep_fix + :members: + :undoc-members: + :show-inheritance: + :mod:`uncertain_input_GP_regression_demo` Module ------------------------------------------------ diff --git a/doc/GPy.inference.rst b/doc/GPy.inference.rst index 6f4ab691..357e70c7 100644 --- a/doc/GPy.inference.rst +++ b/doc/GPy.inference.rst @@ -1,22 +1,6 @@ inference Package ================= -:mod:`Expectation_Propagation` Module -------------------------------------- - -.. automodule:: GPy.inference.Expectation_Propagation - :members: - :undoc-members: - :show-inheritance: - -:mod:`likelihoods` Module -------------------------- - -.. 
automodule:: GPy.inference.likelihoods - :members: - :undoc-members: - :show-inheritance: - :mod:`optimization` Module -------------------------- diff --git a/doc/GPy.likelihoods.rst b/doc/GPy.likelihoods.rst new file mode 100644 index 00000000..34672d11 --- /dev/null +++ b/doc/GPy.likelihoods.rst @@ -0,0 +1,43 @@ +likelihoods Package +=================== + +:mod:`likelihoods` Package +-------------------------- + +.. automodule:: GPy.likelihoods + :members: + :undoc-members: + :show-inheritance: + +:mod:`EP` Module +---------------- + +.. automodule:: GPy.likelihoods.EP + :members: + :undoc-members: + :show-inheritance: + +:mod:`Gaussian` Module +---------------------- + +.. automodule:: GPy.likelihoods.Gaussian + :members: + :undoc-members: + :show-inheritance: + +:mod:`likelihood` Module +------------------------ + +.. automodule:: GPy.likelihoods.likelihood + :members: + :undoc-members: + :show-inheritance: + +:mod:`likelihood_functions` Module +---------------------------------- + +.. automodule:: GPy.likelihoods.likelihood_functions + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst index b0a7a298..8837ac4e 100644 --- a/doc/GPy.models.rst +++ b/doc/GPy.models.rst @@ -17,18 +17,18 @@ models Package :undoc-members: :show-inheritance: -:mod:`GPLVM` Module -------------------- +:mod:`GP` Module +---------------- -.. automodule:: GPy.models.GPLVM +.. automodule:: GPy.models.GP :members: :undoc-members: :show-inheritance: -:mod:`GP_EP` Module +:mod:`GPLVM` Module ------------------- -.. automodule:: GPy.models.GP_EP +.. automodule:: GPy.models.GPLVM :members: :undoc-members: :show-inheritance: @@ -41,10 +41,10 @@ models Package :undoc-members: :show-inheritance: -:mod:`generalized_FITC` Module ------------------------------- +:mod:`sparse_GP` Module +----------------------- -.. automodule:: GPy.models.generalized_FITC +.. automodule:: GPy.models.sparse_GP :members: :undoc-members: :show-inheritance: diff --git a/doc/GPy.rst b/doc/GPy.rst index d3c1e843..3fd4bcfd 100644 --- a/doc/GPy.rst +++ b/doc/GPy.rst @@ -18,6 +18,7 @@ Subpackages GPy.examples GPy.inference GPy.kern + GPy.likelihoods GPy.models GPy.util diff --git a/doc/Makefile b/doc/Makefile index faa4ed65..95018f47 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -41,6 +41,7 @@ help: clean: -rm -rf $(BUILDDIR)/* + html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo diff --git a/doc/conf.py b/doc/conf.py index f1c120b5..8a05f386 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -11,12 +11,50 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import sys +import os + +print "python exec:", sys.executable +print "sys.path:", sys.path +try: + import numpy + print "numpy: %s, %s" % (numpy.__version__, numpy.__file__) +except ImportError: + print "no numpy" +try: + import matplotlib + print "matplotlib: %s, %s" % (matplotlib.__version__, matplotlib.__file__) +except ImportError: + print "no matplotlib" +try: + import ipython + print "ipython: %s, %s" % (ipython.__version__, ipython.__file__) +except ImportError: + print "no ipython" +try: + import sphinx + print "sphinx: %s, %s" % (sphinx.__version__, sphinx.__file__) +except ImportError: + print "no sphinx" + +print "sys.path:", sys.path # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +#sys.path.insert(0, os.path.abspath('../GPy')) + +#print "sys.path.after:", sys.path + +# If your extensions are in another directory, add it here. If the directory +# is relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +sys.path.append(os.path.abspath('sphinxext')) + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('./sphinxext')) # -- General configuration ----------------------------------------------------- @@ -25,15 +63,77 @@ import sys, os # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] +print "Importing extensions" + +extensions = ['sphinx.ext.autodoc', + #'sphinx.ext.doctest' + 'sphinx.ext.viewcode', + 'sphinx.ext.pngmath', + 'ipython_directive', + 'ipython_console_highlighting' + #'matplotlib.sphinxext.plot_directive' + ] +plot_formats = [('png', 80), ('pdf', 50)] + +print "finished importing" + +############################################################################## +## +## Mock out imports with C dependencies because ReadTheDocs can't build them. +############################################################################# + +class Mock(object): + def __init__(self, *args, **kwargs): + pass + + def __call__(self, *args, **kwargs): + return Mock() + + @classmethod + def __getattr__(cls, name): + if name in ('__file__', '__path__'): + return '/dev/null' + elif name[0] == name[0].upper(): + mockType = type(name, (), {}) + mockType.__module__ = __name__ + return mockType + else: + return Mock() + +#import mock + +print "Mocking" +MOCK_MODULES = ['pylab', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'matplotlib'] +#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ] +for mod_name in MOCK_MODULES: + sys.modules[mod_name] = Mock() # ----------------------- READTHEDOCS ------------------ on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +on_rtd = True if on_rtd: - sys.path.append("../GPy") - os.system("pwd") - os.system("sphinx-apidoc -f -o . ../GPy") + sys.path.append(os.path.abspath('../GPy')) + + import subprocess + + proc = subprocess.Popen("pwd", stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + print "program output:", out + proc = subprocess.Popen("ls ../", stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + print "program output:", out + proc = subprocess.Popen("sphinx-apidoc -f -o . ../GPy", stdout=subprocess.PIPE, shell=True) + (out, err) = proc.communicate() + print "program output:", out + #proc = subprocess.Popen("whereis numpy", stdout=subprocess.PIPE, shell=True) + #(out, err) = proc.communicate() + #print "program output:", out + #proc = subprocess.Popen("whereis matplotlib", stdout=subprocess.PIPE, shell=True) + #(out, err) = proc.communicate() + #print "program output:", out + +print "Compiled files" # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates'] @@ -181,21 +281,21 @@ htmlhelp_basename = 'GPydoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'GPy.tex', u'GPy Documentation', - u'Author', 'manual'), + ('index', 'GPy.tex', u'GPy Documentation', + u'Author', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -238,9 +338,9 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'GPy', u'GPy Documentation', - u'Author', 'GPy', 'One line description of project.', - 'Miscellaneous'), + ('index', 'GPy', u'GPy Documentation', + u'Author', 'GPy', 'One line description of project.', + 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. @@ -294,3 +394,5 @@ epub_copyright = u'2013, Author' # Allow duplicate toc entries. #epub_tocdup = True + +autodoc_member_order = "source" diff --git a/doc/doc-requirements.txt b/doc/doc-requirements.txt new file mode 100644 index 00000000..0b5ac59b --- /dev/null +++ b/doc/doc-requirements.txt @@ -0,0 +1,3 @@ +ipython +numpy +scipy diff --git a/doc/index.rst b/doc/index.rst index 28690e99..b62ff6a7 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -8,8 +8,8 @@ Welcome to GPy's documentation! For a quick start, you can have a look at one of the tutorials: * `Basic Gaussian process regression `_ +* `A kernel overview `_ * Advanced GP regression (Forthcoming) -* Kernel manipulation (Forthcoming) * Writting kernels (Forthcoming) You may also be interested by some examples in the GPy/examples folder. @@ -28,4 +28,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/doc/sphinxext/ipython_console_highlighting.py b/doc/sphinxext/ipython_console_highlighting.py new file mode 100644 index 00000000..f5cced41 --- /dev/null +++ b/doc/sphinxext/ipython_console_highlighting.py @@ -0,0 +1,115 @@ +"""reST directive for syntax-highlighting ipython interactive sessions. + +XXX - See what improvements can be made based on the new (as of Sept 2009) +'pycon' lexer for the python console. At the very least it will give better +highlighted tracebacks. +""" + +#----------------------------------------------------------------------------- +# Needed modules + +# Standard library +import re + +# Third party +from pygments.lexer import Lexer, do_insertions +from pygments.lexers.agile import (PythonConsoleLexer, PythonLexer, + PythonTracebackLexer) +from pygments.token import Comment, Generic + +from sphinx import highlighting + +#----------------------------------------------------------------------------- +# Global constants +line_re = re.compile('.*?\n') + +#----------------------------------------------------------------------------- +# Code begins - classes and functions + +class IPythonConsoleLexer(Lexer): + """ + For IPython console output or doctests, such as: + + .. 
sourcecode:: ipython + + In [1]: a = 'foo' + + In [2]: a + Out[2]: 'foo' + + In [3]: print a + foo + + In [4]: 1 / 0 + + Notes: + + - Tracebacks are not currently supported. + + - It assumes the default IPython prompts, not customized ones. + """ + + name = 'IPython console session' + aliases = ['ipython'] + mimetypes = ['text/x-ipython-console'] + input_prompt = re.compile("(In \[[0-9]+\]: )|( \.\.\.+:)") + output_prompt = re.compile("(Out\[[0-9]+\]: )|( \.\.\.+:)") + continue_prompt = re.compile(" \.\.\.+:") + tb_start = re.compile("\-+") + + def get_tokens_unprocessed(self, text): + pylexer = PythonLexer(**self.options) + tblexer = PythonTracebackLexer(**self.options) + + curcode = '' + insertions = [] + for match in line_re.finditer(text): + line = match.group() + input_prompt = self.input_prompt.match(line) + continue_prompt = self.continue_prompt.match(line.rstrip()) + output_prompt = self.output_prompt.match(line) + if line.startswith("#"): + insertions.append((len(curcode), + [(0, Comment, line)])) + elif input_prompt is not None: + insertions.append((len(curcode), + [(0, Generic.Prompt, input_prompt.group())])) + curcode += line[input_prompt.end():] + elif continue_prompt is not None: + insertions.append((len(curcode), + [(0, Generic.Prompt, continue_prompt.group())])) + curcode += line[continue_prompt.end():] + elif output_prompt is not None: + # Use the 'error' token for output. We should probably make + # our own token, but error is typicaly in a bright color like + # red, so it works fine for our output prompts. + insertions.append((len(curcode), + [(0, Generic.Error, output_prompt.group())])) + curcode += line[output_prompt.end():] + else: + if curcode: + for item in do_insertions(insertions, + pylexer.get_tokens_unprocessed(curcode)): + yield item + curcode = '' + insertions = [] + yield match.start(), Generic.Output, line + if curcode: + for item in do_insertions(insertions, + pylexer.get_tokens_unprocessed(curcode)): + yield item + + +def setup(app): + """Setup as a sphinx extension.""" + + # This is only a lexer, so adding it below to pygments appears sufficient. + # But if somebody knows that the right API usage should be to do that via + # sphinx, by all means fix it here. At least having this setup.py + # suppresses the sphinx warning we'd get without it. + pass + +#----------------------------------------------------------------------------- +# Register the extension as a valid pygments lexer +highlighting.lexers['ipython'] = IPythonConsoleLexer() + diff --git a/doc/sphinxext/ipython_directive.py b/doc/sphinxext/ipython_directive.py new file mode 100644 index 00000000..2c2696c1 --- /dev/null +++ b/doc/sphinxext/ipython_directive.py @@ -0,0 +1,835 @@ +# -*- coding: utf-8 -*- +"""Sphinx directive to support embedded IPython code. + +This directive allows pasting of entire interactive IPython sessions, prompts +and all, and their code will actually get re-executed at doc build time, with +all prompts renumbered sequentially. It also allows you to input code as a pure +python input by giving the argument python to the directive. The output looks +like an interactive ipython section. + +To enable this directive, simply list it in your Sphinx ``conf.py`` file +(making sure the directory where you placed it is visible to sphinx, as is +needed for all Sphinx directives). + +By default this directive assumes that your prompts are unchanged IPython ones, +but this can be customized. 
The configurable options that can be placed in +conf.py are + +ipython_savefig_dir: + The directory in which to save the figures. This is relative to the + Sphinx source directory. The default is `html_static_path`. +ipython_rgxin: + The compiled regular expression to denote the start of IPython input + lines. The default is re.compile('In \[(\d+)\]:\s?(.*)\s*'). You + shouldn't need to change this. +ipython_rgxout: + The compiled regular expression to denote the start of IPython output + lines. The default is re.compile('Out\[(\d+)\]:\s?(.*)\s*'). You + shouldn't need to change this. +ipython_promptin: + The string to represent the IPython input prompt in the generated ReST. + The default is 'In [%d]:'. This expects that the line numbers are used + in the prompt. +ipython_promptout: + + The string to represent the IPython prompt in the generated ReST. The + default is 'Out [%d]:'. This expects that the line numbers are used + in the prompt. + +ToDo +---- + +- Turn the ad-hoc test() function into a real test suite. +- Break up ipython-specific functionality from matplotlib stuff into better + separated code. + +Authors +------- + +- John D Hunter: orignal author. +- Fernando Perez: refactoring, documentation, cleanups, port to 0.11. +- VáclavŠmilauer : Prompt generalizations. +- Skipper Seabold, refactoring, cleanups, pure python addition +""" + +#----------------------------------------------------------------------------- +# Imports +#----------------------------------------------------------------------------- + +# Stdlib +import cStringIO +import os +import re +import sys +import tempfile +import ast + +# To keep compatibility with various python versions +try: + from hashlib import md5 +except ImportError: + from md5 import md5 + +# Third-party +try: + import matplotlib + matplotlib.use('Agg') +except ImportError: + print "Couldn't find matplotlib" + +import sphinx +from docutils.parsers.rst import directives +from docutils import nodes +from sphinx.util.compat import Directive + +# Our own +from IPython import Config, InteractiveShell +from IPython.core.profiledir import ProfileDir +from IPython.utils import io + +#----------------------------------------------------------------------------- +# Globals +#----------------------------------------------------------------------------- +# for tokenizing blocks +COMMENT, INPUT, OUTPUT = range(3) + +#----------------------------------------------------------------------------- +# Functions and class declarations +#----------------------------------------------------------------------------- +def block_parser(part, rgxin, rgxout, fmtin, fmtout): + """ + part is a string of ipython text, comprised of at most one + input, one ouput, comments, and blank lines. The block parser + parses the text into a list of:: + + blocks = [ (TOKEN0, data0), (TOKEN1, data1), ...] 
+ + where TOKEN is one of [COMMENT | INPUT | OUTPUT ] and + data is, depending on the type of token:: + + COMMENT : the comment string + + INPUT: the (DECORATOR, INPUT_LINE, REST) where + DECORATOR: the input decorator (or None) + INPUT_LINE: the input as string (possibly multi-line) + REST : any stdout generated by the input line (not OUTPUT) + + + OUTPUT: the output string, possibly multi-line + """ + + block = [] + lines = part.split('\n') + N = len(lines) + i = 0 + decorator = None + while 1: + + if i==N: + # nothing left to parse -- the last line + break + + line = lines[i] + i += 1 + line_stripped = line.strip() + if line_stripped.startswith('#'): + block.append((COMMENT, line)) + continue + + if line_stripped.startswith('@'): + # we're assuming at most one decorator -- may need to + # rethink + decorator = line_stripped + continue + + # does this look like an input line? + matchin = rgxin.match(line) + if matchin: + lineno, inputline = int(matchin.group(1)), matchin.group(2) + + # the ....: continuation string + continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2)) + Nc = len(continuation) + # input lines can continue on for more than one line, if + # we have a '\' line continuation char or a function call + # echo line 'print'. The input line can only be + # terminated by the end of the block or an output line, so + # we parse out the rest of the input line if it is + # multiline as well as any echo text + + rest = [] + while i 1: + if input_lines[-1] != "": + input_lines.append('') # make sure there's a blank line + # so splitter buffer gets reset + + continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2)) + Nc = len(continuation) + + if is_savefig: + image_file, image_directive = self.process_image(decorator) + + ret = [] + is_semicolon = False + + for i, line in enumerate(input_lines): + if line.endswith(';'): + is_semicolon = True + + if i==0: + # process the first input line + if is_verbatim: + self.process_input_line('') + self.IP.execution_count += 1 # increment it anyway + else: + # only submit the line in non-verbatim mode + self.process_input_line(line, store_history=True) + formatted_line = '%s %s'%(input_prompt, line) + else: + # process a continuation line + if not is_verbatim: + self.process_input_line(line, store_history=True) + + formatted_line = '%s %s'%(continuation, line) + + if not is_suppress: + ret.append(formatted_line) + + if not is_suppress and len(rest.strip()) and is_verbatim: + # the "rest" is the standard output of the + # input, which needs to be added in + # verbatim mode + ret.append(rest) + + self.cout.seek(0) + output = self.cout.read() + if not is_suppress and not is_semicolon: + ret.append(output) + elif is_semicolon: # get spacing right + ret.append('') + + self.cout.truncate(0) + return (ret, input_lines, output, is_doctest, image_file, + image_directive) + #print 'OUTPUT', output # dbg + + def process_output(self, data, output_prompt, + input_lines, output, is_doctest, image_file): + """Process data block for OUTPUT token.""" + if is_doctest: + submitted = data.strip() + found = output + if found is not None: + found = found.strip() + + # XXX - fperez: in 0.11, 'output' never comes with the prompt + # in it, just the actual output text. So I think all this code + # can be nuked... + + # the above comment does not appear to be accurate... 
(minrk) + + ind = found.find(output_prompt) + if ind<0: + e='output prompt="%s" does not match out line=%s' % \ + (output_prompt, found) + raise RuntimeError(e) + found = found[len(output_prompt):].strip() + + if found!=submitted: + e = ('doctest failure for input_lines="%s" with ' + 'found_output="%s" and submitted output="%s"' % + (input_lines, found, submitted) ) + raise RuntimeError(e) + #print 'doctest PASSED for input_lines="%s" with found_output="%s" and submitted output="%s"'%(input_lines, found, submitted) + + def process_comment(self, data): + """Process data fPblock for COMMENT token.""" + if not self.is_suppress: + return [data] + + def save_image(self, image_file): + """ + Saves the image file to disk. + """ + self.ensure_pyplot() + command = 'plt.gcf().savefig("%s")'%image_file + #print 'SAVEFIG', command # dbg + self.process_input_line('bookmark ipy_thisdir', store_history=False) + self.process_input_line('cd -b ipy_savedir', store_history=False) + self.process_input_line(command, store_history=False) + self.process_input_line('cd -b ipy_thisdir', store_history=False) + self.process_input_line('bookmark -d ipy_thisdir', store_history=False) + self.clear_cout() + + + def process_block(self, block): + """ + process block from the block_parser and return a list of processed lines + """ + ret = [] + output = None + input_lines = None + lineno = self.IP.execution_count + + input_prompt = self.promptin%lineno + output_prompt = self.promptout%lineno + image_file = None + image_directive = None + + for token, data in block: + if token==COMMENT: + out_data = self.process_comment(data) + elif token==INPUT: + (out_data, input_lines, output, is_doctest, image_file, + image_directive) = \ + self.process_input(data, input_prompt, lineno) + elif token==OUTPUT: + out_data = \ + self.process_output(data, output_prompt, + input_lines, output, is_doctest, + image_file) + if out_data: + ret.extend(out_data) + + # save the image files + if image_file is not None: + self.save_image(image_file) + + return ret, image_directive + + def ensure_pyplot(self): + if self._pyplot_imported: + return + self.process_input_line('import matplotlib.pyplot as plt', + store_history=False) + + def process_pure_python(self, content): + """ + content is a list of strings. 
it is unedited directive conent + + This runs it line by line in the InteractiveShell, prepends + prompts as needed capturing stderr and stdout, then returns + the content as a list as if it were ipython code + """ + output = [] + savefig = False # keep up with this to clear figure + multiline = False # to handle line continuation + multiline_start = None + fmtin = self.promptin + + ct = 0 + + for lineno, line in enumerate(content): + + line_stripped = line.strip() + if not len(line): + output.append(line) + continue + + # handle decorators + if line_stripped.startswith('@'): + output.extend([line]) + if 'savefig' in line: + savefig = True # and need to clear figure + continue + + # handle comments + if line_stripped.startswith('#'): + output.extend([line]) + continue + + # deal with lines checking for multiline + continuation = u' %s:'% ''.join(['.']*(len(str(ct))+2)) + if not multiline: + modified = u"%s %s" % (fmtin % ct, line_stripped) + output.append(modified) + ct += 1 + try: + ast.parse(line_stripped) + output.append(u'') + except Exception: # on a multiline + multiline = True + multiline_start = lineno + else: # still on a multiline + modified = u'%s %s' % (continuation, line) + output.append(modified) + try: + mod = ast.parse( + '\n'.join(content[multiline_start:lineno+1])) + if isinstance(mod.body[0], ast.FunctionDef): + # check to see if we have the whole function + for element in mod.body[0].body: + if isinstance(element, ast.Return): + multiline = False + else: + output.append(u'') + multiline = False + except Exception: + pass + + if savefig: # clear figure if plotted + self.ensure_pyplot() + self.process_input_line('plt.clf()', store_history=False) + self.clear_cout() + savefig = False + + return output + +class IpythonDirective(Directive): + + has_content = True + required_arguments = 0 + optional_arguments = 4 # python, suppress, verbatim, doctest + final_argumuent_whitespace = True + option_spec = { 'python': directives.unchanged, + 'suppress' : directives.flag, + 'verbatim' : directives.flag, + 'doctest' : directives.flag, + } + + shell = EmbeddedSphinxShell() + + def get_config_options(self): + # contains sphinx configuration variables + config = self.state.document.settings.env.config + + # get config variables to set figure output directory + confdir = self.state.document.settings.env.app.confdir + savefig_dir = config.ipython_savefig_dir + source_dir = os.path.dirname(self.state.document.current_source) + if savefig_dir is None: + savefig_dir = config.html_static_path + if isinstance(savefig_dir, list): + savefig_dir = savefig_dir[0] # safe to assume only one path? + savefig_dir = os.path.join(confdir, savefig_dir) + + # get regex and prompt stuff + rgxin = config.ipython_rgxin + rgxout = config.ipython_rgxout + promptin = config.ipython_promptin + promptout = config.ipython_promptout + + return savefig_dir, source_dir, rgxin, rgxout, promptin, promptout + + def setup(self): + # reset the execution count if we haven't processed this doc + #NOTE: this may be borked if there are multiple seen_doc tmp files + #check time stamp? 
+ seen_docs = [i for i in os.listdir(tempfile.tempdir) + if i.startswith('seen_doc')] + if seen_docs: + fname = os.path.join(tempfile.tempdir, seen_docs[0]) + docs = open(fname).read().split('\n') + if not self.state.document.current_source in docs: + self.shell.IP.history_manager.reset() + self.shell.IP.execution_count = 1 + else: # haven't processed any docs yet + docs = [] + + + # get config values + (savefig_dir, source_dir, rgxin, + rgxout, promptin, promptout) = self.get_config_options() + + # and attach to shell so we don't have to pass them around + self.shell.rgxin = rgxin + self.shell.rgxout = rgxout + self.shell.promptin = promptin + self.shell.promptout = promptout + self.shell.savefig_dir = savefig_dir + self.shell.source_dir = source_dir + + # setup bookmark for saving figures directory + + self.shell.process_input_line('bookmark ipy_savedir %s'%savefig_dir, + store_history=False) + self.shell.clear_cout() + + # write the filename to a tempfile because it's been "seen" now + if not self.state.document.current_source in docs: + fd, fname = tempfile.mkstemp(prefix="seen_doc", text=True) + fout = open(fname, 'a') + fout.write(self.state.document.current_source+'\n') + fout.close() + + return rgxin, rgxout, promptin, promptout + + + def teardown(self): + # delete last bookmark + self.shell.process_input_line('bookmark -d ipy_savedir', + store_history=False) + self.shell.clear_cout() + + def run(self): + debug = False + + #TODO, any reason block_parser can't be a method of embeddable shell + # then we wouldn't have to carry these around + rgxin, rgxout, promptin, promptout = self.setup() + + options = self.options + self.shell.is_suppress = 'suppress' in options + self.shell.is_doctest = 'doctest' in options + self.shell.is_verbatim = 'verbatim' in options + + + # handle pure python code + if 'python' in self.arguments: + content = self.content + self.content = self.shell.process_pure_python(content) + + parts = '\n'.join(self.content).split('\n\n') + + lines = ['.. code-block:: ipython',''] + figures = [] + + for part in parts: + + block = block_parser(part, rgxin, rgxout, promptin, promptout) + + if len(block): + rows, figure = self.shell.process_block(block) + for row in rows: + lines.extend([' %s'%line for line in row.split('\n')]) + + if figure is not None: + figures.append(figure) + + #text = '\n'.join(lines) + #figs = '\n'.join(figures) + + for figure in figures: + lines.append('') + lines.extend(figure.split('\n')) + lines.append('') + + #print lines + if len(lines)>2: + if debug: + print '\n'.join(lines) + else: #NOTE: this raises some errors, what's it for? + #print 'INSERTING %d lines'%len(lines) + self.state_machine.insert_input( + lines, self.state_machine.input_lines.source(0)) + + text = '\n'.join(lines) + txtnode = nodes.literal_block(text, text) + txtnode['language'] = 'ipython' + #imgnode = nodes.image(figs) + + # cleanup + self.teardown() + + return []#, imgnode] + +# Enable as a proper Sphinx directive +def setup(app): + setup.app = app + + app.add_directive('ipython', IpythonDirective) + app.add_config_value('ipython_savefig_dir', None, True) + app.add_config_value('ipython_rgxin', + re.compile('In \[(\d+)\]:\s?(.*)\s*'), True) + app.add_config_value('ipython_rgxout', + re.compile('Out\[(\d+)\]:\s?(.*)\s*'), True) + app.add_config_value('ipython_promptin', 'In [%d]:', True) + app.add_config_value('ipython_promptout', 'Out[%d]:', True) + + +# Simple smoke test, needs to be converted to a proper automatic test. 
+def test(): + + examples = [ + r""" +In [9]: pwd +Out[9]: '/home/jdhunter/py4science/book' + +In [10]: cd bookdata/ +/home/jdhunter/py4science/book/bookdata + +In [2]: from pylab import * + +In [2]: ion() + +In [3]: im = imread('stinkbug.png') + +@savefig mystinkbug.png width=4in +In [4]: imshow(im) +Out[4]: + +""", + r""" + +In [1]: x = 'hello world' + +# string methods can be +# used to alter the string +@doctest +In [2]: x.upper() +Out[2]: 'HELLO WORLD' + +@verbatim +In [3]: x.st +x.startswith x.strip +""", + r""" + +In [130]: url = 'http://ichart.finance.yahoo.com/table.csv?s=CROX\ + .....: &d=9&e=22&f=2009&g=d&a=1&br=8&c=2006&ignore=.csv' + +In [131]: print url.split('&') +['http://ichart.finance.yahoo.com/table.csv?s=CROX', 'd=9', 'e=22', 'f=2009', 'g=d', 'a=1', 'b=8', 'c=2006', 'ignore=.csv'] + +In [60]: import urllib + +""", + r"""\ + +In [133]: import numpy.random + +@suppress +In [134]: numpy.random.seed(2358) + +@doctest +In [135]: numpy.random.rand(10,2) +Out[135]: +array([[ 0.64524308, 0.59943846], + [ 0.47102322, 0.8715456 ], + [ 0.29370834, 0.74776844], + [ 0.99539577, 0.1313423 ], + [ 0.16250302, 0.21103583], + [ 0.81626524, 0.1312433 ], + [ 0.67338089, 0.72302393], + [ 0.7566368 , 0.07033696], + [ 0.22591016, 0.77731835], + [ 0.0072729 , 0.34273127]]) + +""", + + r""" +In [106]: print x +jdh + +In [109]: for i in range(10): + .....: print i + .....: + .....: +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +""", + + r""" + +In [144]: from pylab import * + +In [145]: ion() + +# use a semicolon to suppress the output +@savefig test_hist.png width=4in +In [151]: hist(np.random.randn(10000), 100); + + +@savefig test_plot.png width=4in +In [151]: plot(np.random.randn(10000), 'o'); + """, + + r""" +# use a semicolon to suppress the output +In [151]: plt.clf() + +@savefig plot_simple.png width=4in +In [151]: plot([1,2,3]) + +@savefig hist_simple.png width=4in +In [151]: hist(np.random.randn(10000), 100); + +""", + r""" +# update the current fig +In [151]: ylabel('number') + +In [152]: title('normal distribution') + + +@savefig hist_with_text.png +In [153]: grid(True) + + """, + ] + # skip local-file depending first example: + examples = examples[1:] + + #ipython_directive.DEBUG = True # dbg + #options = dict(suppress=True) # dbg + options = dict() + for example in examples: + content = example.split('\n') + ipython_directive('debug', arguments=None, options=options, + content=content, lineno=0, + content_offset=None, block_text=None, + state=None, state_machine=None, + ) + +# Run test suite as a script +if __name__=='__main__': + if not os.path.isdir('_static'): + os.mkdir('_static') + test() + print 'All OK? Check figures in _static/' + + diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst index 7d1a43df..92b25bc0 100644 --- a/doc/tuto_GP_regression.rst +++ b/doc/tuto_GP_regression.rst @@ -1,4 +1,3 @@ - ************************************* Gaussian process regression tutorial ************************************* @@ -12,7 +11,7 @@ We first import the libraries we will need: :: import numpy as np import GPy -1 dimensional model +1-dimensional model =================== For this toy example, we assume we have the following inputs and outputs:: @@ -22,13 +21,11 @@ For this toy example, we assume we have the following inputs and outputs:: Note that the observations Y include some noise. -The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. 
rbf or square exponential) plus some white noise::
+The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on the Gaussian kernel (i.e. rbf or squared exponential)::

-    Gaussian = GPy.kern.rbf(D=1)
-    noise = GPy.kern.white(D=1)
-    kernel = Gaussian + noise
+    kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)

-The parameter ``D`` stands for the dimension of the input space. Note that many other kernels are implemented such as:
+The parameter ``D`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Note that many other kernels are implemented, such as:

 * linear (``GPy.kern.linear``)
 * exponential kernel (``GPy.kern.exponential``)
@@ -41,19 +38,19 @@ The inputs required for building the model are the observations and the kernel::

     m = GPy.models.GP_regression(X,Y,kernel)

-The functions ``print`` and ``plot`` give an insight of the model we have just build. The code::
+By default, some observation noise is added to the model. The functions ``print`` and ``plot`` give an insight into the model we have just built. The code::

     print m
     m.plot()

 gives the following output: ::

-    Marginal log-likelihood: -2.281e+01
+    Marginal log-likelihood: -4.479e+00
          Name        |  Value  | Constraints | Ties | Prior
     -----------------------------------------------------------------
     rbf_variance     |  1.0000 |             |      |
     rbf_lengthscale  |  1.0000 |             |      |
-    white_variance   |  1.0000 |             |      |
+    noise variance   |  1.0000 |             |      |

 .. figure:: Figures/tuto_GP_regression_m1.png
     :align: center
@@ -75,24 +72,24 @@ but it is also possible to set a range on to constrain one parameter to be fixed

     m.unconstrain('')                      # Required to remove the previous constraints
     m.constrain_positive('rbf_variance')
     m.constrain_bounded('lengthscale',1.,10. )
-    m.constrain_fixed('white',0.0025)
+    m.constrain_fixed('noise',0.0025)

 Once the constraints have been imposed, the model can be optimized::

     m.optimize()

-If we want to perform some restarts to try to improve the result of the optimization, we can use the optimize_restart function::
+If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restarts`` function::

     m.optimize_restarts(Nrestarts = 10)

 Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model::

-    Marginal log-likelihood: 2.001e+01
+    Marginal log-likelihood: 3.603e+01
          Name        |  Value  | Constraints | Ties | Prior
     -----------------------------------------------------------------
-    rbf_variance     |  0.8033 |    (+ve)    |      |
-    rbf_lengthscale  |  1.8033 | (1.0, 10.0) |      |
-    white_variance   |  0.0025 |    Fixed    |      |
+    rbf_variance     |  0.8151 |    (+ve)    |      |
+    rbf_lengthscale  |  1.8037 | (1.0, 10.0) |      |
+    noise variance   |  0.0025 |    Fixed    |      |

 .. figure:: Figures/tuto_GP_regression_m2.png
     :align: center
@@ -101,7 +98,7 @@ Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting mo

     GP regression model after optimization of the parameters.
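Under the hood, the mean curve in these plots is plain linear algebra. A pure-numpy sketch of the optimized 1-D predictor (the values come from the table above; ``X`` and ``Y`` are the tutorial's data, and the hand-rolled kernel is a stand-in for ``GPy.kern.rbf``, not the library's implementation)::

    import numpy as np

    def rbf(a, b, v=0.8151, ell=1.8037):
        # rbf covariance between 1-D point sets a (n,) and b (m,)
        return v * np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2 / ell ** 2)

    x = np.linspace(X.min() - 1., X.max() + 1., 200)                   # plotting grid
    K = rbf(X.flatten(), X.flatten()) + 0.0025 * np.eye(X.shape[0])    # add the fixed noise variance
    mean = rbf(x, X.flatten()).dot(np.linalg.solve(K, Y.flatten()))    # posterior mean on the grid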
-2 dimensional example
+2-dimensional example
 =====================

 Here is a 2 dimensional example::
@@ -131,15 +128,16 @@ Here is a 2 dimensional example::

     m.plot()
     print(m)

-The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (ie the GP is not isotropic). The output of the last 2 lines is::
+The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (i.e. the GP is not isotropic). The output of the last two lines is::

-    Marginal log-likelihood: 2.893e+01
-             Name             |  Value  | Constraints | Ties | Prior
-    -------------------------------------------------------------------------
-    Mat52_ARD_variance        |  0.4094 |    (+ve)    |      |
-    Mat52_ARD_lengthscale_0   |  2.1060 |    (+ve)    |      |
-    Mat52_ARD_lengthscale_1   |  2.0546 |    (+ve)    |      |
-    white_variance            |  0.0012 |    (+ve)    |      |
+    Marginal log-likelihood: 6.682e+01
+           Name           |  Value  | Constraints | Ties | Prior
+    ---------------------------------------------------------------------
+    Mat52_variance        |  0.3860 |    (+ve)    |      |
+    Mat52_lengthscale_0   |  2.0578 |    (+ve)    |      |
+    Mat52_lengthscale_1   |  1.8542 |    (+ve)    |      |
+    white_variance        |  0.0023 |    (+ve)    |      |
+    noise variance        |  0.0000 |    (+ve)    |      |

 .. figure:: Figures/tuto_GP_regression_m3.png
     :align: center
diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
new file mode 100644
index 00000000..80e2bee2
--- /dev/null
+++ b/doc/tuto_kernel_overview.rst
@@ -0,0 +1,177 @@
+
+****************************
+Tutorial: a kernel overview
+****************************
+
+First we import the libraries we will need::
+
+    import pylab as pb
+    import numpy as np
+    import GPy
+    pb.ion()
+
+For most kernels, the dimension is the only mandatory parameter needed to define a kernel object. However, it is also possible to specify the values of the parameters. For example, the three following commands are valid for defining a squared exponential kernel (i.e. rbf or Gaussian)::
+
+    ker1 = GPy.kern.rbf(1)          # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+    ker2 = GPy.kern.rbf(D=1, variance = 1.5, lengthscale=2.)
+    ker3 = GPy.kern.rbf(1, .5, .5)
+
+``plot`` and ``print`` functions are implemented to represent kernel objects::
+
+    print ker1
+
+    ker1.plot()
+    ker2.plot()
+    ker3.plot()
+
+.. figure:: Figures/tuto_kern_overview_basicdef.png
+    :align: center
+    :height: 350px
+
+Implemented kernels
+===================
+
+Many kernels are already implemented in GPy. Here is a summary of most of them:
+
+.. figure:: Figures/tuto_kern_overview_allkern.png
+    :align: center
+    :height: 800px
+
+It is also possible to use the `sympy` package to build new kernels. This will be the subject of another tutorial.
+
+Operations to combine kernels
+=============================
+
+In ``GPy``, kernel objects can be combined with the usual ``+`` and ``*`` operators::
+
+    k1 = GPy.kern.rbf(1,variance=1., lengthscale=2)
+    k2 = GPy.kern.Matern32(1,variance=1., lengthscale=2)
+
+    ker_add = k1 + k2
+    print ker_add
+
+    ker_prod = k1 * k2
+    print ker_prod
+
+Note that by default, the operator ``+`` adds kernels defined on the same input space whereas ``*`` assumes that the kernels are defined on different input spaces. Here for example ``ker_add.D`` will return ``1`` whereas ``ker_prod.D`` will return ``2``.
+
+In order to add kernels defined on different input spaces, the required command is::
+
+    ker_add_orth = k1.add_orthogonal(k2)
+
+.. figure:: Figures/tuto_kern_overview_add_orth.png
+    :align: center
+    :height: 350px
+
+    Output of ``ker_add_orth.plot(plot_limits=[[-10,-10],[10,10]])``.
+
+Example: building an ANOVA kernel
+=================================
+
+In two dimensions, ANOVA kernels have the following form:
+
+.. math::
+
+    k_{ANOVA}(x,y) = \prod_{i=1}^2 (1 + k_i(x_i,y_i)) = 1 + k_1(x_1,y_1) + k_2(x_2,y_2) + k_1(x_1,y_1) \times k_2(x_2,y_2).
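The right-hand side is just the expansion of the product; a quick numpy check (with arbitrary stand-in values for :math:`k_1` and :math:`k_2`) makes it concrete::

    import numpy as np
    k1, k2 = 0.7, 0.3   # hypothetical values of k_1(x_1,y_1) and k_2(x_2,y_2)
    print np.allclose((1 + k1) * (1 + k2), 1 + k1 + k2 + k1 * k2)   # True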
+Let us assume that we want to define an ANOVA kernel with a Matern 5/2 kernel for each :math:`k_i`. As seen previously, we can define this kernel as follows::
+
+    k_cst = GPy.kern.bias(1, variance=1.)
+    k_mat = GPy.kern.Matern52(1, variance=1., lengthscale=3.)
+    Kanova = (k_cst + k_mat) * (k_cst + k_mat)
+    print Kanova
+
+Printing the resulting kernel outputs the following::
+
+                Name              |  Value   |  Constraints  |  Ties
+    ---------------------------------------------------------------------------
+    biasbias_variance             |  1.0000  |               |
+    biasMat52_variance            |  1.0000  |               |
+    biasMat52_Mat52_lengthscale   |  3.0000  |               |  (1)
+    Mat52bias_variance            |  1.0000  |               |
+    Mat52bias_Mat52_lengthscale   |  3.0000  |               |  (0)
+    Mat52Mat52_variance           |  1.0000  |               |
+    Mat52Mat52_Mat52_lengthscale  |  3.0000  |               |  (0)
+    Mat52Mat52_Mat52_lengthscale  |  3.0000  |               |  (1)
+
+Note that the lengthscales of ``Kanova`` are tied together, so that the number of lengthscale parameters remains equal to 2. On the other hand, there are four variance terms in the new parameterization: one for each term on the right-hand side of the above equation. We can illustrate the use of this kernel on a toy example::
+
+    # sample inputs and outputs
+    X = np.random.uniform(-3., 3., (40, 2))
+    Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
+
+    # create GP regression model
+    m = GPy.models.GP_regression(X, Y, Kanova)
+    m.plot()
+
+.. figure:: Figures/tuto_kern_overview_mANOVA.png
+    :align: center
+    :height: 350px
+
+As :math:`k_{ANOVA}` corresponds to a sum of 4 kernels, the best predictor can be split into a sum of 4 functions:
+
+.. math::
+
+    bp(x) & = k(x)^t K^{-1} Y \\
+          & = (1 + k_1(x_1) + k_2(x_2) + k_1(x_1)k_2(x_2))^t K^{-1} Y \\
+          & = 1^t K^{-1} Y + k_1(x_1)^t K^{-1} Y + k_2(x_2)^t K^{-1} Y + (k_1(x_1)k_2(x_2))^t K^{-1} Y.
+
+The submodels can be represented with the ``which_functions`` option of ``plot``::
+
+    pb.figure(figsize=(20,5))
+    pb.subplots_adjust(wspace=0.5)
+    pb.subplot(1,5,1)
+    m.plot()
+    pb.subplot(1,5,2)
+    pb.ylabel("=  ",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,3)
+    m.plot(which_functions=[False,True,False,False])
+    pb.ylabel("cst    +",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,4)
+    m.plot(which_functions=[False,False,True,False])
+    pb.ylabel("+  ",rotation='horizontal',fontsize='30')
+    pb.subplot(1,5,5)
+    pb.ylabel("+  ",rotation='horizontal',fontsize='30')
+    m.plot(which_functions=[False,False,False,True])
+
+.. figure:: Figures/tuto_kern_overview_mANOVAdec.png
+    :align: center
+    :height: 200px
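+
+Note that this model has not been optimized: the plots above use the initial parameter values. One would typically fit the parameters first, reusing the functions introduced in the GP regression tutorial (a minimal sketch, assuming the regular-expression parameter matching described there, where ``''`` matches every parameter)::
+
+    m.constrain_positive('')    # '' is a regular expression matching all parameter names
+    m.optimize()
+    print m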
+
+.. Code appendix: the commands used to generate the figures of this tutorial.
+
+    import pylab as pb
+    import numpy as np
+    import GPy
+    pb.ion()
+
+    ker1 = GPy.kern.rbf(D=1)    # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+    ker2 = GPy.kern.rbf(D=1, variance=.75, lengthscale=3.)
+    ker3 = GPy.kern.rbf(1, .5, .25)
+
+    ker1.plot()
+    ker2.plot()
+    ker3.plot()
+    #pb.savefig("Figures/tuto_kern_overview_basicdef.png")
+
+    kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
+    kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
+
+    pb.figure(figsize=(16,12))
+    pb.subplots_adjust(wspace=.5, hspace=.5)
+    for i, kern in enumerate(kernels):
+        pb.subplot(3,4,i+1)
+        kern.plot(x=7.5, plot_limits=[0.00001, 15.])
+        pb.title(kernel_names[i] + '\n')
+    #pb.axes([.1,.1,.8,.7])
+    #pb.figtext(.5,.9,'Foo Bar', fontsize=18, ha='center')
+    #pb.figtext(.5,.85,'Lorem ipsum dolor sit amet, consectetur adipiscing elit',fontsize=10,ha='center')
+
+    # the white noise kernel is plotted by hand, as a discrete impulse
+    i = 11
+    X = np.linspace(0., 15., 201)
+    WN = 0*X
+    WN[100] = 1.
+    pb.subplot(3,4,i+1)
+    pb.plot(X, WN, 'b')
diff --git a/grid_parameters.py b/grid_parameters.py
deleted file mode 100644
index 64d82755..00000000
--- a/grid_parameters.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-import pylab as pb
-pb.ion()
-import sys
-import GPy
-
-pb.close('all')
-
-N = 200
-M = 15
-resolution=5
-
-X = np.linspace(0,12,N)[:,None]
-Z = np.linspace(0,12,M)[:,None]   # inducing points (fixed for now)
-Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
-#k = GPy.kern.rbf(1)
-k = GPy.kern.Matern32(1) + GPy.kern.white(1)
-
-models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)]
-models[0].scale_factor = 1.
-models[1].scale_factor = 10.
-models[2].scale_factor = 100.
-models[3].scale_factor = 1000.
- #GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k), - #GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#, - #GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)] - -[m.constrain_fixed('white',0.1) for m in models] - -#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j] -xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j] - -lls = [] -cgs = [] -grads = [] -count = 0 -for l,v in zip(xx.flatten(),yy.flatten()): - count += 1 - print count, 'of', resolution**2 - sys.stdout.flush() - - [m.set('lengthscale',l) for m in models] - [m.set('_variance',10.**v) for m in models] - lls.append([m.log_likelihood() for m in models]) - grads.append([m.log_likelihood_gradients() for m in models]) - cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models]) - -lls = np.array(zip(*lls)).reshape(-1,resolution,resolution) -cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution) - -for ll,cg in zip(lls,cgs): - pb.figure() - pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray) - pb.colorbar() - try: - pb.contour(xx,yy,np.exp(ll),colors='k') - except: - pass - pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0) - pb.colorbar() - diff --git a/setup.py b/setup.py index 432b8b13..40c89ccb 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ import os from numpy.distutils.core import Extension, setup -from sphinx.setup_command import BuildDoc +#from sphinx.setup_command import BuildDoc # Version number version = '0.1.3' @@ -19,7 +19,7 @@ setup(name = 'GPy', license = "BSD 3-clause", keywords = "machine-learning gaussian-processes kernels", url = "http://ml.sheffield.ac.uk/GPy/", - packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples'], + packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'], package_dir={'GPy': 'GPy'}, package_data = {'GPy': ['GPy/examples']}, py_modules = ['GPy.__init__'], @@ -27,8 +27,11 @@ setup(name = 'GPy', #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'], - setup_requires=['sphinx'], - cmdclass = {'build_sphinx': BuildDoc}, + extras_require = { + 'docs':['Sphinx', 'ipython'], + }, + #setup_requires=['sphinx'], + #cmdclass = {'build_sphinx': BuildDoc}, classifiers=[ "Development Status :: 1 - Alpha", "Topic :: Machine Learning",
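+        # NB (a sketch, assuming the standard setuptools/pip handling of
+        # extras_require): with the 'docs' extra defined above, the
+        # documentation dependencies are installed on demand with
+        #     pip install .[docs]
+        # instead of being required by every build.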