Merge branch 'master' of github.com:SheffieldML/GPy

This commit is contained in:
James Hensman 2013-03-11 11:51:39 +00:00
commit 401b4f2775
13 changed files with 414 additions and 145 deletions

View file

@ -121,9 +121,6 @@ class model(parameterised):
else: else:
raise AttributeError, "no parameter matches %s"%name raise AttributeError, "no parameter matches %s"%name
def log_prior(self): def log_prior(self):
"""evaluate the prior""" """evaluate the prior"""
return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None]) return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
@ -135,12 +132,11 @@ class model(parameterised):
[np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None] [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
return ret return ret
def _log_likelihood_gradients_transformed(self): def _transform_gradients(self, g):
""" """
Use self.log_likelihood_gradients and self.prior_gradients to get the gradients of the model. Takes a list of gradients and return an array of transformed gradients (positive/negative/tied/and so on)
Adjust the gradient for constraints and ties, return.
""" """
g = self._log_likelihood_gradients() + self._log_prior_gradients()
x = self._get_params() x = self._get_params()
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices] g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices] g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
@ -152,6 +148,7 @@ class model(parameterised):
else: else:
return g return g
def randomize(self): def randomize(self):
""" """
Randomize the model. Randomize the model.
@ -241,6 +238,27 @@ class model(parameterised):
print "Warning! constraining %s postive"%name print "Warning! constraining %s postive"%name
def objective_function(self, x):
"""
The objective function passed to the optimizer. It combines the likelihood and the priors.
"""
self._set_params_transformed(x)
return -self.log_likelihood() - self.log_prior()
def objective_function_gradients(self, x):
"""
Gets the gradients from the likelihood and the priors.
"""
self._set_params_transformed(x)
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
return -LL_gradients - prior_gradients
def objective_and_gradients(self, x):
obj_f = self.objective_function(x)
obj_grads = self.objective_function_gradients(x)
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs): def optimize(self, optimizer=None, start=None, **kwargs):
""" """
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
@ -254,22 +272,12 @@ class model(parameterised):
if optimizer is None: if optimizer is None:
optimizer = self.preferred_optimizer optimizer = self.preferred_optimizer
def f(x):
self._set_params_transformed(x)
return -self.log_likelihood()-self.log_prior()
def fp(x):
self._set_params_transformed(x)
return -self._log_likelihood_gradients_transformed()
def f_fp(x):
self._set_params_transformed(x)
return -self.log_likelihood()-self.log_prior(),-self._log_likelihood_gradients_transformed()
if start == None: if start == None:
start = self._get_params_transformed() start = self._get_params_transformed()
optimizer = optimization.get_optimizer(optimizer) optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model = self, **kwargs) opt = optimizer(start, model = self, **kwargs)
opt.run(f_fp=f_fp, f=f, fp=fp) opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
self.optimization_runs.append(opt) self.optimization_runs.append(opt)
self._set_params_transformed(opt.x_opt) self._set_params_transformed(opt.x_opt)
@ -357,12 +365,9 @@ class model(parameterised):
dx = step*np.sign(np.random.uniform(-1,1,x.size)) dx = step*np.sign(np.random.uniform(-1,1,x.size))
#evaulate around the point x #evaulate around the point x
self._set_params_transformed(x+dx) f1, g1 = self.objective_and_gradients(x+dx)
f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed() f2, g2 = self.objective_and_gradients(x-dx)
self._set_params_transformed(x-dx) gradient = self.objective_function_gradients(x)
f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
self._set_params_transformed(x)
gradient = self._log_likelihood_gradients_transformed()
numerical_gradient = (f1-f2)/(2*dx) numerical_gradient = (f1-f2)/(2*dx)
global_ratio = (f1-f2)/(2*np.dot(dx,gradient)) global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
@ -398,14 +403,10 @@ class model(parameterised):
for i in param_list: for i in param_list:
xx = x.copy() xx = x.copy()
xx[i] += step xx[i] += step
self._set_params_transformed(xx) f1, g1 = self.objective_and_gradients(xx)
f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
xx[i] -= 2.*step xx[i] -= 2.*step
self._set_params_transformed(xx) f2, g2 = self.objective_and_gradients(xx)
f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i] gradient = self.objective_function_gradients(x)[i]
self._set_params_transformed(x)
gradient = self._log_likelihood_gradients_transformed()[i]
numerical_gradient = (f1-f2)/(2*step) numerical_gradient = (f1-f2)/(2*step)
ratio = (f1-f2)/(2*step*gradient) ratio = (f1-f2)/(2*step*gradient)

View file

@ -11,7 +11,7 @@ import GPy
default_seed=10000 default_seed=10000
def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME def crescent_data(seed=default_seed): #FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@ -31,11 +31,8 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
likelihood = GPy.likelihoods.EP(data['Y'],distribution) likelihood = GPy.likelihoods.EP(data['Y'],distribution)
if model_type=='Full': m = GPy.models.GP(data['X'],likelihood,kernel)
m = GPy.models.GP(data['X'],likelihood,kernel) m.ensure_default_constraints()
else:
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m.update_likelihood_approximation() m.update_likelihood_approximation()
print(m) print(m)
@ -94,16 +91,13 @@ def toy_linear_1d_classification(seed=default_seed):
# Model definition # Model definition
m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
m.ensure_default_constraints()
# Optimize # Optimize
""" m.update_likelihood_approximation()
EPEM runs a loop that consists of two steps: # Parameters optimization:
1) EP likelihood approximation: m.optimize()
m.update_likelihood_approximation() #m.EPEM() #FIXME
2) Parameters optimization:
m.optimize()
"""
m.EPEM()
# Plot # Plot
pb.subplot(211) pb.subplot(211)

View file

@ -10,51 +10,86 @@ import pylab as pb
import numpy as np import numpy as np
import GPy import GPy
np.random.seed(2) np.random.seed(2)
pb.ion()
N = 500 N = 500
M = 5 M = 5
pb.close('all') default_seed=10000
######################################
## 1 dimensional example
# sample inputs and outputs def crescent_data(inducing=10, seed=default_seed):
X = np.random.uniform(-3.,3.,(N,1)) """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
#Y = np.sin(X)+np.random.randn(N,1)*0.05
F = np.sin(X)+np.random.randn(N,1)*0.05
Y = np.ones([F.shape[0],1])
Y[F<0] = -1
likelihood = GPy.inference.likelihoods.probit(Y)
# construct kernel :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
rbf = GPy.kern.rbf(1) :param seed : seed value for data generation.
noise = GPy.kern.white(1) :type seed: int
kernel = rbf + noise :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
# create simple GP model data = GPy.util.datasets.crescent_data(seed=seed)
#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
# contrain all parameters to be positive # Kernel object
#m.constrain_fixed('prec',100.) kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
m = GPy.models.sparse_GP(X, Y, kernel, M=M)
m.ensure_default_constraints()
#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
# m.approximate_likelihood()
print m.checkgrad()
m.optimize('tnc', messages = 1)
m.plot(samples=3)
print m
n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood) # Likelihood object
n.ensure_default_constraints() distribution = GPy.likelihoods.likelihood_functions.probit()
if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian): likelihood = GPy.likelihoods.EP(data['Y'],distribution)
n.approximate_likelihood()
print n.checkgrad() sample = np.random.randint(0,data['X'].shape[0],inducing)
pb.figure() Z = data['X'][sample,:]
n.plot() #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
# create sparse GP EP model
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
m.update_likelihood_approximation()
print(m)
# optimize
m.optimize()
print(m)
# plot
m.plot()
return m
def toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y,distribution)
Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
# Model definition
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
# Optimize
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.EPEM() #FIXME
# Plot
pb.subplot(211)
m.plot_f()
pb.subplot(212)
m.plot()
print(m)
return m
"""
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.ensure_default_constraints()
print m.checkgrad()
"""

View file

@ -0,0 +1,56 @@
# The detailed explanations of the commands used in this file can be found in the tutorial section
import pylab as pb
pb.ion()
import numpy as np
import GPy
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X) + np.random.randn(20,1)*0.05
kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
m = GPy.models.GP_regression(X,Y,kernel)
print m
m.plot()
m.constrain_positive('')
m.unconstrain('') # Required to remove the previous constrains
m.constrain_positive('rbf_variance')
m.constrain_bounded('lengthscale',1.,10. )
m.constrain_fixed('noise',0.0025)
m.optimize()
m.optimize_restarts(Nrestarts = 10)
###########################
# 2-dimensional example #
###########################
import pylab as pb
pb.ion()
import numpy as np
import GPy
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(50,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
# define kernel
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model
m = GPy.models.GP_regression(X,Y,ker)
# contrain all parameters to be positive
m.constrain_positive('')
# optimize and plot
pb.figure()
m.optimize('tnc', max_f_eval = 1000)
m.plot()
print(m)

View file

@ -0,0 +1,139 @@
# The detailed explanations of the commands used in this file can be found in the tutorial section
import pylab as pb
import numpy as np
import GPy
pb.ion()
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
print ker2
ker1.plot()
ker2.plot()
ker3.plot()
k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels
k_prod = k1.prod(k2)
k_prodorth = k1.prod_orthogonal(k2)
# Sum of kernels
k_add = k1.add(k2)
k_addorth = k1.add_orthogonal(k2)
pb.figure(figsize=(8,8))
pb.subplot(2,2,1)
k_prod.plot()
pb.title('prod')
pb.subplot(2,2,2)
k_prodorth.plot()
pb.title('prod_orthogonal')
pb.subplot(2,2,3)
k_add.plot()
pb.title('add')
pb.subplot(2,2,4)
k_addorth.plot()
pb.title('add_orthogonal')
pb.subplots_adjust(wspace=0.3, hspace=0.3)
k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
k = k1 * k2 # equivalent to k = k1.prod(k2)
print k
# Simulate sample paths
X = np.linspace(-5,5,501)[:,None]
Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
# plot
pb.figure(figsize=(10,4))
pb.subplot(1,2,1)
k.plot()
pb.subplot(1,2,2)
pb.plot(X,Y.T)
pb.ylabel("Sample path")
pb.subplots_adjust(wspace=0.3)
k = (k1+k2)*(k1+k2)
print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.Matern32(1)
k3 = GPy.kern.white(1)
k = k1 + k2 + k3
print k
k.constrain_positive('var')
k.constrain_fixed(np.array([1]),1.75)
k.tie_param('len')
k.unconstrain('white')
k.constrain_bounded('white',lower=1e-5,upper=.5)
print k
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
print Kanova
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GP_regression(X,Y,Kanova)
pb.figure(figsize=(5,5))
m.plot()
pb.figure(figsize=(20,3))
pb.subplots_adjust(wspace=0.5)
pb.subplot(1,5,1)
m.plot()
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True])
import pylab as pb
import numpy as np
import GPy
pb.ion()
ker1 = GPy.kern.rbf(D=1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
ker3 = GPy.kern.rbf(1, .5, .25)
ker1.plot()
ker2.plot()
ker3.plot()
#pb.savefig("Figures/tuto_kern_overview_basicdef.png")
kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
pb.figure(figsize=(16,12))
pb.subplots_adjust(wspace=.5, hspace=.5)
for i, kern in enumerate(kernels):
pb.subplot(3,4,i+1)
kern.plot(x=7.5,plot_limits=[0.00001,15.])
pb.title(kernel_names[i]+ '\n')
# actual plot for the noise
i = 11
X = np.linspace(0.,15.,201)
WN = 0*X
WN[100] = 1.
pb.subplot(3,4,i+1)
pb.plot(X,WN,'b')

View file

@ -17,7 +17,7 @@ class EP(likelihood):
self.epsilon = epsilon self.epsilon = epsilon
self.eta, self.delta = power_ep self.eta, self.delta = power_ep
self.data = data self.data = data
self.N = self.data.size self.N, self.D = self.data.shape
self.is_heteroscedastic = True self.is_heteroscedastic = True
self.Nparams = 0 self.Nparams = 0
@ -29,7 +29,7 @@ class EP(likelihood):
#initial values for the GP variables #initial values for the GP variables
self.Y = np.zeros((self.N,1)) self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N) self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N) self.precision = np.ones(self.N)[:,None]
self.Z = 0 self.Z = 0
self.YYT = None self.YYT = None
@ -54,18 +54,14 @@ class EP(likelihood):
self.Y = mu_tilde[:,None] self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T) self.YYT = np.dot(self.Y,self.Y.T)
self.precision = self.tau_tilde self.covariance_matrix = np.diag(1./self.tau_tilde)
self.covariance_matrix = np.diag(1./self.precision) self.precision = self.tau_tilde[:,None]
def fit_full(self,K): def fit_full(self,K):
""" """
The expectation-propagation algorithm. The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006. For nomenclature see Rasmussen & Williams 2006.
""" """
#Prior distribution parameters: p(f|X) = N(f|0,K)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N) mu = np.zeros(self.N)
Sigma = K.copy() Sigma = K.copy()
@ -124,13 +120,14 @@ class EP(likelihood):
return self._compute_GP_variables() return self._compute_GP_variables()
def fit_DTC(self, Knn_diag, Kmn, Kmm): #def fit_DTC(self, Knn_diag, Kmn, Kmm):
def fit_DTC(self, Kmm, Kmn):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013. For nomenclature see ... 2013.
""" """
#TODO: this doesn;t work with uncertain inputs! #TODO: this doesn't work with uncertain inputs!
""" """
Prior approximation parameters: Prior approximation parameters:
@ -158,12 +155,12 @@ class EP(likelihood):
sigma_ = 1./tau_ sigma_ = 1./tau_
mu_ = v_/tau_ mu_ = v_/tau_
""" """
tau_ = np.empty(self.N,dtype=float) self.tau_ = np.empty(self.N,dtype=float)
v_ = np.empty(self.N,dtype=float) self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments #Initial values - Marginal moments
z = np.empty(self.N,dtype=float) z = np.empty(self.N,dtype=float)
Z_hat = np.empty(self.N,dtype=float) self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float) phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float) mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float) sigma2_hat = np.empty(self.N,dtype=float)
@ -172,21 +169,21 @@ class EP(likelihood):
epsilon_np1 = 1 epsilon_np1 = 1
epsilon_np2 = 1 epsilon_np2 = 1
self.iterations = 0 self.iterations = 0
np1 = [tau_tilde.copy()] np1 = [self.tau_tilde.copy()]
np2 = [v_tilde.copy()] np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N) update_order = np.random.permutation(self.N)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] = tau_tilde[i] + Delta_tau self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
v_tilde[i] = v_tilde[i] + Delta_v self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT) L = jitchol(LLT)
@ -196,25 +193,26 @@ class EP(likelihood):
mu = mu + (Delta_v-Delta_tau*mu[i])*si mu = mu + (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1 self.iterations += 1
#Sigma recomputation with Cholesky decompositon #Sigma recomputation with Cholesky decompositon
LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T) LLT0 = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT) L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1) V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0) V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2) Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,v_tilde) Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde) mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
np1.append(tau_tilde.copy()) np1.append(self.tau_tilde.copy())
np2.append(v_tilde.copy()) np2.append(self.v_tilde.copy())
self._compute_GP_variables() self._compute_GP_variables()
def fit_FITC(self, Knn_diag, Kmn): def fit_FITC(self, Kmm, Kmn, Knn_diag):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008. For nomenclature see Naish-Guzman and Holden, 2008.
""" """
M = Kmm.shape[0]
""" """
Prior approximation parameters: Prior approximation parameters:
@ -235,7 +233,7 @@ class EP(likelihood):
mu = w + P*gamma mu = w + P*gamma
""" """
self.w = np.zeros(self.N) self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M) self.gamma = np.zeros(M)
mu = np.zeros(self.N) mu = np.zeros(self.N)
P = P0.copy() P = P0.copy()
R = R0.copy() R = R0.copy()
@ -271,7 +269,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -281,10 +279,10 @@ class EP(likelihood):
dtd1 = Delta_tau*Diag[i] + 1. dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i] dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,self.M) pi_ = P[i,:].reshape(1,M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T) Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1 self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
@ -296,7 +294,7 @@ class EP(likelihood):
Diag = Diag0/(1.+ Diag0 * self.tau_tilde) Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
P = (Diag / Diag0)[:,None] * P0 P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T) RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T)) L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.flapack.dtrtrs(L,R0,lower=1) R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T) RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)

View file

@ -37,8 +37,8 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float) :param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float) :param v_i: mean/variance of the cavity distribution (float)
""" """
# TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1" if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
if data_i == 0: data_i = -1 #NOTE Binary classification works better classes {-1,1}, 1D-plotting works better with classes {0,1}. # TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z) Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z) phi = stats.norm.pdf(z)

View file

@ -269,6 +269,8 @@ class GP(model):
if hasattr(self,'Z'): if hasattr(self,'Z'):
Zu = self.Z*self._Xstd + self._Xmean Zu = self.Z*self._Xstd + self._Xmean
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12) pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
elif self.X.shape[1]==2: #FIXME elif self.X.shape[1]==2: #FIXME
resolution = resolution or 50 resolution = resolution or 50
@ -281,5 +283,8 @@ class GP(model):
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.) pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0],xmax[0]) pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1]) pb.ylim(xmin[1],xmax[1])
if hasattr(self,'Z'):
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@ -72,7 +72,7 @@ class sparse_GP(GP):
self.psi2 = None self.psi2 = None
def _computations(self): def _computations(self):
# TODO find routine to multiply triangular matrices #TODO: find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough) #TODO: slices for psi statistics (easy enough)
sf = self.scale_factor sf = self.scale_factor
@ -82,9 +82,9 @@ class sparse_GP(GP):
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs? assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0) self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
else: else:
tmp = self.psi1.T*(np.sqrt(self.likelihood.precision.reshape(1,self.N))/sf) tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T) self.psi2_beta_scaled = np.dot(tmp,tmp.T)
else: else:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
@ -106,15 +106,19 @@ class sparse_GP(GP):
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi # FIXME: this is untested for the het. case # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N) self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB if self.has_uncertain_inputs:
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
if not self.has_uncertain_inputs: self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
raise NotImplementedError, "TODO: recaste derivatibes in psi2 back into psi1" else:
self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi2 = None
else: else:
self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
@ -166,14 +170,29 @@ class sparse_GP(GP):
def _get_param_names(self): def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self) return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm,self.psi1)
#self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2 sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2) B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C) D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D return A+B+C+D
@ -221,14 +240,3 @@ class sparse_GP(GP):
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None] return mu,var[:,None]
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
if self.Q==1:
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')

View file

@ -157,13 +157,28 @@ class GradientTests(unittest.TestCase):
def test_GP_EP_probit(self): def test_GP_EP_probit(self):
N = 20 N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None] Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.GP(X, likelihood, kernel) m = GPy.models.GP(X, likelihood, kernel)
m.ensure_default_constraints() m.ensure_default_constraints()
self.assertTrue(m.EPEM) m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
#self.assertTrue(m.EPEM)
def test_sparse_EP_DTC_probit(self):
N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
Z = np.linspace(0,15,4)[:,None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.sparse_GP(X, likelihood, kernel,Z)
m.ensure_default_constraints()
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.skip("FITC will be broken for a while") @unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self): def test_generalized_FITC(self):

View file

@ -0,0 +1,17 @@
***************************
List of implemented kernels
***************************
The :math:`\checkmark` symbol represents the functions that have been implemented for each kernel.
.. |tick|
.. |tick| image:: tick.png
====== =========== === ======= =========== =============== ======= =========== ====== ====== =======
NAME get/set K Kdiag dK_dtheta dKdiag_dtheta dK_dX dKdiag_dX psi0 psi1 psi2
====== =========== === ======= =========== =============== ======= =========== ====== ====== =======
rbf \\checkmark y
====== =========== === ======= =========== =============== ======= =========== ====== ====== =======

View file

@ -2,7 +2,7 @@
Gaussian process regression tutorial Gaussian process regression tutorial
************************************* *************************************
We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_GP_regression.py.
We first import the libraries we will need: :: We first import the libraries we will need: ::

View file

@ -2,6 +2,7 @@
**************************** ****************************
tutorial : A kernel overview tutorial : A kernel overview
**************************** ****************************
The aim of this tutorial is to give a better understanding of the kernel objects in GPy and to list the ones that are already implemented. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_kernel_overview.py.
First we import the libraries we will need :: First we import the libraries we will need ::
@ -38,7 +39,7 @@ return::
Implemented kernels Implemented kernels
=================== ===================
Many kernels are already implemented in GPy. Here is a summary of most of them: Many kernels are already implemented in GPy. A comprehensive list can be found `here <kernel_implementation.html>`_ . The following figure gives a summary of most of them:
.. figure:: Figures/tuto_kern_overview_allkern.png .. figure:: Figures/tuto_kern_overview_allkern.png
:align: center :align: center