commit 2abba8cf14 — merged with master
Author: Nicolo Fusi, 2013-02-15 11:56:37 +00:00
59 changed files with 2845 additions and 1608 deletions

View file

@@ -1,11 +1,20 @@
 language: python
 python:
   - "2.7"
+# Set virtualenv with system-site-packages to true
+virtualenv:
+  system_site_packages: true
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
-install:
-  - sudo apt-get install python-scipy
-  - pip install sphinx
+before_install:
+  - sudo apt-get install -qq python-scipy python-pip
+  - sudo apt-get install -qq python-matplotlib
+install:
+  - pip install sphinx
+  - pip install nose
   - pip install . --use-mirrors
 # command to run tests, e.g. python setup.py test
 script:
-  - nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing
+  - nosetests GPy/testing

View file

@@ -7,5 +7,5 @@ import models
 import inference
 import util
 import examples
-#import examples TODO: discuss!
 from core import priors
+import likelihoods

View file

@@ -10,6 +10,7 @@ from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
+from .. import likelihoods

 class model(parameterised):
     def __init__(self):
@@ -82,7 +83,7 @@ class model(parameterised):
     def get(self, name, return_names=False):
         """
        Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
@@ -107,7 +108,7 @@ class model(parameterised):
     def get_gradient(self, name, return_names=False):
         """
        Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
@@ -303,54 +304,62 @@ class model(parameterised):
         return '\n'.join(s)

-    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance=1e-3, return_ratio=False, *args):
+    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the model by comparing to a numerical estimate.
-        If the overall gradient fails, individual components are tested.
+        If the verbose flag is passed, individual components are tested (and printed).
+
+        :param verbose: If True, print a "full" check of each parameter
+        :type verbose: bool
+        :param step: The size of the step around which to linearise the objective
+        :type step: float (default 1e-6)
+        :param tolerance: the tolerance allowed (see note)
+        :type tolerance: float (default 1e-3)
+
+        Note:
+            The gradient is considered correct if the ratio of the analytical
+            and numerical gradients is within <tolerance> of unity.
         """
         x = self._get_params_transformed().copy()

-        # choose a random direction to step in:
-        dx = step*np.sign(np.random.uniform(-1, 1, x.size))
-
-        # evaluate around the point x
-        self._set_params_transformed(x+dx)
-        f1, g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-        self._set_params_transformed(x-dx)
-        f2, g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-        self._set_params_transformed(x)
-        gradient = self._log_likelihood_gradients_transformed()
-
-        numerical_gradient = (f1-f2)/(2*dx)
-        global_ratio = (f1-f2)/(2*np.dot(dx, gradient))
-        if verbose:
-            print "Gradient ratio = ", global_ratio, '\n'
-            sys.stdout.flush()
-        if (np.abs(1.-global_ratio) < tolerance) and not np.isnan(global_ratio):
-            if verbose:
-                print 'Gradcheck passed'
+        if not verbose:
+            # just check the global ratio
+            dx = step*np.sign(np.random.uniform(-1, 1, x.size))
+
+            # evaluate around the point x
+            self._set_params_transformed(x+dx)
+            f1, g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
+            self._set_params_transformed(x-dx)
+            f2, g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
+            self._set_params_transformed(x)
+            gradient = self._log_likelihood_gradients_transformed()
+
+            numerical_gradient = (f1-f2)/(2*dx)
+            global_ratio = (f1-f2)/(2*np.dot(dx, gradient))
+
+            if (np.abs(1.-global_ratio) < tolerance) and not np.isnan(global_ratio):
+                return True
+            else:
+                return False
         else:
-            if verbose:
-                print "Global check failed. Testing individual gradients\n"
+            # check the gradient of each parameter individually, and do some pretty printing
             try:
                 names = self._get_param_names_transformed()
             except NotImplementedError:
                 names = ['Variable %i' % i for i in range(len(x))]

             # Prepare for pretty-printing
             header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
             max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
             float_len = 10
             cols = [max_names]
             cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
             cols = np.array(cols) + 5
             header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
             header_string = map(lambda x: '|'.join(x), [header_string])
             separator = '-'*len(header_string[0])
             print '\n'.join([header_string[0], separator])

             for i in range(len(x)):
                 xx = x.copy()
@@ -368,27 +377,52 @@ class model(parameterised):
                 ratio = (f1-f2)/(2*step*gradient)
                 difference = np.abs((f1-f2)/2/step - gradient)

-                if verbose:
-                    if (np.abs(ratio-1) < tolerance):
-                        formatted_name = "\033[92m {0} \033[0m".format(names[i])
-                    else:
-                        formatted_name = "\033[91m {0} \033[0m".format(names[i])
-                    r = '%.6f' % float(ratio)
-                    d = '%.6f' % float(difference)
-                    g = '%.6f' % gradient
-                    ng = '%.6f' % float(numerical_gradient)
-                    grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0]+9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
-                    print grad_string
-                if verbose:
-                    print ''
-
-            if return_ratio:
-                return global_ratio
-            else:
-                return False
-        if return_ratio:
-            return global_ratio
-        else:
-            return True
+                if (np.abs(ratio-1) < tolerance):
+                    formatted_name = "\033[92m {0} \033[0m".format(names[i])
+                else:
+                    formatted_name = "\033[91m {0} \033[0m".format(names[i])
+                r = '%.6f' % float(ratio)
+                d = '%.6f' % float(difference)
+                g = '%.6f' % gradient
+                ng = '%.6f' % float(numerical_gradient)
+                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0]+9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
+                print grad_string
+
+    def EPEM(self, epsilon=.1, **kwargs):
+        """
+        TODO: Should this not be in the GP class?
+        Expectation maximization for Expectation Propagation.
+
+        kwargs are passed to the optimize function. They can be:
+
+        :epsilon: convergence criterion
+        :max_f_eval: maximum number of function evaluations
+        :messages: whether to display during optimisation
+        :param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
+        :type optimizer: string TODO: valid strings?
+        """
+        assert isinstance(self.likelihood, likelihoods.EP), "EM is not available for Gaussian likelihoods"
+        log_change = epsilon + 1.
+        self.log_likelihood_record = []
+        self.gp_params_record = []
+        self.ep_params_record = []
+        iteration = 0
+        last_value = -np.exp(1000)
+        while log_change > epsilon or not iteration:
+            print 'EM iteration %s' % iteration
+            self.update_likelihood_approximation()
+            self.optimize(**kwargs)
+            new_value = self.log_likelihood()
+            log_change = new_value - last_value
+            if log_change > epsilon:
+                self.log_likelihood_record.append(new_value)
+                self.gp_params_record.append(self._get_params())
+                #self.ep_params_record.append((self.beta, self.Y, self.Z_ep))
+                last_value = new_value
+            else:
+                convergence = False
+                #self.beta, self.Y, self.Z_ep = self.ep_params_record[-1]
+                self._set_params(self.gp_params_record[-1])
+                print "Log-likelihood decrement: %s \nLast iteration discarded." % log_change
+            iteration += 1
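
For reference, the ratio test that checkgrad implements can be exercised in isolation. A minimal standalone sketch (not GPy code; ratio_check, f and g are names made up for this example):

    import numpy as np

    def ratio_check(f, grad_f, x, step=1e-6, tolerance=1e-3):
        # step in a random direction, as checkgrad does
        dx = step*np.sign(np.random.uniform(-1, 1, x.size))
        numerical = (f(x + dx) - f(x - dx))/2.    # central finite difference
        analytical = np.dot(dx, grad_f(x))        # dx . analytical gradient
        ratio = numerical/analytical
        # the gradient passes if the ratio is within <tolerance> of unity
        return (np.abs(1. - ratio) < tolerance) and not np.isnan(ratio)

    f = lambda x: np.sum(x**2)    # toy objective
    g = lambda x: 2*x             # its exact gradient
    print ratio_check(f, g, np.random.randn(5))   # -> True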

View file

@@ -102,6 +102,11 @@
         else:
             return expr

+    def Nparam_transformed(self):
+        ties = 0
+        for ar in self.tied_indices:
+            ties += ar.size - 1
+        return self.Nparam - len(self.constrained_fixed_indices) - ties
+
     def constrain_positive(self, which):
         """
@@ -149,8 +154,6 @@ class parameterised(object):
     def constrain_negative(self, which):
         """
         Set negative constraints.

View file

@@ -16,8 +16,13 @@ k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T

+<<<<<<< HEAD
 k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
 # k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q, 0.00001)
+=======
+# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
+>>>>>>> master
 m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
 m.constrain_positive('(rbf|bias|noise|white|S)')
 # m.constrain_fixed('S', 1)

View file

@@ -3,16 +3,15 @@
 """
-Simple Gaussian Processes classification
+Gaussian Processes classification
 """
 import pylab as pb
 import numpy as np
 import GPy

 default_seed=10000

-######################################
-## 2 dimensional example
-def crescent_data(model_type='Full', inducing=10, seed=default_seed):
+def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
     """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.

     :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@@ -21,20 +20,28 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
     :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
     :type inducing: int
     """
     data = GPy.util.datasets.crescent_data(seed=seed)

-    likelihood = GPy.inference.likelihoods.probit(data['Y'])
+    # Kernel object
+    kernel = GPy.kern.rbf(data['X'].shape[1])
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'], distribution)

     if model_type == 'Full':
-        m = GPy.models.GP_EP(data['X'], likelihood)
+        m = GPy.models.GP(data['X'], likelihood, kernel)
     else:
         # create sparse GP EP model
         m = GPy.models.sparse_GP_EP(data['X'], likelihood=likelihood, inducing=inducing, ep_proxy=model_type)

-    m.approximate_likelihood()
+    m.update_likelihood_approximation()
     print(m)

     # optimize
-    m.em()
+    m.optimize()
     print(m)

     # plot
@@ -42,54 +49,67 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
     return m

 def oil():
-    """Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood."""
+    """
+    Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
+    """
     data = GPy.util.datasets.oil()

-    likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
-
-    # create simple GP model
-    m = GPy.models.GP_EP(data['X'], likelihood)
-
-    # constrain all parameters to be positive
+    # Kernel object
+    kernel = GPy.kern.rbf(12)
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
+
+    # Create GP model
+    m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
+
+    # Constrain all parameters to be positive
     m.constrain_positive('')
     m.tie_param('lengthscale')
-    m.approximate_likelihood()
+    m.update_likelihood_approximation()

-    # optimize
+    # Optimize
     m.optimize()
-
-    # plot
-    #m.plot()
     print(m)
     return m

-def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed):
-    """Simple 1D classification example.
-
-    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
+def toy_linear_1d_classification(seed=default_seed):
+    """
+    Simple 1D classification example
+
     :param seed : seed value for data generation (default is 4).
     :type seed: int
-    :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
     """
     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)

-    likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
-    assert model_type in ('Full', 'DTC', 'FITC')
-
-    # create simple GP model
-    if model_type == 'Full':
-        m = GPy.models.simple_GP_EP(data['X'], likelihood)
-    else:
-        # create sparse GP EP model
-        m = GPy.models.sparse_GP_EP(data['X'], likelihood=likelihood, inducing=inducing, ep_proxy=model_type)
-
-    m.constrain_positive('var')
-    m.constrain_positive('len')
-    m.tie_param('lengthscale')
-    m.approximate_likelihood()
-
-    # Optimize and plot
-    m.em(plot_all=False)  # EM algorithm
+    Y = data['Y'][:, 0:1]
+    Y[Y == -1] = 0
+
+    # Kernel object
+    kernel = GPy.kern.rbf(1)
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(Y, distribution)
+
+    # Model definition
+    m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)
+
+    # Optimize
+    """
+    EPEM runs a loop that consists of two steps:
+    1) EP likelihood approximation:
+        m.update_likelihood_approximation()
+    2) Parameter optimization:
+        m.optimize()
+    """
+    m.EPEM()
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f()
+    pb.subplot(212)
     m.plot()
     print(m)
     return m
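
As the comment block in toy_linear_1d_classification spells out, EPEM alternates two calls. A minimal sketch of that loop (illustrative only; it assumes a model m with an EP likelihood and a convergence threshold epsilon, and ignores the record-keeping the real method does):

    import numpy as np

    last = -np.inf
    change = np.inf
    while change > epsilon:
        m.update_likelihood_approximation()   # 1) EP likelihood approximation
        m.optimize()                          # 2) parameter optimization
        change = m.log_likelihood() - last
        last = m.log_likelihood()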

GPy/examples/poisson.py (new file, 47 lines)
View file

@@ -0,0 +1,47 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+"""
+Gaussian Processes + Expectation Propagation - Poisson Likelihood
+"""
+
+import pylab as pb
+import numpy as np
+import GPy
+
+default_seed = 10000
+
+def toy_1d(seed=default_seed):
+    """
+    Simple 1D example with a Poisson likelihood
+
+    :param seed: seed value for data generation (default is 4).
+    :type seed: int
+    """
+    X = np.arange(0, 100, 5)[:, None]
+    F = np.round(np.sin(X/18.) + .1*X) + np.arange(5, 25)[:, None]
+    E = np.random.randint(-5, 5, 20)[:, None]
+    Y = F + E
+
+    kernel = GPy.kern.rbf(1)
+    distribution = GPy.likelihoods.likelihood_functions.Poisson()
+    likelihood = GPy.likelihoods.EP(Y, distribution)
+
+    m = GPy.models.GP(X, likelihood, kernel)
+    m.ensure_default_constraints()
+
+    # Approximate likelihood
+    m.update_likelihood_approximation()
+
+    # Optimize and plot
+    m.optimize()
+    #m.EPEM FIXME
+    print m
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f()  # GP plot
+    pb.subplot(212)
+    m.plot()    # Output plot
+    return m
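
For a non-Gaussian likelihood like the Poisson above, the EP updates need the moments of cavity-times-likelihood, which have no closed form; the likelihoods module later in this diff computes them with golden search plus Simpson's rule. A rough standalone equivalent using plain trapezoidal quadrature (poisson_moments is a name made up for this sketch; the rate link is exp(f), i.e. the zero-location, unit-scale case):

    import numpy as np
    from scipy import stats

    def poisson_moments(y, mu, sigma):
        # cavity N(f|mu,sigma^2) times Poisson(y|exp(f)), integrated on a grid
        f = np.linspace(mu - 8*sigma, mu + 8*sigma, 20001)
        w = stats.norm.pdf(f, loc=mu, scale=sigma)*stats.poisson.pmf(y, np.exp(f))
        Z = np.trapz(w, f)                 # zeroth moment
        mu_hat = np.trapz(f*w, f)/Z        # first moment
        m2 = np.trapz(f**2*w, f)/Z         # second moment
        return Z, mu_hat, m2 - mu_hat**2   # Z_hat, mu_hat, sigma2_hat

    print poisson_moments(3, 1., .5)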

View file

@@ -0,0 +1,60 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+"""
+Sparse Gaussian Processes regression with an RBF kernel
+"""
+
+import pylab as pb
+import numpy as np
+import GPy
+
+np.random.seed(2)
+pb.ion()
+N = 500
+M = 5
+pb.close('all')
+
+######################################
+## 1 dimensional example
+
+# sample inputs and outputs
+X = np.random.uniform(-3., 3., (N, 1))
+#Y = np.sin(X) + np.random.randn(N, 1)*0.05
+F = np.sin(X) + np.random.randn(N, 1)*0.05
+Y = np.ones([F.shape[0], 1])
+Y[F < 0] = -1
+likelihood = GPy.inference.likelihoods.probit(Y)
+
+# construct kernel
+rbf = GPy.kern.rbf(1)
+noise = GPy.kern.white(1)
+kernel = rbf + noise
+
+# create simple GP model
+#m = GPy.models.sparse_GP(X, Y=None, kernel=kernel, M=M, likelihood=likelihood)
+# constrain all parameters to be positive
+#m.constrain_fixed('prec', 100.)
+m = GPy.models.sparse_GP(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+#if not isinstance(m.likelihood, GPy.inference.likelihoods.gaussian):
+#    m.approximate_likelihood()
+print m.checkgrad()
+m.optimize('tnc', messages=1)
+m.plot(samples=3)
+print m
+
+n = GPy.models.sparse_GP(X, Y=None, kernel=kernel, M=M, likelihood=likelihood)
+n.ensure_default_constraints()
+if not isinstance(n.likelihood, GPy.inference.likelihoods.gaussian):
+    n.approximate_likelihood()
+print n.checkgrad()
+pb.figure()
+n.plot()
+
+"""
+m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+print m.checkgrad()
+"""

View file

@ -1,240 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import random
from scipy import stats, linalg
from .likelihoods import likelihood
from ..core import model
from ..util.linalg import pdinv,mdot,jitchol
from ..util.plot import gpplot
from .. import kern
class EP_base:
"""
Expectation Propagation.
This is just the base class for expectation propagation. We'll extend it for full and sparse EP.
"""
def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]):
self.likelihood = likelihood
self.epsilon = epsilon
self.eta, self.delta = powerep
self.jitter = 1e-12
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.restart_EP()
def restart_EP(self):
"""
Set the EP approximation to initial state
"""
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.mu = np.zeros(self.N)
class Full(EP_base):
"""
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param K: prior covariance matrix
:type K: np.ndarray (N x N)
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
def __init__(self,K,likelihood,*args,**kwargs):
assert K.shape[0] == K.shape[1]
self.K = K
self.N = self.K.shape[0]
EP_base.__init__(self,likelihood,*args,**kwargs)
def fit_EP(self,messages=False):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006 (pag. 52-60)
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
#self.K = self.kernel.K(self.X,self.X)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
self.mu=np.zeros(self.N)
self.Sigma=self.K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=np.float64)
self.v_ = np.empty(self.N,dtype=np.float64)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=np.float64)
self.Z_hat = np.empty(self.N,dtype=np.float64)
phi = np.empty(self.N,dtype=np.float64)
mu_hat = np.empty(self.N,dtype=np.float64)
sigma2_hat = np.empty(self.N,dtype=np.float64)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=self.Sigma[:,i].reshape(self.N,1)
self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
self.mu = np.dot(self.Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*(self.K)
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.v_tilde)
epsilon_np1 = np.mean(self.tau_tilde-self.np1[-1]**2)
epsilon_np2 = np.mean(self.v_tilde-self.np2[-1]**2)
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
if messages:
print "EP iteration %i, epsiolon %d"%(self.iterations,epsilon_np1)
class FITC(EP_base):
"""
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param Knn_diag: The diagonal elements of Knn is a 1D vector
:param Kmn: The 'cross' variance between inducing inputs and data
:param Kmm: the covariance matrix of the inducing inputs
:param likelihood: Output's likelihood (e.g. probit)
:type likelihood: GPy.inference.likelihood instance
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs):
self.Knn_diag = Knn_diag
self.Kmn = Kmn
self.Kmm = Kmm
self.M = self.Kmn.shape[0]
self.N = self.Kmn.shape[1]
assert self.M <= self.N, 'The number of inducing inputs must be smaller than the number of observations'
assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N'
EP_base.__init__(self,likelihood,*args,**kwargs)
def fit_EP(self):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
self.P0 = self.Kmn.T
self.KmnKnm = np.dot(self.P0.T, self.P0)
self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
self.Diag0 = self.Knn_diag - self.Qnn_diag
self.R0 = jitchol(self.Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*gamma
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
self.mu = np.zeros(self.N)
self.P = self.P0.copy()
self.R = self.R0.copy()
self.Diag = self.Diag0.copy()
self.Sigma_diag = self.Knn_diag
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=np.float64)
self.v_ = np.empty(self.N,dtype=np.float64)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=np.float64)
self.Z_hat = np.empty(self.N,dtype=np.float64)
phi = np.empty(self.N,dtype=np.float64)
mu_hat = np.empty(self.N,dtype=np.float64)
sigma2_hat = np.empty(self.N,dtype=np.float64)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.arange(self.N)
random.shuffle(update_order)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*self.Diag[i] + 1.
dii = self.Diag[i]
self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = self.P[i,:].reshape(1,self.M)
self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(self.R,pi_.T)
RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
self.R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.mu = self.w + np.dot(self.P,self.gamma)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.P0
self.RPT0 = np.dot(self.R0,self.P0.T)
L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
self.w = self.Diag * self.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
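
The inner loop above avoids recomputing Sigma from scratch at every site update: changing one site precision by Delta_tau is a rank-one correction, as in the Sherman-Morrison identity. A small standalone numpy check of that identity (all names local to this sketch, not GPy code):

    import numpy as np

    N = 4
    A = np.random.randn(N, N)
    K = np.dot(A, A.T) + np.eye(N)    # a valid prior covariance
    tau = np.random.rand(N)           # current site precisions
    Sigma = np.linalg.inv(np.linalg.inv(K) + np.diag(tau))

    i, Delta_tau = 2, 0.7             # update a single site
    si = Sigma[:, i:i+1]
    Sigma_rank1 = Sigma - Delta_tau/(1. + Delta_tau*Sigma[i, i])*np.dot(si, si.T)

    tau[i] += Delta_tau               # recompute from scratch to compare
    Sigma_full = np.linalg.inv(np.linalg.inv(K) + np.diag(tau))
    print np.allclose(Sigma_rank1, Sigma_full)   # -> True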

View file

@ -1,219 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
class likelihood:
def __init__(self,Y,location=0,scale=1):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood used
"""
self.Y = Y
self.N = self.Y.shape[0]
self.location = location
self.scale = scale
def plot1Da(self,X_new,Mean_new,Var_new,X_u,Mean_u,Var_u):
"""
Plot the predictive distribution of the GP model for 1-dimensional inputs
:param X_new: The points at which to make a prediction
:param Mean_new: mean values at X_new
:param Var_new: variance values at X_new
:param X_u: input (inducing) points used to train the model
:param Mean_u: mean values at X_u
:param Var_new: variance values at X_u
"""
assert X_new.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X_new,Mean_new,Var_new)
pb.errorbar(X_u,Mean_u,2*np.sqrt(Var_u),fmt='r+')
pb.plot(X_u,Mean_u,'ro')
def plot2D(self,X,X_new,F_new,U=None):
"""
Predictive distribution of the fitted GP model for 2-dimensional inputs
:param X_new: The points at which to make a prediction
:param Mean_new: mean values at X_new
:param Var_new: variance values at X_new
:param X_u: input points used to train the model
:param Mean_u: mean values at X_u
:param Var_new: variance values at X_u
"""
N,D = X_new.shape
assert D == 2, 'Number of dimensions must be 2'
n = np.sqrt(N)
x1min = X_new[:,0].min()
x1max = X_new[:,0].max()
x2min = X_new[:,1].min()
x2max = X_new[:,1].max()
pb.imshow(F_new.reshape(n,n),extent=(x1min,x1max,x2max,x2min),vmin=0,vmax=1)
pb.colorbar()
C1 = np.arange(self.N)[self.Y.flatten()==1]
C2 = np.arange(self.N)[self.Y.flatten()==-1]
[pb.plot(X[i,0],X[i,1],'ro') for i in C1]
[pb.plot(X[i,0],X[i,1],'bo') for i in C2]
pb.xlim(x1min,x1max)
pb.ylim(x2min,x2max)
if U is not None:
[pb.plot(a,b,'wo') for a,b in U]
class probit(likelihood):
"""
Probit likelihood
Y is expected to take values in {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
z = self.Y[i]*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)
mu_hat = v_i/tau_i + self.Y[i]*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat
def plot1Db(self,X,X_new,F_new,U=None):
assert X.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X_new,F_new,np.zeros(X_new.shape[0]))
pb.plot(X,(self.Y+1)/2,'kx',mew=1.5)
pb.ylim(-0.2,1.2)
if U is not None:
pb.plot(U,U*0+.5,'r|',mew=1.5,markersize=12)
def predictive_mean(self,mu,variance):
return stats.norm.cdf(mu/np.sqrt(1+variance))
def _log_likelihood_gradients():
raise NotImplementedError
class poisson(likelihood):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
def poisson_norm(f):
"""
Product of the likelihood and the cavity distribution
"""
pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
rate = np.exp( (f*self.scale)+self.location)
poisson = stats.poisson.pmf(float(self.Y[i]),rate)
return pdf_norm_f*poisson
def log_pnm(f):
"""
Log of poisson_norm
"""
return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i])
"""
Golden Search and Simpson's Rule
--------------------------------
Simpson's Rule is used to calculate the moments mumerically, it needs a grid of points as input.
Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
"""
#TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
#Golden search
golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit
golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit
golden_A = (golden_A - self.location)/self.scale
golden_B = (golden_B - self.location)/self.scale
opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
# Simpson's approximation
width = 3./np.log(max(self.Y[i],2))
A = opt - width #Lower limit
B = opt + width #Upper limit
K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
def plot1Db(self,X,X_new,F_new,F2_new=None,U=None):
pb.subplot(212)
#gpplot(X_new,F_new,np.sqrt(F2_new))
pb.plot(X_new,F_new)#,np.sqrt(F2_new)) #FIXME
pb.plot(X,self.Y,'kx',mew=1.5)
if U is not None:
pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
def predictive_mean(self,mu,variance):
return np.exp(mu*self.scale + self.location)
def predictive_variance(self,mu,variance):
return mu
def _log_likelihood_gradients():
raise NotImplementedError
class gaussian(likelihood):
"""
Gaussian likelihood
Y is expected to take values in (-inf,inf)
"""
def moments_match(self,i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
s = 1. if self.Y[i] == 0 else 1./self.Y[i]
sigma2_hat = 1./(1./sigma**2 + 1./s**2)
mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
return Z_hat, mu_hat, sigma2_hat
def plot1Db(self,X,X_new,F_new,U=None):
assert X.shape[1] == 1, 'Number of dimensions must be 1'
gpplot(X_new,F_new,np.zeros(X_new.shape[0]))
pb.plot(X,self.Y,'kx',mew=1.5)
if U is not None:
pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
def predictive_mean(self,mu,Sigma):
return mu
def _log_likelihood_gradients():
raise NotImplementedError
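
The probit moments_match above is the closed-form result of integrating the cavity Gaussian against Phi(y*f). A standalone sketch checking those formulas against brute-force quadrature (probit_moments is a name made up for this sketch; not GPy code):

    import numpy as np
    from scipy import stats

    def probit_moments(y, tau, v):
        # the closed forms used by probit.moments_match above
        z = y*v/np.sqrt(tau**2 + tau)
        Z_hat = stats.norm.cdf(z)
        phi = stats.norm.pdf(z)
        mu_hat = v/tau + y*phi/(Z_hat*np.sqrt(tau**2 + tau))
        sigma2_hat = 1./tau - (phi/((tau**2 + tau)*Z_hat))*(z + phi/Z_hat)
        return Z_hat, mu_hat, sigma2_hat

    y, tau, v = 1, 2.0, 0.6   # cavity: mu = v/tau, sigma2 = 1/tau
    f = np.linspace(-10, 10, 200001)
    w = stats.norm.pdf(f, loc=v/tau, scale=np.sqrt(1./tau))*stats.norm.cdf(y*f)
    Z = np.trapz(w, f)
    mu = np.trapz(f*w, f)/Z
    s2 = np.trapz(f**2*w, f)/Z - mu**2
    print probit_moments(y, tau, v)   # matches (Z, mu, s2) below
    print Z, mu, s2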

View file

@@ -14,14 +14,14 @@ class Matern32(kernpart):
     .. math::

-       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \text{ where } r = \sqrt{\sum_{i=1}^D \frac{(x_i-y_i)^2}{\ell_i^2}}
+       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \ \ \ \ \ \text{ where } r = \sqrt{\sum_{i=1}^D \frac{(x_i-y_i)^2}{\ell_i^2}}

     :param D: the number of input dimensions
     :type D: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param lengthscale: the vector of lengthscales :math:`\ell_i`
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object
@@ -35,17 +35,19 @@ class Matern32(kernpart):
             self.Nparam = 2
             self.name = 'Mat32'
             if lengthscale is not None:
-                assert lengthscale.shape == (1,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
             else:
                 lengthscale = np.ones(1)
         else:
             self.Nparam = self.D + 1
-            self.name = 'Mat32_ARD'
+            self.name = 'Mat32'
             if lengthscale is not None:
-                assert lengthscale.shape == (self.D,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == self.D, "bad number of lengthscales"
             else:
                 lengthscale = np.ones(self.D)
-        self._set_params(np.hstack((variance,lengthscale)))
+        self._set_params(np.hstack((variance,lengthscale.flatten())))

     def _get_params(self):
         """return the value of the parameters."""
@@ -116,9 +118,9 @@ class Matern32(kernpart):
     :param F1: vector of derivatives of F
     :type F1: np.array
     :param F2: vector of second derivatives of F
     :type F2: np.array
     :param lower,upper: boundaries of the input domain
     :type lower,upper: floats
     """
     assert self.D == 1
     def L(x,i):
@@ -133,4 +135,3 @@ class Matern32(kernpart):
     #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
     #return(G)
     return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))

View file

@@ -13,14 +13,14 @@ class Matern52(kernpart):
     .. math::

-       k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3} r^2) \exp(- \sqrt{5} r) \qquad \qquad \text{ where } r = \sqrt{\sum_{i=1}^D \frac{(x_i-y_i)^2}{\ell_i^2}}
+       k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3} r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \text{ where } r = \sqrt{\sum_{i=1}^D \frac{(x_i-y_i)^2}{\ell_i^2}}

     :param D: the number of input dimensions
     :type D: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param lengthscale: the vector of lengthscales :math:`\ell_i`
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object
@@ -33,17 +33,19 @@ class Matern52(kernpart):
             self.Nparam = 2
             self.name = 'Mat52'
             if lengthscale is not None:
-                assert lengthscale.shape == (1,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
             else:
                 lengthscale = np.ones(1)
         else:
             self.Nparam = self.D + 1
-            self.name = 'Mat52_ARD'
+            self.name = 'Mat52'
             if lengthscale is not None:
-                assert lengthscale.shape == (self.D,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == self.D, "bad number of lengthscales"
             else:
                 lengthscale = np.ones(self.D)
-        self._set_params(np.hstack((variance,lengthscale)))
+        self._set_params(np.hstack((variance,lengthscale.flatten())))

     def _get_params(self):
         """return the value of the parameters."""

View file

@@ -13,14 +13,14 @@ class exponential(kernpart):
     .. math::

-       k(r) = \sigma^2 \exp(- r) \qquad \qquad \text{ where } r = \sqrt{\sum_{i=1}^D \frac{(x_i-y_i)^2}{\ell_i^2}}
+       k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \text{ where } r = \sqrt{\sum_{i=1}^D \frac{(x_i-y_i)^2}{\ell_i^2}}

     :param D: the number of input dimensions
     :type D: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param lengthscale: the vector of lengthscales :math:`\ell_i`
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object
@@ -33,17 +33,19 @@ class exponential(kernpart):
             self.Nparam = 2
             self.name = 'exp'
             if lengthscale is not None:
-                assert lengthscale.shape == (1,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
             else:
                 lengthscale = np.ones(1)
         else:
             self.Nparam = self.D + 1
-            self.name = 'exp_ARD'
+            self.name = 'exp'
            if lengthscale is not None:
-                assert lengthscale.shape == (self.D,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == self.D, "bad number of lengthscales"
             else:
                 lengthscale = np.ones(self.D)
-        self._set_params(np.hstack((variance,lengthscale)))
+        self._set_params(np.hstack((variance,lengthscale.flatten())))

     def _get_params(self):
         """return the value of the parameters."""
@@ -87,7 +89,7 @@ class exponential(kernpart):
         dl = self.variance*dvar*dist2M.sum(-1)*invdist
         target[1] += np.sum(dl*partial)

     def dKdiag_dtheta(self,partial,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         #NB: derivative of diagonal elements wrt lengthscale is 0
         target[0] += np.sum(partial)
@@ -110,9 +112,9 @@ class exponential(kernpart):
     :param F: vector of functions
     :type F: np.array
     :param F1: vector of derivatives of F
     :type F1: np.array
     :param lower,upper: boundaries of the input domain
     :type lower,upper: floats
     """
     assert self.D == 1
     def L(x,i):
@@ -124,8 +126,3 @@ class exponential(kernpart):
             G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
     Flower = np.array([f(lower) for f in F])[:,None]
     return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))

View file

@@ -6,7 +6,7 @@ import numpy as np
 from ..core.parameterised import parameterised
 from kernpart import kernpart
 import itertools
 from product_orthogonal import product_orthogonal

 class kern(parameterised):
     def __init__(self,D,parts=[], input_slices=None):
@@ -155,7 +155,7 @@ class kern(parameterised):
     D = K1.D + K2.D
-    newkernparts = [product_orthogonal(k1,k2).parts[0] for k1, k2 in itertools.product(K1.parts,K2.parts)]
+    newkernparts = [product_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
     slices = []
     for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
@@ -235,6 +235,8 @@ class kern(parameterised):
             X2 = X
         target = np.zeros(self.Nparam)
         [p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
+        #TODO: transform the gradients here!
         return target

     def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None):
@@ -324,6 +326,7 @@ class kern(parameterised):
         [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         # "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
         # not really expensive, since it's just a matrix of zeroes.
         # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]

View file

@@ -15,34 +15,33 @@ class linear(kernpart):
     :param D: the number of input dimensions
     :type D: int
     :param variances: the vector of variances :math:`\sigma^2_i`
-    :type variances: np.ndarray of size (1,) or (D,) depending on ARD
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single variance parameter \sigma^2), otherwise there is one variance parameter per dimension.
+    :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object
     """

-    def __init__(self,D,variances=None,ARD=True):
+    def __init__(self,D,variances=None,ARD=False):
         self.D = D
         self.ARD = ARD
         if ARD == False:
             self.Nparam = 1
             self.name = 'linear'
             if variances is not None:
-                if isinstance(variances, float):
-                    variances = np.array([variances])
-                assert variances.shape == (1,)
+                variances = np.asarray(variances)
+                assert variances.size == 1, "Only one variance needed for non-ARD kernel"
             else:
                 variances = np.ones(1)
             self._Xcache, self._X2cache = np.empty(shape=(2,))
         else:
             self.Nparam = self.D
-            self.name = 'linear_ARD'
+            self.name = 'linear'
             if variances is not None:
-                assert variances.shape == (self.D,)
+                variances = np.asarray(variances)
+                assert variances.size == self.D, "bad number of variances"
             else:
                 variances = np.ones(self.D)
-        self._set_params(variances)
+        self._set_params(variances.flatten())
         #initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3,1))
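
The same constructor change recurs across the Matern, exponential and linear kernparts above: strict shape asserts are replaced by np.asarray normalisation, so a float, a list or an array is accepted. Roughly, the new behaviour (a sketch, not the actual class code; normalise_param is a made-up name):

    import numpy as np

    def normalise_param(value, expected_size, default=1.):
        # accept float, list or array, as the updated kernels now do
        if value is None:
            return default*np.ones(expected_size)
        value = np.asarray(value)
        assert value.size == expected_size, "bad number of parameters"
        return value.flatten()

    print normalise_param(1.5, 1)             # -> [ 1.5]
    print normalise_param([1., 2., 3.], 3)    # -> [ 1.  2.  3.]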

View file

@@ -12,7 +12,7 @@ class rbf(kernpart):
     .. math::

-       k(r) = \sigma^2 \exp(- \frac{1}{2} r^2) \qquad \qquad \text{ where } r^2 = \sum_{i=1}^d \frac{(x_i-x^\prime_i)^2}{\ell_i^2}
+       k(r) = \sigma^2 \exp(- \frac{1}{2} r^2) \ \ \ \ \ \text{ where } r^2 = \sum_{i=1}^d \frac{(x_i-x^\prime_i)^2}{\ell_i^2}

     where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
@@ -21,7 +21,7 @@ class rbf(kernpart):
     :param variance: the variance of the kernel
     :type variance: float
     :param lengthscale: the vector of lengthscales of the kernel
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object
@@ -75,6 +75,7 @@ class rbf(kernpart):
     def K(self,X,X2,target):
         if X2 is None:
             X2 = X
+
         self._K_computations(X,X2)
         np.add(self.variance*self._K_dvar, target,target)

GPy/likelihoods/EP.py (new file, 311 lines)
View file

@ -0,0 +1,311 @@
import numpy as np
from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol
from likelihood import likelihood
class EP(likelihood):
def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation
Arguments
---------
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
likelihood_function : a likelihood function (see likelihood_functions.py)
"""
self.likelihood_function = likelihood_function
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data
self.N = self.data.size
self.is_heteroscedastic = True
self.Nparams = 0
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#initial values for the GP variables
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)
self.Z = 0
self.YYT = None
def predictive_values(self,mu,var):
return self.likelihood_function.predictive_values(mu,var)
def _get_params(self):
return np.zeros(0)
def _get_param_names(self):
return []
def _set_params(self,p):
pass # TODO: the EP likelihood might want to take some parameters...
def _gradients(self,partial):
return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.precision = self.tau_tilde
self.covariance_matrix = np.diag(1./self.precision)
def fit_full(self,K):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
si=Sigma[:,i].reshape(self.N,1)
Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
def fit_DTC(self, Knn_diag, Kmn, Kmm):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
#TODO: this doesn;t work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
KmnKnm = np.dot(Kmn, Kmn.T)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*gamma
"""
mu = np.zeros(self.N)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
tau_ = np.empty(self.N,dtype=float)
v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [tau_tilde.copy()]
np2 = [v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i])
#Site parameters update
Delta_tau = delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] = tau_tilde[i] + Delta_tau
v_tilde[i] = v_tilde[i] + Delta_v
#Posterior distribution parameters update
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu = mu + (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N
np1.append(tau_tilde.copy())
np2.append(v_tilde.copy())
#store the site parameters and update the GP
self.tau_tilde = tau_tilde
self.v_tilde = v_tilde
return self._compute_GP_variables()
def fit_FITC(self, Knn_diag, Kmn, Kmm):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
"""
Prior approximation parameters:
q(f|X) = int N(f|Kfu*Kuu_inv*u, diag(Kff-Qff)) * N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T
mu = w + P*gamma
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Initial values - Site parameters
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,self.M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
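#For reference, the low-rank posterior parametrisation maintained above
#(Naish-Guzman and Holden, 2008) is
#  Sigma = Diag + P*R.T*R*P.T,  mu = w + P*gamma
#with w = Diag*v_tilde, gamma = R.T*R*P.T*v_tilde and
#Diag = Diag0/(1 + Diag0*tau_tilde); only diag(Sigma) is ever formed.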

View file

@ -0,0 +1,56 @@
import numpy as np
from likelihood import likelihood
class Gaussian(likelihood):
def __init__(self,data,variance=1.,normalize=False):
self.is_heteroscedastic = False
self.Nparams = 1
self.data = data
self.N, self.D = data.shape
self.Z = 0. # a correction factor which accounts for the approximation made
#normalisation
if normalize:
self._mean = data.mean(0)[None,:]
self._std = data.std(0)[None,:]
self.Y = (self.data - self._mean)/self._std
else:
self._mean = np.zeros((1,self.D))
self._std = np.ones((1,self.D))
self.Y = self.data
#TODO: make this work efficiently (only compute YYT if D>>N)
self.YYT = np.dot(self.Y,self.Y.T)
self.trYYT = np.trace(self.YYT)
self._set_params(np.asarray(variance))
def _get_params(self):
return np.asarray(self._variance)
def _get_param_names(self):
return ["noise variance"]
def _set_params(self,x):
self._variance = float(x)
self.covariance_matrix = np.eye(self.N)*self._variance
self.precision = 1./self._variance
def predictive_values(self,mu,var):
"""
Un-normalise the prediction, add the likelihood variance and return the mean with approximate 95% intervals (+/- 2 standard deviations)
"""
mean = mu*self._std + self._mean
true_var = (var + self._variance)*self._std**2
_5pc = mean - 2.*np.sqrt(true_var)
_95pc = mean + 2.*np.sqrt(true_var)
return mean, _5pc, _95pc
def fit_full(self, K):
"""
No approximations needed: the Gaussian likelihood is conjugate, so the covariance argument is unused
"""
pass
def _gradients(self,partial):
return np.sum(partial)
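#A quick illustrative construction of the class above (hypothetical values,
#not part of this commit):
import numpy as np
Y = np.random.randn(10, 2)
lik = Gaussian(Y, variance=0.5, normalize=True)
print lik.Y.mean(0), lik.precision #near-zero column means; precision = 1/variance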

View file

@ -0,0 +1,4 @@
from EP import EP
from Gaussian import Gaussian
# TODO: from Laplace import Laplace
import likelihood_functions as functions

View file

@ -0,0 +1,35 @@
import numpy as np
class likelihood:
"""
The atom for a likelihood class
This object interfaces the GP and the data. The most basic likelihood
(Gaussian) inherits directly from this, as does the EP algorithm
Some things must be defined for this to work properly:
self.Y : the effective Gaussian target of the GP
self.N, self.D : Y.shape
self.covariance_matrix : the effective (noise) covariance of the GP targets
self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
self.is_heteroscedastic : enables significant computational savings in GP
self.precision : a scalar or vector representation of the effective target precision
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
"""
def __init__(self,data):
raise ValueError, "this class is not to be instantiated"
def _get_params(self):
raise NotImplementedError
def _get_param_names(self):
raise NotImplementedError
def _set_params(self,x):
raise NotImplementedError
def fit(self):
raise NotImplementedError
def _gradients(self,partial):
raise NotImplementedError
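#A minimal sketch of the interface documented above: a hypothetical
#fixed-noise likelihood with no tunable parameters (illustration only).
class fixed_noise_Gaussian(likelihood):
    def __init__(self, Y, variance=1.):
        self.Y = Y
        self.N, self.D = Y.shape
        self.covariance_matrix = np.eye(self.N)*variance
        self.precision = 1./variance
        self.Z = 0. #no correction term needed for a Gaussian
        self.is_heteroscedastic = False
        self.YYT = None #only worth computing when D > N
    def _get_params(self):
        return np.zeros(0) #nothing to optimise
    def _get_param_names(self):
        return []
    def _set_params(self, x):
        pass
    def _gradients(self, partial):
        return np.zeros(0)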

View file

@ -0,0 +1,134 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import scipy.optimize #make sp.optimize.golden available below
import pylab as pb
from ..util.plot import gpplot
class likelihood_function:
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.ndarray)
.. Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self,location=0,scale=1):
self.location = location
self.scale = scale
class probit(likelihood_function):
"""
Probit likelihood
Y is expected to take values in {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def moments_match(self,data_i,tau_i,v_i):
"""
Moment matching of the marginal (tilted) distribution in the EP algorithm
:param data_i: ith observation
:param tau_i: precision of the cavity distribution (float)
:param v_i: precision-scaled mean of the cavity distribution, v_i = tau_i*mu_i (float)
"""
# TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1"
if data_i == 0: data_i = -1 #NOTE Binary classification works better classes {-1,1}, 1D-plotting works better with classes {0,1}.
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat
def predictive_values(self,mu,var):
"""
Compute the mean and confidence interval (percentiles 2.5 and 97.5) of the prediction
"""
mu = mu.flatten()
var = var.flatten()
mean = stats.norm.cdf(mu/np.sqrt(1+var))
p_025 = np.zeros(mu.shape)
p_975 = np.ones(mu.shape)
return mean, p_025, p_975
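#A quick numerical sanity check of the closed-form probit moments above,
#integrating the tilted distribution directly (values are hypothetical):
from scipy.integrate import quad
tau_i, v_i, y = 2.0, 0.6, 1 #hypothetical cavity parameters and label
mu, sigma = v_i/tau_i, np.sqrt(1./tau_i)
tilted = lambda f: stats.norm.cdf(y*f)*stats.norm.pdf(f, mu, sigma)
Z, _ = quad(tilted, -10, 10)
m1, _ = quad(lambda f: f*tilted(f), -10, 10)
m2, _ = quad(lambda f: f*f*tilted(f), -10, 10)
print Z, m1/Z, m2/Z - (m1/Z)**2 #should match Z_hat, mu_hat, sigma2_hat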
class Poisson(likelihood_function):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
$$
"""
def moments_match(self,data_i,tau_i,v_i):
"""
Moment matching of the marginal (tilted) distribution in the EP algorithm
:param data_i: ith observation
:param tau_i: precision of the cavity distribution (float)
:param v_i: precision-scaled mean of the cavity distribution, v_i = tau_i*mu_i (float)
"""
mu = v_i/tau_i
sigma = np.sqrt(1./tau_i)
def poisson_norm(f):
"""
Product of the likelihood and the cavity distribution
"""
pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
rate = np.exp( (f*self.scale)+self.location)
poisson = stats.poisson.pmf(float(data_i),rate)
return pdf_norm_f*poisson
def log_pnm(f):
"""
Log of poisson_norm
"""
return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)
"""
Golden Search and Simpson's Rule
--------------------------------
Simpson's Rule is used to calculate the moments numerically; it needs a grid of points as input.
Golden Search is used to find the mode of the poisson_norm distribution, around which the grid for Simpson's Rule is defined.
"""
#TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
#Golden search
golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
golden_A = (golden_A - self.location)/self.scale
golden_B = (golden_B - self.location)/self.scale
opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
# Simpson's approximation
width = 3./np.log(max(data_i,2))
A = opt - width #Lower limit
B = opt + width #Upper limit
K = 10*int(np.log(max(data_i,150))) #Number of points in the grid, we DON'T want K to be the same number for every case
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
def predictive_values(self,mu,var):
"""
Compute the mean and confidence interval (percentiles 2.5 and 97.5) of the prediction
"""
mean = np.exp(mu*self.scale + self.location)
p_025 = stats.poisson.ppf(.025, mean)
p_975 = stats.poisson.ppf(.975, mean)
return mean,p_025,p_975
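#For reference, a self-contained composite Simpson's rule (illustrative
#sketch; the implementation above uses a rearranged grid around the mode):
def simpson(f, a, b, K=10):
    """Composite Simpson's rule with K (even) subintervals."""
    x = np.linspace(a, b, K+1)
    y = np.array([f(xi) for xi in x])
    h = (b - a)/K
    return h/3.*(y[0] + y[-1] + 4.*y[1:-1:2].sum() + 2.*y[2:-1:2].sum())
print simpson(np.exp, 0., 1.) #close to e - 1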

View file

@ -5,10 +5,12 @@ import numpy as np
import pylab as pb import pylab as pb
import sys, pdb import sys, pdb
from GPLVM import GPLVM from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression from sparse_GP import sparse_GP
from GPy.util.linalg import pdinv from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian
from .. import kern
class Bayesian_GPLVM(sparse_GP_regression, GPLVM): class Bayesian_GPLVM(sparse_GP, GPLVM):
""" """
Bayesian Gaussian Process Latent Variable Model Bayesian Gaussian Process Latent Variable Model
@ -20,18 +22,24 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, Y, Q, X = None, S = None, init='PCA', **kwargs): def __init__(self, Y, Q, init='PCA', M=10, Z=None, kernel=None, **kwargs):
if X == None: X = self.initialise_latent(init, Q, Y)
X = self.initialise_latent(init, Q, Y)
if S == None:
S = np.ones_like(X) * 1e-2
sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs) if Z is None:
Z = np.random.permutation(X.copy())[:M]
else:
assert Z.shape[1]==X.shape[1]
if kernel is None:
kernel = kern.rbf(Q) + kern.white(Q)
S = np.ones_like(X) * 1e-2#
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_uncertainty=S, **kwargs)
def _get_param_names(self): def _get_param_names(self):
X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
return (X_names + S_names + sparse_GP_regression._get_param_names(self)) return (X_names + S_names + sparse_GP._get_param_names(self))
def _get_params(self): def _get_params(self):
""" """
@ -39,17 +47,17 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
The resulting 1-D array has this structure: The resulting 1-D array has this structure:
=============================================================== ===============================================================
| mu | S | Z | beta | theta | | mu | S | Z | theta | beta |
=============================================================== ===============================================================
""" """
return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression._get_params(self))) return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP._get_params(self)))
def _set_params(self,x): def _set_params(self,x):
N, Q = self.N, self.Q N, Q = self.N, self.Q
self.X = x[:self.X.size].reshape(N,Q).copy() self.X = x[:self.X.size].reshape(N,Q).copy()
self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy() self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
sparse_GP_regression._set_params(self, x[(2*N*Q):]) sparse_GP._set_params(self, x[(2*N*Q):])
def dL_dmuS(self): def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty) dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
@ -58,17 +66,8 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
dKL_dS = (1. - (1./self.X_uncertainty))*0.5 return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
dKL_dmu = self.X
return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
def KL_divergence(self):
var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_uncertainty - np.log(self.X_uncertainty))
return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
def log_likelihood(self):
return sparse_GP_regression.log_likelihood(self) - self.KL_divergence()
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self))) return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))

GPy/models/GP.py (new file, 274 lines)
View file

@ -0,0 +1,274 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
from ..likelihoods import EP
class GP(model):
"""
Gaussian Process model for regression and EP
:param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
#FIXME normalize vs normalise
def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
# parse arguments
self.Xslices = Xslices
self.X = X
assert len(self.X.shape)==2
self.N, self.Q = self.X.shape
assert isinstance(kernel, kern.kern)
self.kern = kernel
#here's some simple normalisation for the inputs
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
self.likelihood = likelihood
#assert self.X.shape[0] == self.likelihood.Y.shape[0]
#self.N, self.D = self.likelihood.Y.shape
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.D = self.likelihood.data.shape
model.__init__(self)
def _set_params(self,p):
self.kern._set_params_transformed(p[:self.kern.Nparam])
#self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
#the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
alpha = np.dot(self.Ki,self.likelihood.Y)
self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
self.dL_dK = 0.5*(tmp - self.D*self.Ki)
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation.
For a Gaussian (or direct: TODO) likelihood no iteration is required;
this reduces to a single refresh of the GP
"""
self.likelihood.fit_full(self.kern.K(self.X))
self._set_params(self._get_params()) # update the GP
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
else:
return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self):
"""
The log marginal likelihood of the GP.
For an EP model, can be written as the log likelihood of a regression
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
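#In symbols: with Y_tilde = v_tilde/tau_tilde and K* = K + diag(1/tau_tilde),
#the value returned above is (up to constants)
#  log p(y) ~ -0.5*D*log|K*| - 0.5*trace(K*^-1 * Y_tilde*Y_tilde.T) + Z
#where Z is the likelihood's normalisation correction (zero for a Gaussian).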
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
For the kernel parameters, use the chain rule via dL_dK
For the likelihood parameters, pass in alpha = K^-1 y
"""
return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self,_Xnew,slices=None, full_cov=False):
"""
Internal helper function for making predictions, does not account
for normalisation or likelihood
"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
KiKx = np.dot(self.Ki,Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
var = var[:,None]
return mu, var
def predict(self,Xnew, slices=None, full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
- If None, the new points covary throigh every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
"""
#normalise X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov)
#now push through likelihood TODO
mean, _025pm, _975pm = self.likelihood.predictive_values(mu, var)
return mean, var, _025pm, _975pm
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
"""
Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
:param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m,v = self._raw_predict(Xnew, slices=which_functions)
gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
else:
m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
for i in range(samples):
pb.plot(Xnew,Ysim[i,:],Tango.coloursHex['darkBlue'],linewidth=0.25)
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
pb.xlim(xmin,xmax)
ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
pb.ylim(ymin,ymax)
if hasattr(self,'Z'):
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
m,v = self._raw_predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T
pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
pb.scatter(self.X[:,0],self.X[:,1],40,self.likelihood.Y,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
# TODO include samples
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
gpplot(Xnew,m, lower, upper)
pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
pb.xlim(xmin,xmax)
pb.ylim(ymin,ymax)
if hasattr(self,'Z'):
Zu = self.Z*self._Xstd + self._Xmean
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
elif self.X.shape[1]==2: #FIXME
resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T
pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
Yf = self.likelihood.Y.flatten()
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@ -8,9 +8,10 @@ import sys, pdb
from .. import kern from .. import kern
from ..core import model from ..core import model
from ..util.linalg import pdinv, PCA from ..util.linalg import pdinv, PCA
from GP_regression import GP_regression from GP import GP
from ..likelihoods import Gaussian
class GPLVM(GP_regression): class GPLVM(GP):
""" """
Gaussian Process Latent Variable Model Gaussian Process Latent Variable Model
@ -22,10 +23,13 @@ class GPLVM(GP_regression):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, Y, Q, init='PCA', X = None, **kwargs): def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
if X is None: if X is None:
X = self.initialise_latent(init, Q, Y) X = self.initialise_latent(init, Q, Y)
GP_regression.__init__(self, X, Y, **kwargs) if kernel is None:
kernel = kern.rbf(Q) + kern.bias(Q)
likelihood = Gaussian(Y)
GP.__init__(self, X, likelihood, kernel, **kwargs)
def initialise_latent(self, init, Q, Y): def initialise_latent(self, init, Q, Y):
if init == 'PCA': if init == 'PCA':
@ -34,23 +38,19 @@ class GPLVM(GP_regression):
return np.random.randn(Y.shape[0], Q) return np.random.randn(Y.shape[0], Q)
def _get_param_names(self): def _get_param_names(self):
return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) return sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) + GP._get_param_names(self)
+ self.kern._get_param_names_transformed())
def _get_params(self): def _get_params(self):
return np.hstack((self.X.flatten(), self.kern._get_params_transformed())) return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x): def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.Q).copy() self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
GP_regression._set_params(self, x[self.X.size:]) GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
dL_dK = self.dL_dK() dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X)
dL_dtheta = self.kern.dK_dtheta(dL_dK,self.X) return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self)))
dL_dX = 2*self.kern.dK_dX(dL_dK,self.X)
return np.hstack((dL_dX.flatten(),dL_dtheta))
def plot(self): def plot(self):
assert self.Y.shape[1]==2 assert self.Y.shape[1]==2

View file

@ -1,160 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..inference.Expectation_Propagation import Full
from ..inference.likelihoods import likelihood,probit#,poisson,gaussian
from ..core import model
from ..util.linalg import pdinv,jitchol
from ..util.plot import gpplot
class GP_EP(model):
def __init__(self,X,likelihood,kernel=None,epsilon_ep=1e-3,epsilon_em=.1,powerep=[1.,1.]):
"""
Simple Gaussian Process with Non-Gaussian likelihood
Arguments
---------
:param X: input observations (NxD numpy.darray)
:param likelihood: a GPy likelihood (likelihood class)
:param kernel: a GPy kernel (kern class)
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 (float)
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list)
:rtype: GPy model class.
"""
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
assert isinstance(kernel,kern.kern), 'kernel is not a kern instance'
self.likelihood = likelihood
self.Y = self.likelihood.Y
self.kernel = kernel
self.X = X
self.N, self.D = self.X.shape
self.eta,self.delta = powerep
self.epsilon_ep = epsilon_ep
self.jitter = 1e-12
self.K = self.kernel.K(self.X)
model.__init__(self)
def _set_params(self,p):
self.kernel._set_params_transformed(p)
def _get_params(self):
return self.kernel._get_params_transformed()
def _get_param_names(self):
return self.kernel._get_param_names_transformed()
def approximate_likelihood(self):
self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
self.ep_approx.fit_EP()
def posterior_param(self):
self.K = self.kernel.K(self.X)
self.Sroot_tilde_K = np.sqrt(self.ep_approx.tau_tilde)[:,None]*self.K
B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)[None,:]*self.Sroot_tilde_K
#self.L = np.linalg.cholesky(B)
self.L = jitchol(B)
V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1)
self.Sigma = self.K - np.dot(V.T,V)
self.mu = np.dot(self.Sigma,self.ep_approx.v_tilde)
def log_likelihood(self):
"""
Returns
-------
The EP approximation to the log-marginal likelihood
"""
self.posterior_param()
mu_ = self.ep_approx.v_/self.ep_approx.tau_
L1 =.5*sum(np.log(1+self.ep_approx.tau_tilde*1./self.ep_approx.tau_))-sum(np.log(np.diag(self.L)))
L2A =.5*np.sum((self.Sigma-np.diag(1./(self.ep_approx.tau_+self.ep_approx.tau_tilde))) * np.dot(self.ep_approx.v_tilde[:,None],self.ep_approx.v_tilde[None,:]))
L2B = .5*np.dot(mu_*(self.ep_approx.tau_/(self.ep_approx.tau_tilde+self.ep_approx.tau_)),self.ep_approx.tau_tilde*mu_ - 2*self.ep_approx.v_tilde)
L3 = sum(np.log(self.ep_approx.Z_hat))
return L1 + L2A + L2B + L3
def _log_likelihood_gradients(self):
dK_dp = self.kernel.dK_dtheta(self.X)
self.dK_dp = dK_dp
aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
b = self.ep_approx.v_tilde - np.sqrt(self.ep_approx.tau_tilde)*linalg.flapack.dtrtrs(self.L.T,aux1)[0]
U,info_u = linalg.flapack.dtrtrs(self.L,np.diag(np.sqrt(self.ep_approx.tau_tilde)),lower=1)
dL_dK = 0.5*(np.outer(b,b)-np.dot(U.T,U))
self.dL_dK = dL_dK
return np.array([np.sum(dK_dpi*dL_dK) for dK_dpi in dK_dp.T])
def predict(self,X):
#TODO: check output dimensions
self.posterior_param()
K_x = self.kernel.K(self.X,X)
Kxx = self.kernel.K(X)
aux1,info = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
aux2,info = linalg.flapack.dtrtrs(self.L.T, aux1,lower=0)
zeta = np.sqrt(self.ep_approx.tau_tilde)*aux2
f = np.dot(K_x.T,self.ep_approx.v_tilde-zeta)
v,info = linalg.flapack.dtrtrs(self.L,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x,lower=1)
variance = Kxx - np.dot(v.T,v)
vdiag = np.diag(variance)
y=self.likelihood.predictive_mean(f,vdiag)
return f,vdiag,y
def plot(self):
"""
Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals.
"""
if self.X.shape[1]==1:
pb.figure()
xmin,xmax = self.X.min(),self.X.max()
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
Xnew = np.linspace(xmin,xmax,100)[:,None]
mu_f, var_f, mu_phi = self.predict(Xnew)
pb.subplot(211)
self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.X,Mean_u=self.mu,Var_u=np.diag(self.Sigma))
pb.subplot(212)
self.likelihood.plot1Db(self.X,Xnew,mu_phi)
elif self.X.shape[1]==2:
pb.figure()
x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0)
x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0)
x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min)
x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x2max-x2min)
axis1 = np.linspace(x1min,x1max,50)
axis2 = np.linspace(x2min,x2max,50)
XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)]
Xnew = np.c_[XX1.flatten(),XX2.flatten()]
f,v,p = self.predict(Xnew)
self.likelihood.plot2D(self.X,Xnew,p)
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense
"""
Fits sparse_EP and optimizes the hyperparameters iteratively until convergence is achieved.
"""
self.epsilon_em = epsilon
log_likelihood_change = self.epsilon_em + 1.
self.parameters_path = [self.kernel._get_params()]
self.approximate_likelihood()
self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
self.log_likelihood_path = [self.log_likelihood()]
iteration = 0
while log_likelihood_change > self.epsilon_em:
print 'EM iteration', iteration
self.optimize(max_f_eval = max_f_eval)
log_likelihood_new = self.log_likelihood()
log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
if log_likelihood_change < 0:
print 'log_likelihood decrement'
self.kernel._set_params_transformed(self.parameters_path[-1])
self.kernM._set_params_transformed(self.parameters_path[-1])
else:
self.approximate_likelihood()
self.log_likelihood_path.append(self.log_likelihood())
self.parameters_path.append(self.kernel._get_params())
self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
iteration += 1

View file

@ -1,18 +1,18 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from GP import GP
from .. import likelihoods
from .. import kern from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot, Tango
class GP_regression(model): class GP_regression(GP):
""" """
Gaussian Process model for regression Gaussian Process model for regression
This is a thin wrapper around the GP class, with a set of sensible defalts
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
@ -29,199 +29,8 @@ class GP_regression(model):
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None): def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1]) kernel = kern.rbf(X.shape[1])
# parse arguments likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
self.Xslices = Xslices
assert isinstance(kernel, kern.kern)
self.kern = kernel
self.X = X
self.Y = Y
assert len(self.X.shape)==2
assert len(self.Y.shape)==2
assert self.X.shape[0] == self.Y.shape[0]
self.N, self.D = self.Y.shape
self.N, self.Q = self.X.shape
#here's some simple normalisation GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
if normalize_Y:
self._Ymean = Y.mean(0)[None,:]
self._Ystd = Y.std(0)[None,:]
self.Y = (Y.copy()- self._Ymean) / self._Ystd
else:
self._Ymean = np.zeros((1,self.Y.shape[1]))
self._Ystd = np.ones((1,self.Y.shape[1]))
if self.D > self.N:
# then it's more efficient to store YYT
self.YYT = np.dot(self.Y, self.Y.T)
else:
self.YYT = None
model.__init__(self)
def _set_params(self,p):
self.kern._set_params_transformed(p)
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
def _get_params(self):
return self.kern._get_params_transformed()
def _get_param_names(self):
return self.kern._get_param_names_transformed()
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
else:
return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
def log_likelihood(self):
complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
return complexity_term + self._model_fit_term()
def dL_dK(self):
if self.YYT is None:
alpha = np.dot(self.Ki,self.Y)
dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
else:
dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
return dL_dK
def _log_likelihood_gradients(self):
return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
def predict(self,Xnew, slices=None, full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
- If None, the new points covary throigh every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
"""
#normalise X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov)
#un-normalise
mu = mu*self._Ystd + self._Ymean
if full_cov:
if self.D==1:
var *= np.square(self._Ystd)
else:
var = var[:,:,None] * np.square(self._Ystd)
else:
if self.D==1:
var *= np.square(np.squeeze(self._Ystd))
else:
var = var[:,None] * np.square(self._Ystd)
return mu,var
def _raw_predict(self,_Xnew,slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
KiKx = np.dot(self.Ki,Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
return mu, var
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
"""
:param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
which_data = slice(None)
X = self.X[which_data,:]
Y = self.Y[which_data,:]
Xorig = X*self._Xstd + self._Xmean
Yorig = Y*self._Ystd + self._Ymean
if plot_limits is None:
xmin,xmax = Xorig.min(0),Xorig.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
if self.X.shape[1]==1:
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
m,v = self.predict(Xnew,slices=which_functions)
gpplot(Xnew,m,v)
if samples:
s = np.random.multivariate_normal(m.flatten(),v,samples)
pb.plot(Xnew.flatten(),s.T, alpha = 0.4, c='#3465a4', linewidth = 0.8)
pb.plot(Xorig,Yorig,'kx',mew=1.5)
pb.xlim(xmin,xmax)
elif self.X.shape[1]==2:
resolution = 50 or resolution
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xtest = np.vstack((xx.flatten(),yy.flatten())).T
zz,vv = self.predict(Xtest,slices=which_functions)
zz = zz.reshape(resolution,resolution)
pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"

View file

@ -2,12 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from GP import GP
from GP_regression import GP_regression from GP_regression import GP_regression
from sparse_GP import sparse_GP
from sparse_GP_regression import sparse_GP_regression from sparse_GP_regression import sparse_GP_regression
from GPLVM import GPLVM from GPLVM import GPLVM
from warped_GP import warpedGP from warped_GP import warpedGP
from GP_EP import GP_EP
from generalized_FITC import generalized_FITC
from sparse_GPLVM import sparse_GPLVM from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP #from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from BGPLVM import Bayesian_GPLVM from BGPLVM import Bayesian_GPLVM

View file

@ -1,241 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot
from ..inference.Expectation_Propagation import FITC
from ..inference.likelihoods import likelihood,probit
class generalized_FITC(model):
def __init__(self,X,likelihood,kernel=None,inducing=10,epsilon_ep=1e-3,powerep=[1.,1.]):
"""
Naish-Guzman, A. and Holden, S. (2008) implementation of EP with FITC.
:param X: input observations
:param likelihood: Output's likelihood (likelihood class)
:param kernel: a GPy kernel
:param inducing: Either an array specifying the inducing points location or a scalar defining their number.
:param epsilon_ep: EP convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
"""
assert isinstance(kernel,kern.kern)
self.likelihood = likelihood
self.Y = self.likelihood.Y
self.kernel = kernel
self.X = X
self.N, self.D = self.X.shape
assert self.Y.shape[0] == self.N
if type(inducing) == int:
self.M = inducing
self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1)
elif type(inducing) == np.ndarray:
self.Z = inducing
self.M = self.Z.shape[0]
self.eta,self.delta = powerep
self.epsilon_ep = epsilon_ep
self.jitter = 1e-12
model.__init__(self)
def _set_params(self,p):
self.kernel._set_params_transformed(p[0:-self.Z.size])
self.Z = p[-self.Z.size:].reshape(self.M,self.D)
def _get_params(self):
return np.hstack([self.kernel._get_params_transformed(),self.Z.flatten()])
def _get_param_names(self):
return self.kernel._get_param_names_transformed()+['iip_%i'%i for i in range(self.Z.size)]
def approximate_likelihood(self):
self.Kmm = self.kernel.K(self.Z)
self.Knm = self.kernel.K(self.X,self.Z)
self.Knn_diag = self.kernel.Kdiag(self.X)
self.ep_approx = FITC(self.Kmm,self.likelihood,self.Knm.T,self.Knn_diag,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
self.ep_approx.fit_EP()
def posterior_param(self):
self.Knn_diag = self.kernel.Kdiag(self.X)
self.Kmm = self.kernel.K(self.Z)
self.Kmmi, self.Lmm, self.Lmmi, self.Kmm_logdet = pdinv(self.Kmm)
self.Knm = self.kernel.K(self.X,self.Z)
self.KmmiKmn = np.dot(self.Kmmi,self.Knm.T)
self.Qnn = np.dot(self.Knm,self.KmmiKmn)
self.Diag0 = self.Knn_diag - np.diag(self.Qnn)
self.R0 = np.linalg.cholesky(self.Kmmi).T
self.Taut = self.ep_approx.tau_tilde/(1.+ self.ep_approx.tau_tilde*self.Diag0)
self.KmnTaut = self.Knm.T*self.Taut[None,:]
self.KmnTautKnm = np.dot(self.KmnTaut, self.Knm)
self.Woodbury_inv, self.Wood_L, self.Wood_Li, self.Woodbury_logdet = pdinv(self.Kmm + self.KmnTautKnm)
self.Qnn_diag = self.Knn_diag - np.diag(self.Qnn) + 1./self.ep_approx.tau_tilde
self.Qi = -np.dot(self.KmnTaut.T, np.dot(self.Woodbury_inv,self.KmnTaut)) + np.diag(self.Taut)
self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - 0.5*self.Kmm_logdet + 0.5*self.Woodbury_logdet
self.Diag = self.Diag0/(1.+ self.Diag0 * self.ep_approx.tau_tilde)
self.P = (self.Diag / self.Diag0)[:,None] * self.Knm
self.RPT0 = np.dot(self.R0,self.Knm.T)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.R0,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.ep_approx.v_tilde
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.ep_approx.v_tilde))
self.mu = self.w + np.dot(self.P,self.gamma)
self.mu_tilde = (self.ep_approx.v_tilde/self.ep_approx.tau_tilde)[:,None]
def log_likelihood(self):
self.posterior_param()
self.YYT = np.dot(self.mu_tilde,self.mu_tilde.T)
A = -self.hld
B = -.5*np.sum(self.Qi*self.YYT)
C = sum(np.log(self.ep_approx.Z_hat))
D = .5*np.sum(np.log(1./self.ep_approx.tau_tilde + 1./self.ep_approx.tau_))
E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde))
return A + B + C + D + E
def _log_likelihood_gradients(self):
dKmm_dtheta = self.kernel.dK_dtheta(self.Z)
dKnn_dtheta = self.kernel.dK_dtheta(self.X)
dKmn_dtheta = self.kernel.dK_dtheta(self.Z,self.X)
dKmm_dZ = -self.kernel.dK_dX(self.Z)
dKnm_dZ = -self.kernel.dK_dX(self.X,self.Z)
tmp = [np.dot(dKmn_dtheta_i,self.KmmiKmn) for dKmn_dtheta_i in dKmn_dtheta.T]
dQnn_dtheta = [tmp_i + tmp_i.T - np.dot(np.dot(self.KmmiKmn.T,dKmm_dtheta_i),self.KmmiKmn) for tmp_i,dKmm_dtheta_i in zip(tmp,dKmm_dtheta.T)]
dDiag0_dtheta = [np.diag(dKnn_dtheta_i) - np.diag(dQnn_dtheta_i) for dKnn_dtheta_i,dQnn_dtheta_i in zip(dKnn_dtheta.T,dQnn_dtheta)]
dQ_dtheta = [np.diag(dDiag0_dtheta_i) + dQnn_dtheta_i for dDiag0_dtheta_i,dQnn_dtheta_i in zip(dDiag0_dtheta,dQnn_dtheta)]
dW_dtheta = [dKmm_dtheta_i + 2*np.dot(self.KmnTaut,dKmn_dtheta_i) - np.dot(self.KmnTaut*dDiag0_dtheta_i,self.KmnTaut.T) for dKmm_dtheta_i,dDiag0_dtheta_i,dKmn_dtheta_i in zip(dKmm_dtheta.T,dDiag0_dtheta,dKmn_dtheta.T)]
QiY = np.dot(self.Qi, self.mu_tilde)
QiYYQi = np.outer(QiY,QiY)
WiKmnTaut = np.dot(self.Woodbury_inv,self.KmnTaut)
K_Y = np.dot(self.KmmiKmn,QiY)
# gradient - theta
Atheta = [-0.5*np.dot(self.Taut,dDiag0_dtheta_i) + 0.5*np.sum(self.Kmmi*dKmm_dtheta_i) - 0.5*np.sum(self.Woodbury_inv*dW_dtheta_i) for dDiag0_dtheta_i,dKmm_dtheta_i,dW_dtheta_i in zip(dDiag0_dtheta,dKmm_dtheta.T,dW_dtheta)]
Btheta = np.array([0.5*np.sum(QiYYQi*dQ_dtheta_i) for dQ_dtheta_i in dQ_dtheta])
dL_dtheta = Atheta + Btheta
# gradient - Z
# Az
dQnn_dZ_diag_a2 = (np.array([d[:,:,None]*self.KmmiKmn[:,:,None] for d in dKnm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0)
dQnn_dZ_diag_b2 = (np.array([(self.KmmiKmn*np.sum(d[:,:,None]*self.KmmiKmn,-2))[:,:,None] for d in dKmm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0)
dQnn_dZ_diag = dQnn_dZ_diag_a2 - dQnn_dZ_diag_b2
d_hld_Diag1_dZ = -np.sum(np.dot(self.KmmiKmn*self.Taut,self.KmmiKmn.T)[:,:,None]*dKmm_dZ,-2) + np.sum((self.KmmiKmn*self.Taut)[:,:,None]*dKnm_dZ,-2)
d_hld_Kmm_dZ = np.sum(self.Kmmi[:,:,None]*dKmm_dZ,-2)
d_hld_W_dZ1 = np.sum(WiKmnTaut[:,:,None]*dKnm_dZ,-2)
d_hld_W_dZ3 = np.sum(self.Woodbury_inv[:,:,None]*dKmm_dZ,-2)
d_hld_W_dZ2 = np.array([np.sum(np.sum(WiKmnTaut.T*d[:,:,None]*self.KmnTaut.T,-2),-1) for d in dQnn_dZ_diag.transpose(2,0,1)]).T
Az = d_hld_Diag1_dZ + d_hld_Kmm_dZ - d_hld_W_dZ1 - d_hld_W_dZ2 - d_hld_W_dZ3
# Bz
Bz2 = np.sum(np.dot(K_Y,QiY.T)[:,:,None]*dKnm_dZ,-2)
Bz3 = - np.sum(np.dot(K_Y,K_Y.T)[:,:,None]*dKmm_dZ,-2)
Bz1 = -np.array([np.sum((QiY**2)*d[:,:,None],-2) for d in dQnn_dZ_diag.transpose(2,0,1)]).reshape(self.D,self.M).T
Bz = Bz1 + Bz2 + Bz3
dL_dZ = (Az + Bz).flatten()
return np.hstack([dL_dtheta, dL_dZ])
def predict(self,X):
"""
Make a prediction for the vsGP model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
#TODO: check output dimensions
K_x = self.kernel.K(self.Z,X)
Kxx = self.kernel.K(X)
#K_x = self.kernM.cross.K(X)
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
KR0T = np.dot(K_x.T,self.R0.T)
mu_star = np.dot(KR0T,mu_H)
sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
vdiag = np.diag(sigma2_star)
# q(y_star|y) = non-gaussian posterior probability of class membership
p = self.likelihood.predictive_mean(mu_star,vdiag)
return mu_star,vdiag,p
def plot(self):
"""
Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals.
"""
if self.X.shape[1]==1:
pb.figure()
xmin,xmax = np.r_[self.X,self.Z].min(),np.r_[self.X,self.Z].max()
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
Xnew = np.linspace(xmin,xmax,100)[:,None]
mu_f, var_f, mu_phi = self.predict(Xnew)
self.mu_inducing,self.var_diag_inducing,self.phi_inducing = self.predict(self.Z)
pb.subplot(211)
self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.Z,Mean_u=self.mu_inducing,Var_u=self.var_diag_inducing)
pb.subplot(212)
self.likelihood.plot1Db(self.X,Xnew,mu_phi,self.Z)
elif self.X.shape[1]==2:
pb.figure()
x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0)
x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0)
x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min)
x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x2max-x2min)
axis1 = np.linspace(x1min,x1max,50)
axis2 = np.linspace(x2min,x2max,50)
XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)]
Xnew = np.c_[XX1.flatten(),XX2.flatten()]
f,v,p = self.predict(Xnew)
self.likelihood.plot2D(self.X,Xnew,p,self.Z)
else:
raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense
"""
Fits sparse_EP and optimizes the hyperparameters iteratively until convergence is achieved.
"""
self.epsilon_em = epsilon
log_likelihood_change = self.epsilon_em + 1.
self.parameters_path = [self.kernel._get_params()]
self.approximate_likelihood()
self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
self.inducing_inputs_path = [self.Z]
self.log_likelihood_path = [self.log_likelihood()]
iteration = 0
while log_likelihood_change > self.epsilon_em:
print 'EM iteration', iteration
self.optimize(max_f_eval = max_f_eval)
log_likelihood_new = self.log_likelihood()
log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
if log_likelihood_change < 0:
print 'log_likelihood decrement'
self.kernel._set_params_transformed(self.parameters_path[-1])
self.kernM = self.kernel.copy()
self.kernM.expand_X(self.inducing_inputs_path[-1])
self.__init__(self.kernel,self.likelihood,kernM=self.kernM,powerep=[self.eta,self.delta],epsilon_ep = self.epsilon_ep, epsilon_em = self.epsilon_em)
else:
self.approximate_likelihood()
self.log_likelihood_path.append(self.log_likelihood())
self.parameters_path.append(self.kernel._get_params())
self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
self.inducing_inputs_path.append(self.Z)
iteration += 1

GPy/models/sparse_GP.py (new file, 217 lines)
View file

@ -0,0 +1,217 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from GP import GP
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP(GP):
"""
Variational sparse GP model
:param X: inputs
:type X: np.ndarray (N x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
self.scale_factor = 1.0# a scaling factor to help keep the algorithm stable
self.Z = Z
self.Zslices = Zslices
self.Xslices = Xslices
self.M = Z.shape[0]
self.likelihood = likelihood
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
#normalise X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
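#A hypothetical construction of this class, mirroring how Bayesian_GPLVM
#builds it elsewhere in this commit (data values invented):
#  import numpy as np
#  from GPy import kern, likelihoods
#  from GPy.models import sparse_GP
#  X = np.random.rand(50, 1)
#  Y = np.sin(6.*X) + 0.1*np.random.randn(50, 1)
#  Z = np.random.permutation(X.copy())[:10] #10 inducing inputs drawn from the data
#  m = sparse_GP(X, likelihoods.Gaussian(Y), kern.rbf(1) + kern.white(1), Z=Z)
#  m.optimize()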
def _computations(self):
# TODO find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
sf = self.scale_factor
sf2 = sf**2
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
if self.likelihood.is_heteroscedastic:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
#TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
else:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
self.psi1 = self.kern.K(self.Z,self.X)
if self.likelihood.is_heteroscedastic:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(self.N,1))/sf)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] # TODO: remove me for efficiency and stability
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi # FIXME: this is untested for the het. case
self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
if self.likelihood.is_heteroscedastic:
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
else:
self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E[None,:,:] # dD
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
#the partial derivative vector for the likelihood
if self.likelihood.Nparams ==0:
#save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
            raise NotImplementedError, "heteroscedastic derivatives not implemented"
#self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
#self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
#self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else:
            #likelihood is not heteroscedastic
beta = self.likelihood.precision
dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),GP._get_params(self)])
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + GP._get_param_names(self)
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
if self.Q==1:
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
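
The class above is driven by constructing a likelihood and kernel and handing them over together with the inducing inputs. A minimal usage sketch follows (hypothetical data; it assumes the sparse_GP class is exposed as GPy.models.sparse_GP, and reuses the Gaussian likelihood and rbf/white kernels that appear elsewhere in this commit):

    import numpy as np
    import GPy

    X = np.random.rand(50, 1)
    Y = np.sin(6 * X) + 0.05 * np.random.randn(50, 1)
    Z = np.random.permutation(X.copy())[:10]    # M = 10 inducing inputs

    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
    likelihood = GPy.likelihoods.Gaussian(Y)
    m = GPy.models.sparse_GP(X, likelihood, kernel, Z)
    m.ensure_default_constraints()
    print m.log_likelihood()                    # the variational lower bound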

View file

@ -1,205 +1,46 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from sparse_GP import sparse_GP
from ..util.linalg import mdot, jitchol, chol_inv, pdinv from .. import likelihoods
from ..util.plot import gpplot
from .. import kern from .. import kern
from ..inference.likelihoods import likelihood from ..inference.likelihoods import likelihood
from GP_regression import GP_regression from GP_regression import GP_regression
#Still TODO: class sparse_GP_regression(sparse_GP):
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP_regression(GP_regression):
""" """
Variational sparse GP model (Regression) Gaussian Process model for regression
This is a thin wrapper around the GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO> ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
""" """
def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False): def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
self.scale_factor = 100.0 #kern defaults to rbf
self.beta = beta if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
#Z defaults to a subset of the data
if Z is None: if Z is None:
self.Z = np.random.permutation(X.copy())[:M] Z = np.random.permutation(X.copy())[:M]
self.M = M
else: else:
assert Z.shape[1]==X.shape[1] assert Z.shape[1]==X.shape[1]
self.Z = Z
self.M = Z.shape[0]
if X_uncertainty is None:
self.has_uncertain_inputs=False
else:
assert X_uncertainty.shape==X.shape
self.has_uncertain_inputs=True
self.X_uncertainty = X_uncertainty
GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y) #likelihood defaults to Gaussian
self.trYYT = np.sum(np.square(self.Y)) likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
#normalise X uncertainty also sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _computations(self):
# TODO find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X)
#self.psi2 = np.dot(self.psi1,self.psi1.T)
#self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta))
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
sf = self.scale_factor
sf2 = sf**2
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
self.V = (self.beta/self.scale_factor)*self.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.beta = p[self.M*self.Q]
self.kern._set_params(p[self.Z.size + 1:])
self._computations()
def _get_params(self):
return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT
B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def _log_likelihood_gradients(self):
return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
def dL_dbeta(self):
"""
Compute the gradient of the log likelihood wrt beta.
"""
        #TODO: support heteroscedastic noise
sf2 = self.scale_factor**2
dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
return mu,var
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
"""
GP_regression.plot(self,*args,**kwargs)
if self.Q==1:
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
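
For reference, the four terms assembled in log_likelihood in the sparse models above transcribe to the following (a reading of the code, not a derivation; beta is the noise precision, sf the scale factor, psi0 and psi1 the kernel statistics, and A_m, B_m, C_m the matrices stored as self.A, self.B, self.C):

    A = -\tfrac{ND}{2}\left(\log 2\pi - \log\beta\right) - \tfrac{\beta}{2}\,\mathrm{tr}\left(YY^\top\right)
    B = -\tfrac{D}{2}\left(\beta\,\psi_0 - \mathrm{tr}(A_m)\,sf^2\right)
    C = -\tfrac{D}{2}\left(\log\lvert B_m\rvert + M\log sf^2\right)
    D = +\tfrac{1}{2}\,\mathrm{tr}\left(C_m\,(\psi_1 V)(\psi_1 V)^\top\right)

with the bound returned as A + B + C + D.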

View file

@ -6,7 +6,7 @@ import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from ..inference.likelihoods import likelihood from ..likelihoods import likelihood
from sparse_GP_regression import sparse_GP_regression from sparse_GP_regression import sparse_GP_regression
class uncollapsed_sparse_GP(sparse_GP_regression): class uncollapsed_sparse_GP(sparse_GP_regression):
@ -136,8 +136,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
#dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean) #dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0] dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0]
#dL_dSim = #dL_dSim =
#dL_dmhSi = #dL_dmhSi =
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO
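
The changed line stores q(u) by its canonical parameters, so q_u_canonical[0] plays the role of \Lambda m (\Lambda presumably the precision of q(u), as the replaced expression suggests); the gradient of the bound with respect to the mean then reads

    \frac{\partial L}{\partial m} = K_{mm}^{-1}\,\Psi_1 V - \Lambda m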

View file

@ -154,17 +154,16 @@ class GradientTests(unittest.TestCase):
m.constrain_positive('(linear|bias|white)') m.constrain_positive('(linear|bias|white)')
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_GP_EP(self): def test_GP_EP_probit(self):
return # Disabled TODO
N = 20 N = 20
X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] kernel = GPy.kern.rbf(1)
likelihood = GPy.inference.likelihoods.probit(Y) distribution = GPy.likelihoods.likelihood_functions.probit()
m = GPy.models.GP_EP(X,likelihood,k) likelihood = GPy.likelihoods.EP(Y, distribution)
m.constrain_positive('(var|len)') m = GPy.models.GP(X, likelihood, kernel)
m.approximate_likelihood() m.ensure_default_constraints()
self.assertTrue(m.checkgrad()) self.assertTrue(m.EPEM)
@unittest.skip("FITC will be broken for a while") @unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self): def test_generalized_FITC(self):
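
The rewritten probit test above exercises the new likelihoods package end to end; the same route in isolation looks like this (a sketch mirroring the test with made-up data; EPEM is the attribute the assertion touches):

    import numpy as np
    import GPy

    N = 20
    X = np.hstack([np.random.normal(5, 2, N/2), np.random.normal(10, 2, N/2)])[:, None]
    Y = np.hstack([np.ones(N/2), -np.ones(N/2)])[:, None]

    kernel = GPy.kern.rbf(1)
    distribution = GPy.likelihoods.likelihood_functions.probit()
    likelihood = GPy.likelihoods.EP(Y, distribution)
    m = GPy.models.GP(X, likelihood, kernel)
    m.ensure_default_constraints()
    # the test then asserts m.EPEM, the alternating EP / optimisation driver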

View file

@ -3,7 +3,6 @@
import matplotlib as mpl import matplotlib as mpl
import pylab as pb import pylab as pb
import sys import sys
#sys.path.append('/home/james/mlprojects/sitran_cluster/') #sys.path.append('/home/james/mlprojects/sitran_cluster/')
@ -15,12 +14,12 @@ def removeRightTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()
for i, line in enumerate(ax.get_yticklines()): for i, line in enumerate(ax.get_yticklines()):
if i%2 == 1: # odd indices if i%2 == 1: # odd indices
line.set_visible(False) line.set_visible(False)
def removeUpperTicks(ax=None): def removeUpperTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()
for i, line in enumerate(ax.get_xticklines()): for i, line in enumerate(ax.get_xticklines()):
if i%2 == 1: # odd indices if i%2 == 1: # odd indices
line.set_visible(False) line.set_visible(False)
def fewerXticks(ax=None,divideby=2): def fewerXticks(ax=None,divideby=2):
ax = ax or pb.gca() ax = ax or pb.gca()
ax.set_xticks(ax.get_xticks()[::divideby]) ax.set_xticks(ax.get_xticks()[::divideby])
@ -126,8 +125,6 @@ cdict_RB = {'red' :((0.,coloursRGB['mediumRed'][0]/256.,coloursRGB['mediumRed'][
'blue':((0.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.), 'blue':((0.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.),
(.5,coloursRGB['mediumPurple'][2]/256.,coloursRGB['mediumPurple'][2]/256.), (.5,coloursRGB['mediumPurple'][2]/256.,coloursRGB['mediumPurple'][2]/256.),
(1.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.))} (1.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.))}
cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.), cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.),
(.5,coloursRGB['mediumGreen'][0]/256.,coloursRGB['mediumGreen'][0]/256.), (.5,coloursRGB['mediumGreen'][0]/256.,coloursRGB['mediumGreen'][0]/256.),
@ -138,7 +135,7 @@ cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue
'blue':((0.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.), 'blue':((0.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.),
(.5,coloursRGB['mediumGreen'][2]/256.,coloursRGB['mediumGreen'][2]/256.), (.5,coloursRGB['mediumGreen'][2]/256.,coloursRGB['mediumGreen'][2]/256.),
(1.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.))} (1.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.))}
cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminium1'][0]/256.), cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminium1'][0]/256.),
(1./5,coloursRGB['Aluminium2'][0]/256.,coloursRGB['Aluminium2'][0]/256.), (1./5,coloursRGB['Aluminium2'][0]/256.,coloursRGB['Aluminium2'][0]/256.),
@ -158,13 +155,12 @@ cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminiu
(3./5,coloursRGB['Aluminium4'][2]/256.,coloursRGB['Aluminium4'][2]/256.), (3./5,coloursRGB['Aluminium4'][2]/256.,coloursRGB['Aluminium4'][2]/256.),
(4./5,coloursRGB['Aluminium5'][2]/256.,coloursRGB['Aluminium5'][2]/256.), (4./5,coloursRGB['Aluminium5'][2]/256.,coloursRGB['Aluminium5'][2]/256.),
(5./5,coloursRGB['Aluminium6'][2]/256.,coloursRGB['Aluminium6'][2]/256.))} (5./5,coloursRGB['Aluminium6'][2]/256.,coloursRGB['Aluminium6'][2]/256.))}
cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256) # cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256)
# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
if __name__=='__main__': if __name__=='__main__':
import pylab as pb import pylab as pb
pb.figure() pb.figure()
pb.pcolor(pb.rand(10,10),cmap=cmap_RB) pb.pcolor(pb.rand(10,10),cmap=cmap_RB)
pb.colorbar() pb.colorbar()
pb.show() pb.show()

View file

@ -6,30 +6,26 @@ import Tango
import pylab as pb import pylab as pb
import numpy as np import numpy as np
def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs): def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
if axes is None: if axes is None:
axes = pb.gca() axes = pb.gca()
mu = mu.flatten() mu = mu.flatten()
x = x.flatten() x = x.flatten()
lower = lower.flatten()
upper = upper.flatten()
#here's the mean #here's the mean
axes.plot(x,mu,color=edgecol,linewidth=2) axes.plot(x,mu,color=edgecol,linewidth=2)
#ensure variance is a vector #here's the box
if len(var.shape)>1:
err = 2*np.sqrt(np.diag(var))
else:
err = 2*np.sqrt(var)
#here's the 2*std box
kwargs['linewidth']=0.5 kwargs['linewidth']=0.5
if not 'alpha' in kwargs.keys(): if not 'alpha' in kwargs.keys():
kwargs['alpha'] = 0.3 kwargs['alpha'] = 0.3
axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs) axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs)
#this is the edge: #this is the edge:
axes.plot(x,mu+err,color=edgecol,linewidth=0.2) axes.plot(x,upper,color=edgecol,linewidth=0.2)
axes.plot(x,mu-err,color=edgecol,linewidth=0.2) axes.plot(x,lower,color=edgecol,linewidth=0.2)
def removeRightTicks(ax=None): def removeRightTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()
@ -74,4 +70,36 @@ def align_subplots(N,M,xlim=None, ylim=None):
else: else:
removeUpperTicks() removeUpperTicks()
def x_frame1D(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
assert X.shape[1] ==1, "x_frame1D is defined for one-dimensional inputs"
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
return Xnew, xmin, xmax
def x_frame2D(X,plot_limits=None,resolution=None):
"""
Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
"""
assert X.shape[1] ==2, "x_frame2D is defined for two-dimensional inputs"
if plot_limits is None:
xmin,xmax = X.min(0),X.max(0)
xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
elif len(plot_limits)==2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
resolution = resolution or 50
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
return Xnew, xx, yy, xmin, xmax
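
With the signature change to gpplot above, callers now compute the credible band themselves and pass explicit lower and upper curves; combined with the new x_frame1D helper, a plotting path looks roughly like this (that m.predict returns a mean and variance is an assumption about the model API):

    import numpy as np
    from GPy.util.plot import gpplot, x_frame1D

    Xnew, xmin, xmax = x_frame1D(X)    # 200-point grid, padded 20% beyond the data
    mu, var = m.predict(Xnew)          # assumed: posterior mean and variance at Xnew
    err = 2 * np.sqrt(var.flatten())
    gpplot(Xnew, mu, mu.flatten() - err, mu.flatten() + err)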

View file

@ -1,7 +1,7 @@
GPy GPy
=== ===
A Gaussian processes framework in python. A Gaussian processes framework in python
* [Online documentation](https://gpy.readthedocs.org/en/latest/) * [Online documentation](https://gpy.readthedocs.org/en/latest/)
* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy) * [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)

BIN
doc/Figures/kern-def.png Normal file

Binary file not shown. (new image, 38 KiB)

[Eight further binary image files changed; contents not shown: three modified (30 KiB → 32 KiB, 45 KiB → 51 KiB, 78 KiB → 77 KiB) and five added (63 KiB, 129 KiB, 38 KiB, 55 KiB, 84 KiB).]

View file

@ -33,6 +33,14 @@ examples Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`poisson` Module
---------------------
.. automodule:: GPy.examples.poisson
:members:
:undoc-members:
:show-inheritance:
:mod:`regression` Module :mod:`regression` Module
------------------------ ------------------------
@ -57,6 +65,14 @@ examples Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sparse_ep_fix` Module
---------------------------
.. automodule:: GPy.examples.sparse_ep_fix
:members:
:undoc-members:
:show-inheritance:
:mod:`uncertain_input_GP_regression_demo` Module :mod:`uncertain_input_GP_regression_demo` Module
------------------------------------------------ ------------------------------------------------

View file

@ -1,22 +1,6 @@
inference Package inference Package
================= =================
:mod:`Expectation_Propagation` Module
-------------------------------------
.. automodule:: GPy.inference.Expectation_Propagation
:members:
:undoc-members:
:show-inheritance:
:mod:`likelihoods` Module
-------------------------
.. automodule:: GPy.inference.likelihoods
:members:
:undoc-members:
:show-inheritance:
:mod:`optimization` Module :mod:`optimization` Module
-------------------------- --------------------------

doc/GPy.likelihoods.rst Normal file (43 lines)
View file

@ -0,0 +1,43 @@
likelihoods Package
===================
:mod:`likelihoods` Package
--------------------------
.. automodule:: GPy.likelihoods
:members:
:undoc-members:
:show-inheritance:
:mod:`EP` Module
----------------
.. automodule:: GPy.likelihoods.EP
:members:
:undoc-members:
:show-inheritance:
:mod:`Gaussian` Module
----------------------
.. automodule:: GPy.likelihoods.Gaussian
:members:
:undoc-members:
:show-inheritance:
:mod:`likelihood` Module
------------------------
.. automodule:: GPy.likelihoods.likelihood
:members:
:undoc-members:
:show-inheritance:
:mod:`likelihood_functions` Module
----------------------------------
.. automodule:: GPy.likelihoods.likelihood_functions
:members:
:undoc-members:
:show-inheritance:

View file

@ -17,18 +17,18 @@ models Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`GPLVM` Module :mod:`GP` Module
------------------- ----------------
.. automodule:: GPy.models.GPLVM .. automodule:: GPy.models.GP
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`GP_EP` Module :mod:`GPLVM` Module
------------------- -------------------
.. automodule:: GPy.models.GP_EP .. automodule:: GPy.models.GPLVM
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
@ -41,10 +41,10 @@ models Package
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`generalized_FITC` Module :mod:`sparse_GP` Module
------------------------------ -----------------------
.. automodule:: GPy.models.generalized_FITC .. automodule:: GPy.models.sparse_GP
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:

View file

@ -18,6 +18,7 @@ Subpackages
GPy.examples GPy.examples
GPy.inference GPy.inference
GPy.kern GPy.kern
GPy.likelihoods
GPy.models GPy.models
GPy.util GPy.util

View file

@ -41,6 +41,7 @@ help:
clean: clean:
-rm -rf $(BUILDDIR)/* -rm -rf $(BUILDDIR)/*
html: html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo @echo

View file

@ -11,12 +11,50 @@
# All configuration values have a default; values that are commented out # All configuration values have a default; values that are commented out
# serve to show the default. # serve to show the default.
import sys, os import sys
import os
print "python exec:", sys.executable
print "sys.path:", sys.path
try:
import numpy
print "numpy: %s, %s" % (numpy.__version__, numpy.__file__)
except ImportError:
print "no numpy"
try:
import matplotlib
print "matplotlib: %s, %s" % (matplotlib.__version__, matplotlib.__file__)
except ImportError:
print "no matplotlib"
try:
    import IPython
    print "IPython: %s, %s" % (IPython.__version__, IPython.__file__)
except ImportError:
print "no ipython"
try:
import sphinx
print "sphinx: %s, %s" % (sphinx.__version__, sphinx.__file__)
except ImportError:
print "no sphinx"
print "sys.path:", sys.path
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.')) #sys.path.insert(0, os.path.abspath('../GPy'))
#print "sys.path.after:", sys.path
# If your extensions are in another directory, add it here. If the directory
# is relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here.
sys.path.append(os.path.abspath('sphinxext'))
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('./sphinxext'))
# -- General configuration ----------------------------------------------------- # -- General configuration -----------------------------------------------------
@ -25,15 +63,77 @@ import sys, os
# Add any Sphinx extension module names here, as strings. They can be extensions # Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] print "Importing extensions"
extensions = ['sphinx.ext.autodoc',
#'sphinx.ext.doctest'
'sphinx.ext.viewcode',
'sphinx.ext.pngmath',
'ipython_directive',
'ipython_console_highlighting'
#'matplotlib.sphinxext.plot_directive'
]
plot_formats = [('png', 80), ('pdf', 50)]
print "finished importing"
##############################################################################
##
## Mock out imports with C dependencies because ReadTheDocs can't build them.
#############################################################################
class Mock(object):
def __init__(self, *args, **kwargs):
pass
def __call__(self, *args, **kwargs):
return Mock()
@classmethod
def __getattr__(cls, name):
if name in ('__file__', '__path__'):
return '/dev/null'
elif name[0] == name[0].upper():
mockType = type(name, (), {})
mockType.__module__ = __name__
return mockType
else:
return Mock()
#import mock
print "Mocking"
MOCK_MODULES = ['pylab', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'matplotlib']
#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ]
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = Mock()
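# With the loop above in place, each mocked import resolves to a Mock instance,
# so autodoc can import GPy on ReadTheDocs without the C-backed dependencies.
# Illustration only:
#   import pylab        # succeeds; pylab is a Mock()
#   pylab.plot(1, 2)    # returns another Mock() instead of drawing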
# ----------------------- READTHEDOCS ------------------ # ----------------------- READTHEDOCS ------------------
on_rtd = os.environ.get('READTHEDOCS', None) == 'True' on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
on_rtd = True
if on_rtd: if on_rtd:
sys.path.append("../GPy") sys.path.append(os.path.abspath('../GPy'))
os.system("pwd")
os.system("sphinx-apidoc -f -o . ../GPy") import subprocess
proc = subprocess.Popen("pwd", stdout=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
print "program output:", out
proc = subprocess.Popen("ls ../", stdout=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
print "program output:", out
proc = subprocess.Popen("sphinx-apidoc -f -o . ../GPy", stdout=subprocess.PIPE, shell=True)
(out, err) = proc.communicate()
print "program output:", out
#proc = subprocess.Popen("whereis numpy", stdout=subprocess.PIPE, shell=True)
#(out, err) = proc.communicate()
#print "program output:", out
#proc = subprocess.Popen("whereis matplotlib", stdout=subprocess.PIPE, shell=True)
#(out, err) = proc.communicate()
#print "program output:", out
print "Compiled files"
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates'] templates_path = ['_templates']
@ -181,21 +281,21 @@ htmlhelp_basename = 'GPydoc'
# -- Options for LaTeX output -------------------------------------------------- # -- Options for LaTeX output --------------------------------------------------
latex_elements = { latex_elements = {
# The paper size ('letterpaper' or 'a4paper'). # The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper', #'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt'). # The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt', #'pointsize': '10pt',
# Additional stuff for the LaTeX preamble. # Additional stuff for the LaTeX preamble.
#'preamble': '', #'preamble': '',
} }
# Grouping the document tree into LaTeX files. List of tuples # Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]). # (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [ latex_documents = [
('index', 'GPy.tex', u'GPy Documentation', ('index', 'GPy.tex', u'GPy Documentation',
u'Author', 'manual'), u'Author', 'manual'),
] ]
# The name of an image file (relative to this directory) to place at the top of # The name of an image file (relative to this directory) to place at the top of
@ -238,9 +338,9 @@ man_pages = [
# (source start file, target name, title, author, # (source start file, target name, title, author,
# dir menu entry, description, category) # dir menu entry, description, category)
texinfo_documents = [ texinfo_documents = [
('index', 'GPy', u'GPy Documentation', ('index', 'GPy', u'GPy Documentation',
u'Author', 'GPy', 'One line description of project.', u'Author', 'GPy', 'One line description of project.',
'Miscellaneous'), 'Miscellaneous'),
] ]
# Documents to append as an appendix to all manuals. # Documents to append as an appendix to all manuals.
@ -294,3 +394,5 @@ epub_copyright = u'2013, Author'
# Allow duplicate toc entries. # Allow duplicate toc entries.
#epub_tocdup = True #epub_tocdup = True
autodoc_member_order = "source"

doc/doc-requirements.txt Normal file (3 lines)
View file

@ -0,0 +1,3 @@
ipython
numpy
scipy

View file

@ -8,8 +8,8 @@ Welcome to GPy's documentation!
For a quick start, you can have a look at one of the tutorials: For a quick start, you can have a look at one of the tutorials:
* `Basic Gaussian process regression <tuto_GP_regression.html>`_ * `Basic Gaussian process regression <tuto_GP_regression.html>`_
* `A kernel overview <tuto_kernel_overview.html>`_
* Advanced GP regression (Forthcoming) * Advanced GP regression (Forthcoming)
* Kernel manipulation (Forthcoming)
* Writing kernels (Forthcoming) * Writing kernels (Forthcoming)
You may also be interested by some examples in the GPy/examples folder. You may also be interested by some examples in the GPy/examples folder.
@ -28,4 +28,3 @@ Indices and tables
* :ref:`genindex` * :ref:`genindex`
* :ref:`modindex` * :ref:`modindex`
* :ref:`search` * :ref:`search`

View file

@ -0,0 +1,115 @@
"""reST directive for syntax-highlighting ipython interactive sessions.
XXX - See what improvements can be made based on the new (as of Sept 2009)
'pycon' lexer for the python console. At the very least it will give better
highlighted tracebacks.
"""
#-----------------------------------------------------------------------------
# Needed modules
# Standard library
import re
# Third party
from pygments.lexer import Lexer, do_insertions
from pygments.lexers.agile import (PythonConsoleLexer, PythonLexer,
PythonTracebackLexer)
from pygments.token import Comment, Generic
from sphinx import highlighting
#-----------------------------------------------------------------------------
# Global constants
line_re = re.compile('.*?\n')
#-----------------------------------------------------------------------------
# Code begins - classes and functions
class IPythonConsoleLexer(Lexer):
"""
For IPython console output or doctests, such as:
.. sourcecode:: ipython
In [1]: a = 'foo'
In [2]: a
Out[2]: 'foo'
In [3]: print a
foo
In [4]: 1 / 0
Notes:
- Tracebacks are not currently supported.
- It assumes the default IPython prompts, not customized ones.
"""
name = 'IPython console session'
aliases = ['ipython']
mimetypes = ['text/x-ipython-console']
input_prompt = re.compile("(In \[[0-9]+\]: )|( \.\.\.+:)")
output_prompt = re.compile("(Out\[[0-9]+\]: )|( \.\.\.+:)")
continue_prompt = re.compile(" \.\.\.+:")
tb_start = re.compile("\-+")
def get_tokens_unprocessed(self, text):
pylexer = PythonLexer(**self.options)
tblexer = PythonTracebackLexer(**self.options)
curcode = ''
insertions = []
for match in line_re.finditer(text):
line = match.group()
input_prompt = self.input_prompt.match(line)
continue_prompt = self.continue_prompt.match(line.rstrip())
output_prompt = self.output_prompt.match(line)
if line.startswith("#"):
insertions.append((len(curcode),
[(0, Comment, line)]))
elif input_prompt is not None:
insertions.append((len(curcode),
[(0, Generic.Prompt, input_prompt.group())]))
curcode += line[input_prompt.end():]
elif continue_prompt is not None:
insertions.append((len(curcode),
[(0, Generic.Prompt, continue_prompt.group())]))
curcode += line[continue_prompt.end():]
elif output_prompt is not None:
# Use the 'error' token for output. We should probably make
                # our own token, but error is typically in a bright color like
# red, so it works fine for our output prompts.
insertions.append((len(curcode),
[(0, Generic.Error, output_prompt.group())]))
curcode += line[output_prompt.end():]
else:
if curcode:
for item in do_insertions(insertions,
pylexer.get_tokens_unprocessed(curcode)):
yield item
curcode = ''
insertions = []
yield match.start(), Generic.Output, line
if curcode:
for item in do_insertions(insertions,
pylexer.get_tokens_unprocessed(curcode)):
yield item
def setup(app):
"""Setup as a sphinx extension."""
# This is only a lexer, so adding it below to pygments appears sufficient.
# But if somebody knows that the right API usage should be to do that via
# sphinx, by all means fix it here. At least having this setup.py
# suppresses the sphinx warning we'd get without it.
pass
#-----------------------------------------------------------------------------
# Register the extension as a valid pygments lexer
highlighting.lexers['ipython'] = IPythonConsoleLexer()
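
Registered this way, the lexer is available to any `sourcecode:: ipython` block Sphinx highlights. A standalone smoke test with plain pygments would look like this (illustrative only; highlight and TerminalFormatter are standard pygments API):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    session = "In [1]: a = 'foo'\nOut[1]: 'foo'\n"
    print highlight(session, IPythonConsoleLexer(), TerminalFormatter())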

View file

@ -0,0 +1,835 @@
# -*- coding: utf-8 -*-
"""Sphinx directive to support embedded IPython code.
This directive allows pasting of entire interactive IPython sessions, prompts
and all, and their code will actually get re-executed at doc build time, with
all prompts renumbered sequentially. It also allows you to input code as a pure
python input by giving the argument python to the directive. The output looks
like an interactive ipython section.
To enable this directive, simply list it in your Sphinx ``conf.py`` file
(making sure the directory where you placed it is visible to sphinx, as is
needed for all Sphinx directives).
By default this directive assumes that your prompts are unchanged IPython ones,
but this can be customized. The configurable options that can be placed in
conf.py are
ipython_savefig_dir:
The directory in which to save the figures. This is relative to the
Sphinx source directory. The default is `html_static_path`.
ipython_rgxin:
The compiled regular expression to denote the start of IPython input
lines. The default is re.compile('In \[(\d+)\]:\s?(.*)\s*'). You
shouldn't need to change this.
ipython_rgxout:
The compiled regular expression to denote the start of IPython output
lines. The default is re.compile('Out\[(\d+)\]:\s?(.*)\s*'). You
shouldn't need to change this.
ipython_promptin:
The string to represent the IPython input prompt in the generated ReST.
The default is 'In [%d]:'. This expects that the line numbers are used
in the prompt.
ipython_promptout:
The string to represent the IPython prompt in the generated ReST. The
default is 'Out [%d]:'. This expects that the line numbers are used
in the prompt.
ToDo
----
- Turn the ad-hoc test() function into a real test suite.
- Break up ipython-specific functionality from matplotlib stuff into better
separated code.
Authors
-------
- John D Hunter: original author.
- Fernando Perez: refactoring, documentation, cleanups, port to 0.11.
- Václav Šmilauer <eudoxos-AT-arcig.cz>: Prompt generalizations.
- Skipper Seabold, refactoring, cleanups, pure python addition
"""
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
# Stdlib
import cStringIO
import os
import re
import sys
import tempfile
import ast
# To keep compatibility with various python versions
try:
from hashlib import md5
except ImportError:
from md5 import md5
# Third-party
try:
import matplotlib
matplotlib.use('Agg')
except ImportError:
print "Couldn't find matplotlib"
import sphinx
from docutils.parsers.rst import directives
from docutils import nodes
from sphinx.util.compat import Directive
# Our own
from IPython import Config, InteractiveShell
from IPython.core.profiledir import ProfileDir
from IPython.utils import io
#-----------------------------------------------------------------------------
# Globals
#-----------------------------------------------------------------------------
# for tokenizing blocks
COMMENT, INPUT, OUTPUT = range(3)
#-----------------------------------------------------------------------------
# Functions and class declarations
#-----------------------------------------------------------------------------
def block_parser(part, rgxin, rgxout, fmtin, fmtout):
"""
part is a string of ipython text, comprised of at most one
    input, one output, comments, and blank lines. The block parser
parses the text into a list of::
blocks = [ (TOKEN0, data0), (TOKEN1, data1), ...]
where TOKEN is one of [COMMENT | INPUT | OUTPUT ] and
data is, depending on the type of token::
COMMENT : the comment string
INPUT: the (DECORATOR, INPUT_LINE, REST) where
DECORATOR: the input decorator (or None)
INPUT_LINE: the input as string (possibly multi-line)
REST : any stdout generated by the input line (not OUTPUT)
OUTPUT: the output string, possibly multi-line
"""
block = []
lines = part.split('\n')
N = len(lines)
i = 0
decorator = None
while 1:
if i==N:
# nothing left to parse -- the last line
break
line = lines[i]
i += 1
line_stripped = line.strip()
if line_stripped.startswith('#'):
block.append((COMMENT, line))
continue
if line_stripped.startswith('@'):
# we're assuming at most one decorator -- may need to
# rethink
decorator = line_stripped
continue
# does this look like an input line?
matchin = rgxin.match(line)
if matchin:
lineno, inputline = int(matchin.group(1)), matchin.group(2)
# the ....: continuation string
continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2))
Nc = len(continuation)
# input lines can continue on for more than one line, if
# we have a '\' line continuation char or a function call
# echo line 'print'. The input line can only be
# terminated by the end of the block or an output line, so
# we parse out the rest of the input line if it is
# multiline as well as any echo text
rest = []
while i<N:
# look ahead; if the next line is blank, or a comment, or
# an output line, we're done
nextline = lines[i]
matchout = rgxout.match(nextline)
#print "nextline=%s, continuation=%s, starts=%s"%(nextline, continuation, nextline.startswith(continuation))
if matchout or nextline.startswith('#'):
break
elif nextline.startswith(continuation):
inputline += '\n' + nextline[Nc:]
else:
rest.append(nextline)
i+= 1
block.append((INPUT, (decorator, inputline, '\n'.join(rest))))
continue
# if it looks like an output line grab all the text to the end
# of the block
matchout = rgxout.match(line)
if matchout:
lineno, output = int(matchout.group(1)), matchout.group(2)
if i<N-1:
output = '\n'.join([output] + lines[i:])
block.append((OUTPUT, output))
break
return block
class EmbeddedSphinxShell(object):
"""An embedded IPython instance to run inside Sphinx"""
def __init__(self):
self.cout = cStringIO.StringIO()
# Create config object for IPython
config = Config()
config.Global.display_banner = False
config.Global.exec_lines = ['import numpy as np',
'from pylab import *'
]
config.InteractiveShell.autocall = False
config.InteractiveShell.autoindent = False
config.InteractiveShell.colors = 'NoColor'
# create a profile so instance history isn't saved
tmp_profile_dir = tempfile.mkdtemp(prefix='profile_')
profname = 'auto_profile_sphinx_build'
pdir = os.path.join(tmp_profile_dir,profname)
profile = ProfileDir.create_profile_dir(pdir)
# Create and initialize ipython, but don't start its mainloop
IP = InteractiveShell.instance(config=config, profile_dir=profile)
# io.stdout redirect must be done *after* instantiating InteractiveShell
io.stdout = self.cout
io.stderr = self.cout
# For debugging, so we can see normal output, use this:
#from IPython.utils.io import Tee
#io.stdout = Tee(self.cout, channel='stdout') # dbg
#io.stderr = Tee(self.cout, channel='stderr') # dbg
# Store a few parts of IPython we'll need.
self.IP = IP
self.user_ns = self.IP.user_ns
self.user_global_ns = self.IP.user_global_ns
self.input = ''
self.output = ''
self.is_verbatim = False
self.is_doctest = False
self.is_suppress = False
# on the first call to the savefig decorator, we'll import
# pyplot as plt so we can make a call to the plt.gcf().savefig
self._pyplot_imported = False
def clear_cout(self):
self.cout.seek(0)
self.cout.truncate(0)
def process_input_line(self, line, store_history=True):
"""process the input, capturing stdout"""
#print "input='%s'"%self.input
stdout = sys.stdout
splitter = self.IP.input_splitter
try:
sys.stdout = self.cout
splitter.push(line)
more = splitter.push_accepts_more()
if not more:
source_raw = splitter.source_raw_reset()[1]
self.IP.run_cell(source_raw, store_history=store_history)
finally:
sys.stdout = stdout
def process_image(self, decorator):
"""
# build out an image directive like
# .. image:: somefile.png
# :width 4in
#
# from an input like
# savefig somefile.png width=4in
"""
savefig_dir = self.savefig_dir
source_dir = self.source_dir
saveargs = decorator.split(' ')
filename = saveargs[1]
# insert relative path to image file in source
outfile = os.path.relpath(os.path.join(savefig_dir,filename),
source_dir)
imagerows = ['.. image:: %s'%outfile]
for kwarg in saveargs[2:]:
arg, val = kwarg.split('=')
arg = arg.strip()
val = val.strip()
imagerows.append(' :%s: %s'%(arg, val))
image_file = os.path.basename(outfile) # only return file name
image_directive = '\n'.join(imagerows)
return image_file, image_directive
# Callbacks for each type of token
def process_input(self, data, input_prompt, lineno):
"""Process data block for INPUT token."""
decorator, input, rest = data
image_file = None
image_directive = None
#print 'INPUT:', data # dbg
is_verbatim = decorator=='@verbatim' or self.is_verbatim
is_doctest = decorator=='@doctest' or self.is_doctest
is_suppress = decorator=='@suppress' or self.is_suppress
is_savefig = decorator is not None and \
decorator.startswith('@savefig')
input_lines = input.split('\n')
if len(input_lines) > 1:
if input_lines[-1] != "":
input_lines.append('') # make sure there's a blank line
# so splitter buffer gets reset
continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2))
Nc = len(continuation)
if is_savefig:
image_file, image_directive = self.process_image(decorator)
ret = []
is_semicolon = False
for i, line in enumerate(input_lines):
if line.endswith(';'):
is_semicolon = True
if i==0:
# process the first input line
if is_verbatim:
self.process_input_line('')
self.IP.execution_count += 1 # increment it anyway
else:
# only submit the line in non-verbatim mode
self.process_input_line(line, store_history=True)
formatted_line = '%s %s'%(input_prompt, line)
else:
# process a continuation line
if not is_verbatim:
self.process_input_line(line, store_history=True)
formatted_line = '%s %s'%(continuation, line)
if not is_suppress:
ret.append(formatted_line)
if not is_suppress and len(rest.strip()) and is_verbatim:
# the "rest" is the standard output of the
# input, which needs to be added in
# verbatim mode
ret.append(rest)
self.cout.seek(0)
output = self.cout.read()
if not is_suppress and not is_semicolon:
ret.append(output)
elif is_semicolon: # get spacing right
ret.append('')
self.cout.truncate(0)
return (ret, input_lines, output, is_doctest, image_file,
image_directive)
#print 'OUTPUT', output # dbg
def process_output(self, data, output_prompt,
input_lines, output, is_doctest, image_file):
"""Process data block for OUTPUT token."""
if is_doctest:
submitted = data.strip()
found = output
if found is not None:
found = found.strip()
# XXX - fperez: in 0.11, 'output' never comes with the prompt
# in it, just the actual output text. So I think all this code
# can be nuked...
# the above comment does not appear to be accurate... (minrk)
ind = found.find(output_prompt)
if ind<0:
e='output prompt="%s" does not match out line=%s' % \
(output_prompt, found)
raise RuntimeError(e)
found = found[len(output_prompt):].strip()
if found!=submitted:
e = ('doctest failure for input_lines="%s" with '
'found_output="%s" and submitted output="%s"' %
(input_lines, found, submitted) )
raise RuntimeError(e)
#print 'doctest PASSED for input_lines="%s" with found_output="%s" and submitted output="%s"'%(input_lines, found, submitted)
def process_comment(self, data):
"""Process data fPblock for COMMENT token."""
if not self.is_suppress:
return [data]
def save_image(self, image_file):
"""
Saves the image file to disk.
"""
self.ensure_pyplot()
command = 'plt.gcf().savefig("%s")'%image_file
#print 'SAVEFIG', command # dbg
self.process_input_line('bookmark ipy_thisdir', store_history=False)
self.process_input_line('cd -b ipy_savedir', store_history=False)
self.process_input_line(command, store_history=False)
self.process_input_line('cd -b ipy_thisdir', store_history=False)
self.process_input_line('bookmark -d ipy_thisdir', store_history=False)
self.clear_cout()
def process_block(self, block):
"""
process block from the block_parser and return a list of processed lines
"""
ret = []
output = None
input_lines = None
lineno = self.IP.execution_count
input_prompt = self.promptin%lineno
output_prompt = self.promptout%lineno
image_file = None
image_directive = None
for token, data in block:
if token==COMMENT:
out_data = self.process_comment(data)
elif token==INPUT:
(out_data, input_lines, output, is_doctest, image_file,
image_directive) = \
self.process_input(data, input_prompt, lineno)
elif token==OUTPUT:
out_data = \
self.process_output(data, output_prompt,
input_lines, output, is_doctest,
image_file)
if out_data:
ret.extend(out_data)
# save the image files
if image_file is not None:
self.save_image(image_file)
return ret, image_directive
def ensure_pyplot(self):
if self._pyplot_imported:
return
self.process_input_line('import matplotlib.pyplot as plt',
store_history=False)
def process_pure_python(self, content):
"""
        content is a list of strings. It is the unedited directive content.
        This runs it line by line in the InteractiveShell, prepending
        prompts as needed and capturing stderr and stdout, then returns
        the content as a list as if it were ipython code.
"""
output = []
savefig = False # keep up with this to clear figure
multiline = False # to handle line continuation
multiline_start = None
fmtin = self.promptin
ct = 0
for lineno, line in enumerate(content):
line_stripped = line.strip()
if not len(line):
output.append(line)
continue
# handle decorators
if line_stripped.startswith('@'):
output.extend([line])
if 'savefig' in line:
savefig = True # and need to clear figure
continue
# handle comments
if line_stripped.startswith('#'):
output.extend([line])
continue
# deal with lines checking for multiline
continuation = u' %s:'% ''.join(['.']*(len(str(ct))+2))
if not multiline:
modified = u"%s %s" % (fmtin % ct, line_stripped)
output.append(modified)
ct += 1
try:
ast.parse(line_stripped)
output.append(u'')
except Exception: # on a multiline
multiline = True
multiline_start = lineno
else: # still on a multiline
modified = u'%s %s' % (continuation, line)
output.append(modified)
try:
mod = ast.parse(
'\n'.join(content[multiline_start:lineno+1]))
if isinstance(mod.body[0], ast.FunctionDef):
# check to see if we have the whole function
for element in mod.body[0].body:
if isinstance(element, ast.Return):
multiline = False
else:
output.append(u'')
multiline = False
except Exception:
pass
if savefig: # clear figure if plotted
self.ensure_pyplot()
self.process_input_line('plt.clf()', store_history=False)
self.clear_cout()
savefig = False
return output
class IpythonDirective(Directive):
has_content = True
required_arguments = 0
optional_arguments = 4 # python, suppress, verbatim, doctest
    final_argument_whitespace = True
option_spec = { 'python': directives.unchanged,
'suppress' : directives.flag,
'verbatim' : directives.flag,
'doctest' : directives.flag,
}
shell = EmbeddedSphinxShell()
def get_config_options(self):
# contains sphinx configuration variables
config = self.state.document.settings.env.config
# get config variables to set figure output directory
confdir = self.state.document.settings.env.app.confdir
savefig_dir = config.ipython_savefig_dir
source_dir = os.path.dirname(self.state.document.current_source)
if savefig_dir is None:
savefig_dir = config.html_static_path
if isinstance(savefig_dir, list):
savefig_dir = savefig_dir[0] # safe to assume only one path?
savefig_dir = os.path.join(confdir, savefig_dir)
# get regex and prompt stuff
rgxin = config.ipython_rgxin
rgxout = config.ipython_rgxout
promptin = config.ipython_promptin
promptout = config.ipython_promptout
return savefig_dir, source_dir, rgxin, rgxout, promptin, promptout
def setup(self):
# reset the execution count if we haven't processed this doc
#NOTE: this may be borked if there are multiple seen_doc tmp files
#check time stamp?
seen_docs = [i for i in os.listdir(tempfile.tempdir)
if i.startswith('seen_doc')]
if seen_docs:
fname = os.path.join(tempfile.tempdir, seen_docs[0])
docs = open(fname).read().split('\n')
if not self.state.document.current_source in docs:
self.shell.IP.history_manager.reset()
self.shell.IP.execution_count = 1
else: # haven't processed any docs yet
docs = []
# get config values
(savefig_dir, source_dir, rgxin,
rgxout, promptin, promptout) = self.get_config_options()
# and attach to shell so we don't have to pass them around
self.shell.rgxin = rgxin
self.shell.rgxout = rgxout
self.shell.promptin = promptin
self.shell.promptout = promptout
self.shell.savefig_dir = savefig_dir
self.shell.source_dir = source_dir
# setup bookmark for saving figures directory
self.shell.process_input_line('bookmark ipy_savedir %s'%savefig_dir,
store_history=False)
self.shell.clear_cout()
# write the filename to a tempfile because it's been "seen" now
if not self.state.document.current_source in docs:
fd, fname = tempfile.mkstemp(prefix="seen_doc", text=True)
fout = open(fname, 'a')
fout.write(self.state.document.current_source+'\n')
fout.close()
return rgxin, rgxout, promptin, promptout
def teardown(self):
# delete last bookmark
self.shell.process_input_line('bookmark -d ipy_savedir',
store_history=False)
self.shell.clear_cout()
def run(self):
debug = False
#TODO, any reason block_parser can't be a method of embeddable shell
# then we wouldn't have to carry these around
rgxin, rgxout, promptin, promptout = self.setup()
options = self.options
self.shell.is_suppress = 'suppress' in options
self.shell.is_doctest = 'doctest' in options
self.shell.is_verbatim = 'verbatim' in options
# handle pure python code
if 'python' in self.arguments:
content = self.content
self.content = self.shell.process_pure_python(content)
parts = '\n'.join(self.content).split('\n\n')
lines = ['.. code-block:: ipython','']
figures = []
for part in parts:
block = block_parser(part, rgxin, rgxout, promptin, promptout)
if len(block):
rows, figure = self.shell.process_block(block)
for row in rows:
lines.extend([' %s'%line for line in row.split('\n')])
if figure is not None:
figures.append(figure)
#text = '\n'.join(lines)
#figs = '\n'.join(figures)
for figure in figures:
lines.append('')
lines.extend(figure.split('\n'))
lines.append('')
#print lines
if len(lines)>2:
if debug:
print '\n'.join(lines)
else: #NOTE: this raises some errors, what's it for?
#print 'INSERTING %d lines'%len(lines)
self.state_machine.insert_input(
lines, self.state_machine.input_lines.source(0))
text = '\n'.join(lines)
txtnode = nodes.literal_block(text, text)
txtnode['language'] = 'ipython'
#imgnode = nodes.image(figs)
# cleanup
self.teardown()
return []#, imgnode]
# Enable as a proper Sphinx directive
def setup(app):
setup.app = app
app.add_directive('ipython', IpythonDirective)
app.add_config_value('ipython_savefig_dir', None, True)
app.add_config_value('ipython_rgxin',
re.compile('In \[(\d+)\]:\s?(.*)\s*'), True)
app.add_config_value('ipython_rgxout',
re.compile('Out\[(\d+)\]:\s?(.*)\s*'), True)
app.add_config_value('ipython_promptin', 'In [%d]:', True)
app.add_config_value('ipython_promptout', 'Out[%d]:', True)
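# A minimal conf.py sketch for enabling the directive (the extension name
# below assumes this module is importable as `ipython_directive`; adjust
# it to wherever the file actually lives):
#
#     extensions = ['ipython_directive']
#     ipython_savefig_dir = '_static'   # optional overrides
#     ipython_promptin = 'In [%d]:'
#     ipython_promptout = 'Out[%d]:'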
# Simple smoke test, needs to be converted to a proper automatic test.
def test():
examples = [
r"""
In [9]: pwd
Out[9]: '/home/jdhunter/py4science/book'
In [10]: cd bookdata/
/home/jdhunter/py4science/book/bookdata
In [2]: from pylab import *
In [2]: ion()
In [3]: im = imread('stinkbug.png')
@savefig mystinkbug.png width=4in
In [4]: imshow(im)
Out[4]: <matplotlib.image.AxesImage object at 0x39ea850>
""",
r"""
In [1]: x = 'hello world'
# string methods can be
# used to alter the string
@doctest
In [2]: x.upper()
Out[2]: 'HELLO WORLD'
@verbatim
In [3]: x.st<TAB>
x.startswith x.strip
""",
r"""
In [130]: url = 'http://ichart.finance.yahoo.com/table.csv?s=CROX\
.....: &d=9&e=22&f=2009&g=d&a=1&b=8&c=2006&ignore=.csv'
In [131]: print url.split('&')
['http://ichart.finance.yahoo.com/table.csv?s=CROX', 'd=9', 'e=22', 'f=2009', 'g=d', 'a=1', 'b=8', 'c=2006', 'ignore=.csv']
In [60]: import urllib
""",
r"""\
In [133]: import numpy.random
@suppress
In [134]: numpy.random.seed(2358)
@doctest
In [135]: numpy.random.rand(10,2)
Out[135]:
array([[ 0.64524308, 0.59943846],
[ 0.47102322, 0.8715456 ],
[ 0.29370834, 0.74776844],
[ 0.99539577, 0.1313423 ],
[ 0.16250302, 0.21103583],
[ 0.81626524, 0.1312433 ],
[ 0.67338089, 0.72302393],
[ 0.7566368 , 0.07033696],
[ 0.22591016, 0.77731835],
[ 0.0072729 , 0.34273127]])
""",
r"""
In [106]: print x
jdh
In [109]: for i in range(10):
.....: print i
.....:
.....:
0
1
2
3
4
5
6
7
8
9
""",
r"""
In [144]: from pylab import *
In [145]: ion()
# use a semicolon to suppress the output
@savefig test_hist.png width=4in
In [151]: hist(np.random.randn(10000), 100);
@savefig test_plot.png width=4in
In [151]: plot(np.random.randn(10000), 'o');
""",
r"""
# use a semicolon to suppress the output
In [151]: plt.clf()
@savefig plot_simple.png width=4in
In [151]: plot([1,2,3])
@savefig hist_simple.png width=4in
In [151]: hist(np.random.randn(10000), 100);
""",
r"""
# update the current fig
In [151]: ylabel('number')
In [152]: title('normal distribution')
@savefig hist_with_text.png
In [153]: grid(True)
""",
]
# skip local-file depending first example:
examples = examples[1:]
#IpythonDirective.DEBUG = True # dbg
#options = dict(suppress=True) # dbg
options = dict()
for example in examples:
content = example.split('\n')
        IpythonDirective('debug', arguments=None, options=options,
content=content, lineno=0,
content_offset=None, block_text=None,
state=None, state_machine=None,
)
# Run test suite as a script
if __name__=='__main__':
if not os.path.isdir('_static'):
os.mkdir('_static')
test()
print 'All OK? Check figures in _static/'
View file
@ -1,4 +1,3 @@
 *************************************
 Gaussian process regression tutorial
 *************************************
@ -12,7 +11,7 @@ We first import the libraries we will need: ::
     import numpy as np
     import GPy

-1 dimensional model
+1-dimensional model
 ===================

 For this toy example, we assume we have the following inputs and outputs::
@ -22,13 +21,11 @@ For this toy example, we assume we have the following inputs and outputs::
 Note that the observations Y include some noise.

-The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on a Gaussian kernel (i.e. rbf or squared exponential) plus some white noise::
+The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on a Gaussian kernel (i.e. rbf or squared exponential)::

-    Gaussian = GPy.kern.rbf(D=1)
-    noise = GPy.kern.white(D=1)
-    kernel = Gaussian + noise
+    kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)

-The parameter ``D`` stands for the dimension of the input space. Note that many other kernels are implemented, such as:
+The parameter ``D`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Note that many other kernels are implemented, such as:

 * linear (``GPy.kern.linear``)
 * exponential kernel (``GPy.kern.exponential``)
@ -41,19 +38,19 @@ The inputs required for building the model are the observations and the kernel::
     m = GPy.models.GP_regression(X,Y,kernel)

-The functions ``print`` and ``plot`` give an insight into the model we have just built. The code::
+By default, some observation noise is added to the model. The functions ``print`` and ``plot`` give an insight into the model we have just built. The code::

     print m
     m.plot()

 gives the following output: ::

-    Marginal log-likelihood: -2.281e+01
+    Marginal log-likelihood: -4.479e+00
     Name            | Value  | Constraints | Ties | Prior
     -----------------------------------------------------------------
     rbf_variance    | 1.0000 |             |      |
     rbf_lengthscale | 1.0000 |             |      |
-    white_variance  | 1.0000 |             |      |
+    noise variance  | 1.0000 |             |      |

 .. figure:: Figures/tuto_GP_regression_m1.png
     :align: center
@ -75,24 +72,24 @@ but it is also possible to set a range or to constrain one parameter to be fixed
     m.unconstrain('')  # Required to remove the previous constraints
     m.constrain_positive('rbf_variance')
     m.constrain_bounded('lengthscale',1.,10.)
-    m.constrain_fixed('white',0.0025)
+    m.constrain_fixed('noise',0.0025)

 Once the constraints have been imposed, the model can be optimized::

     m.optimize()

-If we want to perform some restarts to try to improve the result of the optimization, we can use the optimize_restarts function::
+If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restarts`` function::

     m.optimize_restarts(Nrestarts = 10)

 Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model::

-    Marginal log-likelihood: 2.001e+01
+    Marginal log-likelihood: 3.603e+01
     Name            | Value  | Constraints | Ties | Prior
     -----------------------------------------------------------------
-    rbf_variance    | 0.8033 | (+ve)       |      |
-    rbf_lengthscale | 1.8033 | (1.0, 10.0) |      |
-    white_variance  | 0.0025 | Fixed       |      |
+    rbf_variance    | 0.8151 | (+ve)       |      |
+    rbf_lengthscale | 1.8037 | (1.0, 10.0) |      |
+    noise variance  | 0.0025 | Fixed       |      |

 .. figure:: Figures/tuto_GP_regression_m2.png
     :align: center
@ -101,7 +98,7 @@ Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting mo
     GP regression model after optimization of the parameters.

-2 dimensional example
+2-dimensional example
 =====================

 Here is a 2 dimensional example::
@ -131,15 +128,16 @@ Here is a 2 dimensional example::
     m.plot()
     print(m)

-The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (i.e. the GP is not isotropic). The output of the last 2 lines is::
+The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (i.e. the GP is not isotropic). The output of the last two lines is::

-    Marginal log-likelihood: 2.893e+01
+    Marginal log-likelihood: 6.682e+01
     Name                    | Value  | Constraints | Ties | Prior
     ----------------------------------------------------------------
-    Mat52_ARD_variance      | 0.4094 | (+ve)       |      |
-    Mat52_ARD_lengthscale_0 | 2.1060 | (+ve)       |      |
-    Mat52_ARD_lengthscale_1 | 2.0546 | (+ve)       |      |
-    white_variance          | 0.0012 | (+ve)       |      |
+    Mat52_variance          | 0.3860 | (+ve)       |      |
+    Mat52_lengthscale_0     | 2.0578 | (+ve)       |      |
+    Mat52_lengthscale_1     | 1.8542 | (+ve)       |      |
+    white_variance          | 0.0023 | (+ve)       |      |
+    noise variance          | 0.0000 | (+ve)       |      |

 .. figure:: Figures/tuto_GP_regression_m3.png
     :align: center
View file
@ -0,0 +1,177 @@
****************************
Tutorial: A kernel overview
****************************
First we import the libraries we will need ::
import pylab as pb
import numpy as np
import GPy
pb.ion()
For most kernels, the dimension is the only mandatory parameter needed to define a kernel object. However, it is also possible to specify the values of the parameters. For example, the following three commands are all valid for defining a squared exponential kernel (i.e. rbf or Gaussian) ::
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = 1.5, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
Both a ``plot`` and a ``print`` function are implemented to represent kernel objects ::
print ker1
ker1.plot()
ker2.plot()
ker3.plot()
.. figure:: Figures/tuto_kern_overview_basicdef.png
:align: center
:height: 350px
Implemented kernels
===================
Many kernels are already implemented in GPy. Here is a summary of most of them:
.. figure:: Figures/tuto_kern_overview_allkern.png
:align: center
:height: 800px
On the other hand, it is possible to use the ``sympy`` package to build new kernels; this will be the subject of another tutorial.
Operations to combine kernels
=============================
In ``GPy``, kernel objects can be combined with the usual ``+`` and ``*`` operators. ::
k1 = GPy.kern.rbf(1,variance=1., lengthscale=2)
k2 = GPy.kern.Matern32(1,variance=1., lengthscale=2)
ker_add = k1 + k2
print ker_add
ker_prod = k1 * k2
print ker_prod
Note that by default, the operator ``+`` adds kernels defined on the same input space whereas ``*`` assumes that the kernels are defined on different input spaces. Here for example ``ker_add.D`` will return ``1`` whereas ``ker_prod.D`` will return ``2``.
In order to add kernels defined on different input spaces, the required command is::
ker_add_orth = k1.add_orthogonal(k2)
.. figure:: Figures/tuto_kern_overview_add_orth.png
:align: center
:height: 350px
Output of ``ker_add_orth.plot(plot_limits=[[-10,-10],[10,10]])``.
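As a quick check of this bookkeeping (a sketch, reusing ``k1``, ``k2``, ``ker_add``, ``ker_prod`` and ``ker_add_orth`` from above; the value for ``ker_add_orth.D`` is our assumption, since the orthogonal sum also concatenates the input spaces) ::

    print ker_add.D       # 1: both summands act on the same 1d input
    print ker_prod.D      # 2: the product assumes separate input spaces
    print ker_add_orth.D  # presumably 2: inputs are concatenated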
Example: Building an ANOVA kernel
=================================
In two dimensions ANOVA kernels have the following form:
.. math::
k_{ANOVA}(x,y) = \prod_{i=1}^2 (1 + k_i(x_i,y_i)) = 1 + k_1(x_1,y_1) + k_2(x_2,y_2) + k_1(x_1,y_1) \times k_2(x_2,y_2).
Let us assume that we want to define an ANOVA kernel with a Matern 5/2 kernel for :math:`k_i`. As seen previously, we can define this kernel as follows ::
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat) * (k_cst + k_mat)
print Kanova
Printing the resulting kernel outputs the following ::
Name | Value | Constraints | Ties
---------------------------------------------------------------------------
bias<times>bias_variance | 1.0000 | |
bias<times>Mat52_variance | 1.0000 | |
bias<times>Mat52_Mat52_lengthscale | 3.0000 | | (1)
Mat52<times>bias_variance | 1.0000 | |
Mat52<times>bias_Mat52_lengthscale | 3.0000 | | (0)
Mat52<times>Mat52_variance | 1.0000 | |
Mat52<times>Mat52_Mat52_lengthscale | 3.0000 | | (0)
Mat52<times>Mat52_Mat52_lengthscale | 3.0000 | | (1)
Note the ties between the lengthscales of ``Kanova``, which keep the number of lengthscales equal to 2. On the other hand, there are four variance terms in the new parameterization: one for each term on the right-hand side of the above equation. We can illustrate the use of this kernel on a toy example::
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GP_regression(X,Y,Kanova)
m.plot()
.. figure:: Figures/tuto_kern_overview_mANOVA.png
:align: center
:height: 350px
As :math:`k_{ANOVA}` corresponds to the sum of 4 kernels, the best predictor can be split into a sum of 4 functions
.. math::
bp(x) & = k(x)^t K^{-1} Y \\
& = (1 + k_1(x_1) + k_2(x_2) + k_1(x_1)k_2(x_2))^t K^{-1} Y \\
& = 1^t K^{-1} Y + k_1(x_1)^t K^{-1} Y + k_2(x_2)^t K^{-1} Y + (k_1(x_1)k_2(x_2))^t K^{-1} Y
The submodels can be represented with the option ``which_functions`` of ``plot``: ::
pb.figure(figsize=(20,5))
pb.subplots_adjust(wspace=0.5)
pb.subplot(1,5,1)
m.plot()
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True])
.. figure:: Figures/tuto_kern_overview_mANOVAdec.png
:align: center
:height: 200px
.. import pylab as pb
import numpy as np
import GPy
pb.ion()
ker1 = GPy.kern.rbf(D=1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
ker3 = GPy.kern.rbf(1, .5, .25)
ker1.plot()
ker2.plot()
ker3.plot()
#pb.savefig("Figures/tuto_kern_overview_basicdef.png")
kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
pb.figure(figsize=(16,12))
pb.subplots_adjust(wspace=.5, hspace=.5)
for i, kern in enumerate(kernels):
pb.subplot(3,4,i+1)
kern.plot(x=7.5,plot_limits=[0.00001,15.])
pb.title(kernel_names[i]+ '\n')
#pb.axes([.1,.1,.8,.7])
#pb.figtext(.5,.9,'Foo Bar', fontsize=18, ha='center')
#pb.figtext(.5,.85,'Lorem ipsum dolor sit amet, consectetur adipiscing elit',fontsize=10,ha='center')
# actual plot for the noise
i = 11
X = np.linspace(0.,15.,201)
WN = 0*X
WN[100] = 1.
pb.subplot(3,4,i+1)
pb.plot(X,WN,'b')
View file
@ -1,64 +0,0 @@
import numpy as np
import pylab as pb
pb.ion()
import sys
import GPy
pb.close('all')
N = 200
M = 15
resolution=5
X = np.linspace(0,12,N)[:,None]
Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
#k = GPy.kern.rbf(1)
k = GPy.kern.Matern32(1) + GPy.kern.white(1)
models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)]
models[0].scale_factor = 1.
models[1].scale_factor = 10.
models[2].scale_factor = 100.
models[3].scale_factor = 1000.
#GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
#GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
#GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
[m.constrain_fixed('white',0.1) for m in models]
#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j]
lls = []
cgs = []
grads = []
count = 0
for l,v in zip(xx.flatten(),yy.flatten()):
count += 1
print count, 'of', resolution**2
sys.stdout.flush()
[m.set('lengthscale',l) for m in models]
[m.set('_variance',10.**v) for m in models]
lls.append([m.log_likelihood() for m in models])
grads.append([m.log_likelihood_gradients() for m in models])
cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
lls = np.array(zip(*lls)).reshape(-1,resolution,resolution)
cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution)
for ll,cg in zip(lls,cgs):
pb.figure()
pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray)
pb.colorbar()
try:
pb.contour(xx,yy,np.exp(ll),colors='k')
except:
pass
pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
pb.colorbar()
View file
@ -3,7 +3,7 @@
 import os
 from numpy.distutils.core import Extension, setup
-from sphinx.setup_command import BuildDoc
+#from sphinx.setup_command import BuildDoc

 # Version number
 version = '0.1.3'
@ -19,7 +19,7 @@ setup(name = 'GPy',
       license = "BSD 3-clause",
       keywords = "machine-learning gaussian-processes kernels",
       url = "http://ml.sheffield.ac.uk/GPy/",
-      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples'],
+      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'],
       package_dir={'GPy': 'GPy'},
       package_data = {'GPy': ['GPy/examples']},
       py_modules = ['GPy.__init__'],
@ -27,8 +27,11 @@ setup(name = 'GPy',
       #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
       #                         sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
       install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
-      setup_requires=['sphinx'],
-      cmdclass = {'build_sphinx': BuildDoc},
+      extras_require = {
+          'docs':['Sphinx', 'ipython'],
+      },
+      #setup_requires=['sphinx'],
+      #cmdclass = {'build_sphinx': BuildDoc},
       classifiers=[
           "Development Status :: 1 - Alpha",
           "Topic :: Machine Learning",