merged with master

.travis.yml (17 changed lines)

@@ -1,11 +1,20 @@
 language: python
 python:
   - "2.7"
+
+#Set virtual env with system-site-packages to true
+virtualenv:
+  system_site_packages: true
+
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
-install:
-  - sudo apt-get install python-scipy
-  - pip install sphinx
+before_install:
+  - sudo apt-get install -qq python-scipy python-pip
+  - sudo apt-get install -qq python-matplotlib
+
+install:
+  - pip install sphinx
+  - pip install nose
   - pip install . --use-mirrors
 # command to run tests, e.g. python setup.py test
 script:
-  - nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing
+  - nosetests GPy/testing
@@ -7,5 +7,5 @@ import models
 import inference
 import util
 import examples
-#import examples TODO: discuss!
 from core import priors
+import likelihoods
@@ -10,6 +10,7 @@ from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
+from .. import likelihoods

 class model(parameterised):
     def __init__(self):
@@ -82,7 +83,7 @@ class model(parameterised):

     def get(self,name, return_names=False):
         """
         Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
@@ -107,7 +108,7 @@ class model(parameterised):

     def get_gradient(self,name, return_names=False):
         """
         Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
@@ -303,54 +304,62 @@ class model(parameterised):
         return '\n'.join(s)


-    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance=1e-3, return_ratio=False, *args):
+    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the model by comparing to a numerical estimate.
-        If the overall gradient fails, individual components are tested.
+        If the verbose flag is passed, individual components are tested (and printed)
+
+        :param verbose: If True, print a "full" checking of each parameter
+        :type verbose: bool
+        :param step: The size of the step around which to linearise the objective
+        :type step: float (default 1e-6)
+        :param tolerance: the tolerance allowed (see note)
+        :type tolerance: float (default 1e-3)
+
+        Note:-
+           The gradient is considered correct if the ratio of the analytical
+           and numerical gradients is within <tolerance> of unity.
         """

         x = self._get_params_transformed().copy()

-        #choose a random direction to step in:
-        dx = step*np.sign(np.random.uniform(-1,1,x.size))
-
-        #evaluate around the point x
-        self._set_params_transformed(x+dx)
-        f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-        self._set_params_transformed(x-dx)
-        f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-        self._set_params_transformed(x)
-        gradient = self._log_likelihood_gradients_transformed()
-
-        numerical_gradient = (f1-f2)/(2*dx)
-        global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
-        if verbose:
-            print "Gradient ratio = ", global_ratio, '\n'
-            sys.stdout.flush()
-
-        if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
-            if verbose:
-                print 'Gradcheck passed'
+        if not verbose:
+            #just check the global ratio
+            dx = step*np.sign(np.random.uniform(-1,1,x.size))
+
+            #evaluate around the point x
+            self._set_params_transformed(x+dx)
+            f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
+            self._set_params_transformed(x-dx)
+            f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
+            self._set_params_transformed(x)
+            gradient = self._log_likelihood_gradients_transformed()
+
+            numerical_gradient = (f1-f2)/(2*dx)
+            global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
+
+            if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
+                return True
+            else:
+                return False
         else:
-            if verbose:
-                print "Global check failed. Testing individual gradients\n"
-            try:
-                names = self._get_param_names_transformed()
-            except NotImplementedError:
-                names = ['Variable %i'%i for i in range(len(x))]
-
-            # Prepare for pretty-printing
-            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
-            max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
-            float_len = 10
-            cols = [max_names]
-            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
-            cols = np.array(cols) + 5
-            header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
-            header_string = map(lambda x: '|'.join(x), [header_string])
-            separator = '-'*len(header_string[0])
-            print '\n'.join([header_string[0], separator])
+            #check the gradient of each parameter individually, and do some pretty printing
+            try:
+                names = self._get_param_names_transformed()
+            except NotImplementedError:
+                names = ['Variable %i'%i for i in range(len(x))]
+
+            # Prepare for pretty-printing
+            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
+            max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
+            float_len = 10
+            cols = [max_names]
+            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
+            cols = np.array(cols) + 5
+            header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
+            header_string = map(lambda x: '|'.join(x), [header_string])
+            separator = '-'*len(header_string[0])
+            print '\n'.join([header_string[0], separator])

             for i in range(len(x)):
                 xx = x.copy()
@@ -368,27 +377,52 @@ class model(parameterised):
                 ratio = (f1-f2)/(2*step*gradient)
                 difference = np.abs((f1-f2)/2/step - gradient)

-                if verbose:
-                    if (np.abs(ratio-1)<tolerance):
-                        formatted_name = "\033[92m {0} \033[0m".format(names[i])
-                    else:
-                        formatted_name = "\033[91m {0} \033[0m".format(names[i])
-                    r = '%.6f' % float(ratio)
-                    d = '%.6f' % float(difference)
-                    g = '%.6f' % gradient
-                    ng = '%.6f' % float(numerical_gradient)
-                    grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
-                    print grad_string
-
-            if verbose:
-                print ''
-
-            if return_ratio:
-                return global_ratio
-            else:
-                return False
-        if return_ratio:
-            return global_ratio
-        else:
-            return True
+                if (np.abs(ratio-1)<tolerance):
+                    formatted_name = "\033[92m {0} \033[0m".format(names[i])
+                else:
+                    formatted_name = "\033[91m {0} \033[0m".format(names[i])
+                r = '%.6f' % float(ratio)
+                d = '%.6f' % float(difference)
+                g = '%.6f' % gradient
+                ng = '%.6f' % float(numerical_gradient)
+                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
+                print grad_string
+
+    def EPEM(self,epsilon=.1,**kwargs):
+        """
+        TODO: Should this not be in the GP class?
+        Expectation maximization for Expectation Propagation.
+
+        kwargs are passed to the optimize function. They can be:
+
+        :epsilon: convergence criterion
+        :max_f_eval: maximum number of function evaluations
+        :messages: whether to display during optimisation
+        :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
+        :type optimizer: string TODO: valid strings?
+
+        """
+        assert isinstance(self.likelihood,likelihoods.EP), "EM is not available for Gaussian likelihoods"
+        log_change = epsilon + 1.
+        self.log_likelihood_record = []
+        self.gp_params_record = []
+        self.ep_params_record = []
+        iteration = 0
+        last_value = -np.exp(1000)
+        while log_change > epsilon or not iteration:
+            print 'EM iteration %s' %iteration
+            self.update_likelihood_approximation()
+            self.optimize(**kwargs)
+            new_value = self.log_likelihood()
+            log_change = new_value - last_value
+            if log_change > epsilon:
+                self.log_likelihood_record.append(new_value)
+                self.gp_params_record.append(self._get_params())
+                #self.ep_params_record.append((self.beta,self.Y,self.Z_ep))
+                last_value = new_value
+            else:
+                convergence = False
+                #self.beta, self.Y, self.Z_ep = self.ep_params_record[-1]
+                self._set_params(self.gp_params_record[-1])
+                print "Log-likelihood decrement: %s \nLast iteration discarded." %log_change
+            iteration += 1
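Note: checkgrad's global test accepts the gradient when the ratio of the numerical to the analytical directional derivative is within `tolerance` of 1. A minimal standalone sketch of that ratio test (plain numpy, not GPy's API; `f` and `df` are an assumed objective and its gradient):

```python
import numpy as np

def ratio_check(f, df, x, step=1e-6, tolerance=1e-3):
    # step in a random direction, as checkgrad does
    dx = step * np.sign(np.random.uniform(-1, 1, x.size))
    # central-difference directional derivative over the analytical one
    ratio = (f(x + dx) - f(x - dx)) / (2 * np.dot(dx, df(x)))
    return np.abs(1. - ratio) < tolerance and not np.isnan(ratio)

# e.g. f(x) = sum(x**2) with gradient 2x passes exactly
print(ratio_check(lambda x: np.sum(x ** 2), lambda x: 2 * x, np.ones(3)))
```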
@@ -102,6 +102,11 @@ class parameterised(object):
         else:
             return expr

+    def Nparam_transformed(self):
+        ties = 0
+        for ar in self.tied_indices:
+            ties += ar.size - 1
+        return self.Nparam - len(self.constrained_fixed_indices) - ties

     def constrain_positive(self, which):
         """
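Note: Nparam_transformed counts the free parameters left once fixed parameters and tie groups are accounted for. The same arithmetic on hypothetical numbers:

```python
# hypothetical model: 10 raw parameters, 2 fixed, one group of 3 tied together
Nparam = 10
constrained_fixed_indices = [3, 7]   # each fixed parameter is removed
tied_indices = [[4, 5, 6]]           # a tie group of size k contributes k-1 ties

ties = sum(len(ar) - 1 for ar in tied_indices)         # 2
print(Nparam - len(constrained_fixed_indices) - ties)  # 6 free parameters
```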
@@ -149,8 +154,6 @@ class parameterised(object):



-
-
     def constrain_negative(self,which):
         """
         Set negative constraints.
@@ -16,8 +16,13 @@ k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T

+<<<<<<< HEAD
 k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
 # k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q, 0.00001)
+=======
+# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
+>>>>>>> master
 m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
 m.constrain_positive('(rbf|bias|noise|white|S)')
 # m.constrain_fixed('S', 1)
@@ -3,16 +3,15 @@


 """
-Simple Gaussian Processes classification
+Gaussian Processes classification
 """
 import pylab as pb
 import numpy as np
 import GPy

 default_seed=10000
-######################################
-## 2 dimensional example
-def crescent_data(model_type='Full', inducing=10, seed=default_seed):
+def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
     """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.

     :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@@ -21,20 +20,28 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
     :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
     :type inducing: int
     """

     data = GPy.util.datasets.crescent_data(seed=seed)
-    likelihood = GPy.inference.likelihoods.probit(data['Y'])
+    # Kernel object
+    kernel = GPy.kern.rbf(data['X'].shape[1])
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
+
     if model_type=='Full':
-        m = GPy.models.GP_EP(data['X'],likelihood)
+        m = GPy.models.GP(data['X'],likelihood,kernel)
     else:
         # create sparse GP EP model
         m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)

-    m.approximate_likelihood()
+    m.update_likelihood_approximation()
     print(m)

     # optimize
-    m.em()
+    m.optimize()
     print(m)

     # plot
@@ -42,54 +49,67 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
     return m

 def oil():
-    """Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood."""
+    """
+    Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
+    """
     data = GPy.util.datasets.oil()
-    likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
+    # Kernel object
+    kernel = GPy.kern.rbf(12)

-    # create simple GP model
-    m = GPy.models.GP_EP(data['X'],likelihood)
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution)

-    # constrain all parameters to be positive
+    # Create GP model
+    m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
+
+    # Constrain all parameters to be positive
     m.constrain_positive('')
     m.tie_param('lengthscale')
-    m.approximate_likelihood()
+    m.update_likelihood_approximation()

-    # optimize
+    # Optimize
     m.optimize()

-    # plot
-    #m.plot()
     print(m)
     return m

-def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed):
-    """Simple 1D classification example.
-    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
+def toy_linear_1d_classification(seed=default_seed):
+    """
+    Simple 1D classification example
     :param seed : seed value for data generation (default is 4).
     :type seed: int
-    :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
     """

     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
-    likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
-    assert model_type in ('Full','DTC','FITC')
+    Y = data['Y'][:, 0:1]
+    Y[Y == -1] = 0

-    # create simple GP model
-    if model_type=='Full':
-        m = GPy.models.simple_GP_EP(data['X'],likelihood)
-    else:
-        # create sparse GP EP model
-        m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
+    # Kernel object
+    kernel = GPy.kern.rbf(1)

-    m.constrain_positive('var')
-    m.constrain_positive('len')
-    m.tie_param('lengthscale')
-    m.approximate_likelihood()
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(Y,distribution)

-    # Optimize and plot
-    m.em(plot_all=False) # EM algorithm
+    # Model definition
+    m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
+
+    # Optimize
+    """
+    EPEM runs a loop that consists of two steps:
+    1) EP likelihood approximation:
+       m.update_likelihood_approximation()
+    2) Parameters optimization:
+       m.optimize()
+    """
+    m.EPEM()
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f()
+    pb.subplot(212)
     m.plot()

     print(m)

     return m
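Note: a possible way to exercise these demos interactively (assuming the file imports as GPy.examples.classification; the call signatures are the ones defined above):

```python
import pylab as pb
from GPy.examples import classification

pb.ion()
m = classification.toy_linear_1d_classification()  # probit EP + EPEM loop, then plots
m_oil = classification.oil()                       # probit EP on the oil data
print(m)
```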
GPy/examples/poisson.py (new file, 47 lines)

@@ -0,0 +1,47 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+"""
+Gaussian Processes + Expectation Propagation - Poisson Likelihood
+"""
+import pylab as pb
+import numpy as np
+import GPy
+
+default_seed=10000
+
+def toy_1d(seed=default_seed):
+    """
+    Simple 1D classification example
+    :param seed : seed value for data generation (default is 4).
+    :type seed: int
+    """
+
+    X = np.arange(0,100,5)[:,None]
+    F = np.round(np.sin(X/18.) + .1*X) + np.arange(5,25)[:,None]
+    E = np.random.randint(-5,5,20)[:,None]
+    Y = F + E
+
+    kernel = GPy.kern.rbf(1)
+    distribution = GPy.likelihoods.likelihood_functions.Poisson()
+    likelihood = GPy.likelihoods.EP(Y,distribution)
+
+    m = GPy.models.GP(X,likelihood,kernel)
+    m.ensure_default_constraints()
+
+    # Approximate likelihood
+    m.update_likelihood_approximation()
+
+    # Optimize and plot
+    m.optimize()
+    #m.EPEM FIXME
+    print m
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f() #GP plot
+    pb.subplot(212)
+    m.plot() #Output plot
+
+    return m
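Note: the new demo can be run the same way; a minimal sketch (assuming the module imports as GPy.examples.poisson):

```python
from GPy.examples import poisson

m = poisson.toy_1d()  # Poisson-EP GP on the toy counts; plots latent f and the output
print(m)
```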
GPy/examples/sparse_ep_fix.py (new file, 60 lines)

@@ -0,0 +1,60 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+"""
+Sparse Gaussian Processes regression with an RBF kernel
+"""
+import pylab as pb
+import numpy as np
+import GPy
+np.random.seed(2)
+pb.ion()
+N = 500
+M = 5
+
+pb.close('all')
+######################################
+## 1 dimensional example
+
+# sample inputs and outputs
+X = np.random.uniform(-3.,3.,(N,1))
+#Y = np.sin(X)+np.random.randn(N,1)*0.05
+F = np.sin(X)+np.random.randn(N,1)*0.05
+Y = np.ones([F.shape[0],1])
+Y[F<0] = -1
+likelihood = GPy.inference.likelihoods.probit(Y)
+
+# construct kernel
+rbf = GPy.kern.rbf(1)
+noise = GPy.kern.white(1)
+kernel = rbf + noise
+
+# create simple GP model
+#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
+
+# constrain all parameters to be positive
+#m.constrain_fixed('prec',100.)
+m = GPy.models.sparse_GP(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
+#    m.approximate_likelihood()
+print m.checkgrad()
+m.optimize('tnc', messages = 1)
+m.plot(samples=3)
+print m
+
+n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
+n.ensure_default_constraints()
+if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian):
+    n.approximate_likelihood()
+print n.checkgrad()
+pb.figure()
+n.plot()
+
+"""
+m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+print m.checkgrad()
+"""
@@ -1,240 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-import random
-from scipy import stats, linalg
-from .likelihoods import likelihood
-from ..core import model
-from ..util.linalg import pdinv,mdot,jitchol
-from ..util.plot import gpplot
-from .. import kern
-
-class EP_base:
-    """
-    Expectation Propagation.
-
-    This is just the base class for expectation propagation. We'll extend it for full and sparse EP.
-    """
-    def __init__(self,likelihood,epsilon=1e-3,powerep=[1.,1.]):
-        self.likelihood = likelihood
-        self.epsilon = epsilon
-        self.eta, self.delta = powerep
-        self.jitter = 1e-12
-
-        #Initial values - Likelihood approximation parameters:
-        #p(y|f) = t(f|tau_tilde,v_tilde)
-        self.restart_EP()
-
-    def restart_EP(self):
-        """
-        Set the EP approximation to initial state
-        """
-        self.tau_tilde = np.zeros(self.N)
-        self.v_tilde = np.zeros(self.N)
-        self.mu = np.zeros(self.N)
-
-class Full(EP_base):
-    """
-    :param likelihood: Output's likelihood (e.g. probit)
-    :type likelihood: GPy.inference.likelihood instance
-    :param K: prior covariance matrix
-    :type K: np.ndarray (N x N)
-    :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
-    :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
-    """
-    def __init__(self,K,likelihood,*args,**kwargs):
-        assert K.shape[0] == K.shape[1]
-        self.K = K
-        self.N = self.K.shape[0]
-        EP_base.__init__(self,likelihood,*args,**kwargs)
-    def fit_EP(self,messages=False):
-        """
-        The expectation-propagation algorithm.
-        For nomenclature see Rasmussen & Williams 2006 (pp. 52-60)
-        """
-        #Prior distribution parameters: p(f|X) = N(f|0,K)
-        #self.K = self.kernel.K(self.X,self.X)
-
-        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
-        self.mu=np.zeros(self.N)
-        self.Sigma=self.K.copy()
-
-        """
-        Initial values - Cavity distribution parameters:
-        q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
-        sigma_ = 1./tau_
-        mu_ = v_/tau_
-        """
-
-        self.tau_ = np.empty(self.N,dtype=np.float64)
-        self.v_ = np.empty(self.N,dtype=np.float64)
-
-        #Initial values - Marginal moments
-        z = np.empty(self.N,dtype=np.float64)
-        self.Z_hat = np.empty(self.N,dtype=np.float64)
-        phi = np.empty(self.N,dtype=np.float64)
-        mu_hat = np.empty(self.N,dtype=np.float64)
-        sigma2_hat = np.empty(self.N,dtype=np.float64)
-
-        #Approximation
-        epsilon_np1 = self.epsilon + 1.
-        epsilon_np2 = self.epsilon + 1.
-        self.iterations = 0
-        self.np1 = [self.tau_tilde.copy()]
-        self.np2 = [self.v_tilde.copy()]
-        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.random.permutation(self.N)
-            for i in update_order:
-                #Cavity distribution parameters
-                self.tau_[i] = 1./self.Sigma[i,i] - self.eta*self.tau_tilde[i]
-                self.v_[i] = self.mu[i]/self.Sigma[i,i] - self.eta*self.v_tilde[i]
-                #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
-                #Site parameters update
-                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma[i,i])
-                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma[i,i])
-                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
-                self.v_tilde[i] = self.v_tilde[i] + Delta_v
-                #Posterior distribution parameters update
-                si=self.Sigma[:,i].reshape(self.N,1)
-                self.Sigma = self.Sigma - Delta_tau/(1.+ Delta_tau*self.Sigma[i,i])*np.dot(si,si.T)
-                self.mu = np.dot(self.Sigma,self.v_tilde)
-                self.iterations += 1
-            #Sigma recomputation with Cholesky decomposition
-            Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*(self.K)
-            B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
-            L = jitchol(B)
-            V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
-            self.Sigma = self.K - np.dot(V.T,V)
-            self.mu = np.dot(self.Sigma,self.v_tilde)
-            epsilon_np1 = np.mean(self.tau_tilde-self.np1[-1]**2)
-            epsilon_np2 = np.mean(self.v_tilde-self.np2[-1]**2)
-            self.np1.append(self.tau_tilde.copy())
-            self.np2.append(self.v_tilde.copy())
-            if messages:
-                print "EP iteration %i, epsilon %d"%(self.iterations,epsilon_np1)
-
-class FITC(EP_base):
-    """
-    :param likelihood: Output's likelihood (e.g. probit)
-    :type likelihood: GPy.inference.likelihood instance
-    :param Knn_diag: the diagonal elements of Knn as a 1D vector
-    :param Kmn: the 'cross' covariance between inducing inputs and data
-    :param Kmm: the covariance matrix of the inducing inputs
-    :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
-    :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
-    """
-    def __init__(self,likelihood,Knn_diag,Kmn,Kmm,*args,**kwargs):
-        self.Knn_diag = Knn_diag
-        self.Kmn = Kmn
-        self.Kmm = Kmm
-        self.M = self.Kmn.shape[0]
-        self.N = self.Kmn.shape[1]
-        assert self.M <= self.N, 'The number of inducing inputs must be smaller than the number of observations'
-        assert len(Knn_diag) == self.N, 'Knn_diagonal has size different from N'
-        EP_base.__init__(self,likelihood,*args,**kwargs)
-
-    def fit_EP(self):
-        """
-        The expectation-propagation algorithm with sparse pseudo-input.
-        For nomenclature see Naish-Guzman and Holden, 2008.
-        """
-
-        """
-        Prior approximation parameters:
-        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
-        Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
-        """
-        self.Kmmi, self.Kmm_hld = pdinv(self.Kmm)
-        self.P0 = self.Kmn.T
-        self.KmnKnm = np.dot(self.P0.T, self.P0)
-        self.KmmiKmn = np.dot(self.Kmmi,self.P0.T)
-        self.Qnn_diag = np.sum(self.P0.T*self.KmmiKmn,-2)
-        self.Diag0 = self.Knn_diag - self.Qnn_diag
-        self.R0 = jitchol(self.Kmmi).T
-
-        """
-        Posterior approximation: q(f|y) = N(f| mu, Sigma)
-        Sigma = Diag + P*R.T*R*P.T + K
-        mu = w + P*gamma
-        """
-        self.w = np.zeros(self.N)
-        self.gamma = np.zeros(self.M)
-        self.mu = np.zeros(self.N)
-        self.P = self.P0.copy()
-        self.R = self.R0.copy()
-        self.Diag = self.Diag0.copy()
-        self.Sigma_diag = self.Knn_diag
-
-        """
-        Initial values - Cavity distribution parameters:
-        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
-        sigma_ = 1./tau_
-        mu_ = v_/tau_
-        """
-        self.tau_ = np.empty(self.N,dtype=np.float64)
-        self.v_ = np.empty(self.N,dtype=np.float64)
-
-        #Initial values - Marginal moments
-        z = np.empty(self.N,dtype=np.float64)
-        self.Z_hat = np.empty(self.N,dtype=np.float64)
-        phi = np.empty(self.N,dtype=np.float64)
-        mu_hat = np.empty(self.N,dtype=np.float64)
-        sigma2_hat = np.empty(self.N,dtype=np.float64)
-
-        #Approximation
-        epsilon_np1 = 1
-        epsilon_np2 = 1
-        self.iterations = 0
-        self.np1 = [self.tau_tilde.copy()]
-        self.np2 = [self.v_tilde.copy()]
-        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
-            update_order = np.arange(self.N)
-            random.shuffle(update_order)
-            for i in update_order:
-                #Cavity distribution parameters
-                self.tau_[i] = 1./self.Sigma_diag[i] - self.eta*self.tau_tilde[i]
-                self.v_[i] = self.mu[i]/self.Sigma_diag[i] - self.eta*self.v_tilde[i]
-                #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood.moments_match(i,self.tau_[i],self.v_[i])
-                #Site parameters update
-                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./self.Sigma_diag[i])
-                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - self.mu[i]/self.Sigma_diag[i])
-                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
-                self.v_tilde[i] = self.v_tilde[i] + Delta_v
-                #Posterior distribution parameters update
-                dtd1 = Delta_tau*self.Diag[i] + 1.
-                dii = self.Diag[i]
-                self.Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
-                pi_ = self.P[i,:].reshape(1,self.M)
-                self.P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
-                Rp_i = np.dot(self.R,pi_.T)
-                RTR = np.dot(self.R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*self.Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),self.R))
-                self.R = jitchol(RTR).T
-                self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
-                self.gamma = self.gamma + (Delta_v - Delta_tau*self.mu[i])*np.dot(RTR,self.P[i,:].T)
-                self.RPT = np.dot(self.R,self.P.T)
-                self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
-                self.mu = self.w + np.dot(self.P,self.gamma)
-                self.iterations += 1
-            #Sigma recomputation with Cholesky decomposition
-            self.Diag = self.Diag0/(1.+ self.Diag0 * self.tau_tilde)
-            self.P = (self.Diag / self.Diag0)[:,None] * self.P0
-            self.RPT0 = np.dot(self.R0,self.P0.T)
-            L = jitchol(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
-            self.R,info = linalg.flapack.dtrtrs(L,self.R0,lower=1)
-            self.RPT = np.dot(self.R,self.P.T)
-            self.Sigma_diag = self.Diag + np.sum(self.RPT.T*self.RPT.T,-1)
-            self.w = self.Diag * self.v_tilde
-            self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.v_tilde))
-            self.mu = self.w + np.dot(self.P,self.gamma)
-            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
-            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
-            self.np1.append(self.tau_tilde.copy())
-            self.np2.append(self.v_tilde.copy())
@@ -1,219 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-from scipy import stats
-import scipy as sp
-import pylab as pb
-from ..util.plot import gpplot
-
-class likelihood:
-    def __init__(self,Y,location=0,scale=1):
-        """
-        Likelihood class for doing Expectation propagation
-
-        :param Y: observed output (Nx1 numpy.ndarray)
-        ..Note:: Y values allowed depend on the likelihood used
-        """
-        self.Y = Y
-        self.N = self.Y.shape[0]
-        self.location = location
-        self.scale = scale
-
-    def plot1Da(self,X_new,Mean_new,Var_new,X_u,Mean_u,Var_u):
-        """
-        Plot the predictive distribution of the GP model for 1-dimensional inputs
-
-        :param X_new: the points at which to make a prediction
-        :param Mean_new: mean values at X_new
-        :param Var_new: variance values at X_new
-        :param X_u: input (inducing) points used to train the model
-        :param Mean_u: mean values at X_u
-        :param Var_u: variance values at X_u
-        """
-        assert X_new.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X_new,Mean_new,Var_new)
-        pb.errorbar(X_u,Mean_u,2*np.sqrt(Var_u),fmt='r+')
-        pb.plot(X_u,Mean_u,'ro')
-
-    def plot2D(self,X,X_new,F_new,U=None):
-        """
-        Predictive distribution of the fitted GP model for 2-dimensional inputs
-
-        :param X_new: the points at which to make a prediction
-        :param Mean_new: mean values at X_new
-        :param Var_new: variance values at X_new
-        :param X_u: input points used to train the model
-        :param Mean_u: mean values at X_u
-        :param Var_u: variance values at X_u
-        """
-        N,D = X_new.shape
-        assert D == 2, 'Number of dimensions must be 2'
-        n = np.sqrt(N)
-        x1min = X_new[:,0].min()
-        x1max = X_new[:,0].max()
-        x2min = X_new[:,1].min()
-        x2max = X_new[:,1].max()
-        pb.imshow(F_new.reshape(n,n),extent=(x1min,x1max,x2max,x2min),vmin=0,vmax=1)
-        pb.colorbar()
-        C1 = np.arange(self.N)[self.Y.flatten()==1]
-        C2 = np.arange(self.N)[self.Y.flatten()==-1]
-        [pb.plot(X[i,0],X[i,1],'ro') for i in C1]
-        [pb.plot(X[i,0],X[i,1],'bo') for i in C2]
-        pb.xlim(x1min,x1max)
-        pb.ylim(x2min,x2max)
-        if U is not None:
-            [pb.plot(a,b,'wo') for a,b in U]
-
-class probit(likelihood):
-    """
-    Probit likelihood
-    Y is expected to take values in {-1,1}
-    -----
-    $$
-    L(x) = \\Phi (Y_i*f_i)
-    $$
-    """
-    def moments_match(self,i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        z = self.Y[i]*v_i/np.sqrt(tau_i**2 + tau_i)
-        Z_hat = stats.norm.cdf(z)
-        phi = stats.norm.pdf(z)
-        mu_hat = v_i/tau_i + self.Y[i]*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
-        sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
-        return Z_hat, mu_hat, sigma2_hat
-
-    def plot1Db(self,X,X_new,F_new,U=None):
-        assert X.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X_new,F_new,np.zeros(X_new.shape[0]))
-        pb.plot(X,(self.Y+1)/2,'kx',mew=1.5)
-        pb.ylim(-0.2,1.2)
-        if U is not None:
-            pb.plot(U,U*0+.5,'r|',mew=1.5,markersize=12)
-
-    def predictive_mean(self,mu,variance):
-        return stats.norm.cdf(mu/np.sqrt(1+variance))
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
-
-class poisson(likelihood):
-    """
-    Poisson likelihood
-    Y is expected to take values in {0,1,2,...}
-    -----
-    $$
-    L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
-    $$
-    """
-    def moments_match(self,i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        mu = v_i/tau_i
-        sigma = np.sqrt(1./tau_i)
-        def poisson_norm(f):
-            """
-            Product of the likelihood and the cavity distribution
-            """
-            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
-            rate = np.exp( (f*self.scale)+self.location)
-            poisson = stats.poisson.pmf(float(self.Y[i]),rate)
-            return pdf_norm_f*poisson
-
-        def log_pnm(f):
-            """
-            Log of poisson_norm
-            """
-            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i])
-
-        """
-        Golden Search and Simpson's Rule
-        --------------------------------
-        Simpson's Rule is used to calculate the moments numerically, it needs a grid of points as input.
-        Golden Search is used to find the mode of the poisson_norm distribution and define around it the grid for Simpson's Rule
-        """
-        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
-
-        #Golden search
-        golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit
-        golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit
-        golden_A = (golden_A - self.location)/self.scale
-        golden_B = (golden_B - self.location)/self.scale
-        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
-
-        # Simpson's approximation
-        width = 3./np.log(max(self.Y[i],2))
-        A = opt - width #Lower limit
-        B = opt + width #Upper limit
-        K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case
-        h = (B-A)/K # length of the intervals
-        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
-        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
-        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
-        first = zeroth*x
-        second = first*x
-        Z_hat = sum(zeroth)*h/3 # zeroth moment
-        mu_hat = sum(first)*h/(3*Z_hat) # first moment
-        m2 = sum(second)*h/(3*Z_hat) # second moment
-        sigma2_hat = m2 - mu_hat**2 # second central moment
-        return float(Z_hat), float(mu_hat), float(sigma2_hat)
-
-    def plot1Db(self,X,X_new,F_new,F2_new=None,U=None):
-        pb.subplot(212)
-        #gpplot(X_new,F_new,np.sqrt(F2_new))
-        pb.plot(X_new,F_new)#,np.sqrt(F2_new)) #FIXME
-        pb.plot(X,self.Y,'kx',mew=1.5)
-        if U is not None:
-            pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
-
-    def predictive_mean(self,mu,variance):
-        return np.exp(mu*self.scale + self.location)
-
-    def predictive_variance(self,mu,variance):
-        return mu
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
-
-class gaussian(likelihood):
-    """
-    Gaussian likelihood
-    Y is expected to take values in (-inf,inf)
-    """
-    def moments_match(self,i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        mu = v_i/tau_i
-        sigma = np.sqrt(1./tau_i)
-        s = 1. if self.Y[i] == 0 else 1./self.Y[i]
-        sigma2_hat = 1./(1./sigma**2 + 1./s**2)
-        mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
-        Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
-        return Z_hat, mu_hat, sigma2_hat
-
-    def plot1Db(self,X,X_new,F_new,U=None):
-        assert X.shape[1] == 1, 'Number of dimensions must be 1'
-        gpplot(X_new,F_new,np.zeros(X_new.shape[0]))
-        pb.plot(X,self.Y,'kx',mew=1.5)
-        if U is not None:
-            pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
-
-    def predictive_mean(self,mu,Sigma):
-        return mu
-
-    def _log_likelihood_gradients():
-        raise NotImplementedError
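Note: the probit moments_match above uses the standard closed-form EP moments for a probit site times a Gaussian cavity. A self-contained check of those formulas against numerical integration (plain numpy/scipy, independent of GPy; y is a label in {-1, 1} and tau_i, v_i are the cavity natural parameters):

```python
import numpy as np
from scipy import stats, integrate

def probit_moments(y, tau_i, v_i):
    # closed forms, exactly as in moments_match above
    z = y * v_i / np.sqrt(tau_i**2 + tau_i)
    Z_hat = stats.norm.cdf(z)
    phi = stats.norm.pdf(z)
    mu_hat = v_i/tau_i + y*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
    sigma2_hat = 1./tau_i - (phi/((tau_i**2 + tau_i)*Z_hat))*(z + phi/Z_hat)
    return Z_hat, mu_hat, sigma2_hat

# numerical reference: moments of Phi(y*f) * N(f | v_i/tau_i, 1/tau_i)
y, tau_i, v_i = 1, 2.0, 0.5
tilted = lambda f: stats.norm.cdf(y*f) * stats.norm.pdf(f, v_i/tau_i, np.sqrt(1./tau_i))
Z = integrate.quad(tilted, -10, 10)[0]
m1 = integrate.quad(lambda f: f*tilted(f), -10, 10)[0] / Z
m2 = integrate.quad(lambda f: f*f*tilted(f), -10, 10)[0] / Z
print(probit_moments(y, tau_i, v_i))  # (Z_hat, mu_hat, sigma2_hat)
print(Z, m1, m2 - m1**2)              # should agree to quadrature accuracy
```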
@@ -14,14 +14,14 @@ class Matern32(kernpart):

     .. math::

-       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
+       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

     :param D: the number of input dimensions
     :type D: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param lengthscale: the vector of lengthscale :math:`\ell_i`
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object

@@ -35,17 +35,19 @@ class Matern32(kernpart):
             self.Nparam = 2
             self.name = 'Mat32'
             if lengthscale is not None:
-                assert lengthscale.shape == (1,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
             else:
                 lengthscale = np.ones(1)
         else:
             self.Nparam = self.D + 1
-            self.name = 'Mat32_ARD'
+            self.name = 'Mat32'
             if lengthscale is not None:
-                assert lengthscale.shape == (self.D,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == self.D, "bad number of lengthscales"
             else:
                 lengthscale = np.ones(self.D)
-        self._set_params(np.hstack((variance,lengthscale)))
+        self._set_params(np.hstack((variance,lengthscale.flatten())))

     def _get_params(self):
         """return the value of the parameters."""

@@ -116,9 +118,9 @@ class Matern32(kernpart):
         :param F1: vector of derivatives of F
         :type F1: np.array
         :param F2: vector of second derivatives of F
         :type F2: np.array
         :param lower,upper: boundaries of the input domain
         :type lower,upper: floats
         """
         assert self.D == 1
         def L(x,i):

@@ -133,4 +135,3 @@ class Matern32(kernpart):
         #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
         #return(G)
         return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
-
@@ -13,14 +13,14 @@ class Matern52(kernpart):

     .. math::

-       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
+       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

     :param D: the number of input dimensions
     :type D: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param lengthscale: the vector of lengthscale :math:`\ell_i`
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object

@@ -33,17 +33,19 @@ class Matern52(kernpart):
             self.Nparam = 2
             self.name = 'Mat52'
             if lengthscale is not None:
-                assert lengthscale.shape == (1,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
             else:
                 lengthscale = np.ones(1)
         else:
             self.Nparam = self.D + 1
-            self.name = 'Mat52_ARD'
+            self.name = 'Mat52'
             if lengthscale is not None:
-                assert lengthscale.shape == (self.D,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == self.D, "bad number of lengthscales"
             else:
                 lengthscale = np.ones(self.D)
-        self._set_params(np.hstack((variance,lengthscale)))
+        self._set_params(np.hstack((variance,lengthscale.flatten())))

     def _get_params(self):
         """return the value of the parameters."""
@@ -13,14 +13,14 @@ class exponential(kernpart):

     .. math::

-       k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }
+       k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

     :param D: the number of input dimensions
     :type D: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param lengthscale: the vector of lengthscale :math:`\ell_i`
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object

@@ -33,17 +33,19 @@ class exponential(kernpart):
             self.Nparam = 2
             self.name = 'exp'
             if lengthscale is not None:
-                assert lengthscale.shape == (1,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
             else:
                 lengthscale = np.ones(1)
         else:
             self.Nparam = self.D + 1
-            self.name = 'exp_ARD'
+            self.name = 'exp'
             if lengthscale is not None:
-                assert lengthscale.shape == (self.D,)
+                lengthscale = np.asarray(lengthscale)
+                assert lengthscale.size == self.D, "bad number of lengthscales"
             else:
                 lengthscale = np.ones(self.D)
-        self._set_params(np.hstack((variance,lengthscale)))
+        self._set_params(np.hstack((variance,lengthscale.flatten())))

     def _get_params(self):
         """return the value of the parameters."""

@@ -87,7 +89,7 @@ class exponential(kernpart):
         dl = self.variance*dvar*dist2M.sum(-1)*invdist
         target[1] += np.sum(dl*partial)

     def dKdiag_dtheta(self,partial,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         #NB: derivative of diagonal elements wrt lengthscale is 0
         target[0] += np.sum(partial)

@@ -110,9 +112,9 @@ class exponential(kernpart):
         :param F: vector of functions
         :type F: np.array
         :param F1: vector of derivatives of F
         :type F1: np.array
         :param lower,upper: boundaries of the input domain
         :type lower,upper: floats
         """
         assert self.D == 1
         def L(x,i):

@@ -124,8 +126,3 @@ class exponential(kernpart):
             G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
         Flower = np.array([f(lower) for f in F])[:,None]
         return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))
-
-
-
-
-
@ -6,7 +6,7 @@ import numpy as np
|
||||||
from ..core.parameterised import parameterised
|
from ..core.parameterised import parameterised
|
||||||
from kernpart import kernpart
|
from kernpart import kernpart
|
||||||
import itertools
|
import itertools
|
||||||
from product_orthogonal import product_orthogonal
|
from product_orthogonal import product_orthogonal
|
||||||
|
|
||||||
class kern(parameterised):
|
class kern(parameterised):
|
||||||
def __init__(self,D,parts=[], input_slices=None):
|
def __init__(self,D,parts=[], input_slices=None):
|
||||||
|
|
@@ -155,7 +155,7 @@ class kern(parameterised):

     D = K1.D + K2.D

-    newkernparts = [product_orthogonal(k1,k2).parts[0] for k1, k2 in itertools.product(K1.parts,K2.parts)]
+    newkernparts = [product_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]

     slices = []
     for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
@@ -235,6 +235,8 @@ class kern(parameterised):
             X2 = X
         target = np.zeros(self.Nparam)
         [p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]

+        #TODO: transform the gradients here!
         return target

     def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None):
@@ -324,6 +326,7 @@ class kern(parameterised):
         [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]

         # "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
         # not really expensive, since it's just a matrix of zeroes.
         # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
@@ -15,34 +15,33 @@ class linear(kernpart):
     :param D: the number of input dimensions
     :type D: int
     :param variances: the vector of variances :math:`\sigma^2_i`
-    :type variances: np.ndarray of size (1,) or (D,) depending on ARD
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single variance parameter \sigma^2), otherwise there is one variance parameter per dimension.
+    :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object
     """

-    def __init__(self,D,variances=None,ARD=True):
+    def __init__(self,D,variances=None,ARD=False):
         self.D = D
         self.ARD = ARD
         if ARD == False:
             self.Nparam = 1
             self.name = 'linear'
             if variances is not None:
-                if isinstance(variances, float):
-                    variances = np.array([variances])
-
-                assert variances.shape == (1,)
+                variances = np.asarray(variances)
+                assert variances.size == 1, "Only one variance needed for non-ARD kernel"
             else:
                 variances = np.ones(1)
             self._Xcache, self._X2cache = np.empty(shape=(2,))
         else:
             self.Nparam = self.D
-            self.name = 'linear_ARD'
+            self.name = 'linear'
             if variances is not None:
-                assert variances.shape == (self.D,)
+                variances = np.asarray(variances)
+                assert variances.size == self.D, "bad number of variances"
             else:
                 variances = np.ones(self.D)
-        self._set_params(variances)
+        self._set_params(variances.flatten())

         #initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3,1))
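The coercion idiom the kernel constructors now share is worth isolating: np.asarray plus a size check accepts a float, a list or an array uniformly. A minimal standalone sketch of that idiom (plain numpy; _coerce_param is a hypothetical helper, not GPy API):

    import numpy as np

    def _coerce_param(value, expected_size, name):
        """Accept a float, list or array; return a flat array of expected_size."""
        value = np.asarray(value, dtype=float).flatten()
        assert value.size == expected_size, "bad number of %s" % name
        return value

    # floats, lists and arrays are all accepted now:
    _coerce_param(2.0, 1, 'variances')              # -> array([ 2.])
    _coerce_param([1., 2., 3.], 3, 'lengthscales')  # -> array([ 1.,  2.,  3.])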
@@ -12,7 +12,7 @@ class rbf(kernpart):

     .. math::

-       k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \qquad \qquad \text{ where } r^2 = \sum_{i=1}^d \frac{(x_i-x^\prime_i)^2}{\ell_i^2}
+       k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \ \ \ \ \ \text{ where } r^2 = \sum_{i=1}^d \frac{(x_i-x^\prime_i)^2}{\ell_i^2}

     where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.

@@ -21,7 +21,7 @@ class rbf(kernpart):
     :param variance: the variance of the kernel
     :type variance: float
     :param lengthscale: the vector of lengthscale of the kernel
-    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
     :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object

@@ -75,6 +75,7 @@ class rbf(kernpart):
     def K(self,X,X2,target):
         if X2 is None:
             X2 = X
         self._K_computations(X,X2)
         np.add(self.variance*self._K_dvar, target,target)
GPy/likelihoods/EP.py (new file, 311 lines)

import numpy as np
from scipy import stats, linalg
from ..util.linalg import pdinv,mdot,jitchol
from likelihood import likelihood

class EP(likelihood):
    def __init__(self,data,likelihood_function,epsilon=1e-3,power_ep=[1.,1.]):
        """
        Expectation Propagation

        Arguments
        ---------
        epsilon : convergence criterion; the maximum squared difference allowed between mean updates before iteration stops (float)
        likelihood_function : a likelihood function (see likelihood_functions.py)
        """
        self.likelihood_function = likelihood_function
        self.epsilon = epsilon
        self.eta, self.delta = power_ep
        self.data = data
        self.N = self.data.size
        self.is_heteroscedastic = True
        self.Nparams = 0

        #Initial values - Likelihood approximation parameters:
        #p(y|f) = t(f|tau_tilde,v_tilde)
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)

        #initial values for the GP variables
        self.Y = np.zeros((self.N,1))
        self.covariance_matrix = np.eye(self.N)
        self.precision = np.ones(self.N)
        self.Z = 0
        self.YYT = None

    def predictive_values(self,mu,var):
        return self.likelihood_function.predictive_values(mu,var)

    def _get_params(self):
        return np.zeros(0)
    def _get_param_names(self):
        return []
    def _set_params(self,p):
        pass # TODO: the EP likelihood might want to take some parameters...
    def _gradients(self,partial):
        return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...

    def _compute_GP_variables(self):
        #Variables to be called from GP
        mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
        sigma_sum = 1./self.tau_ + 1./self.tau_tilde
        mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
        self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep

        self.Y = mu_tilde[:,None]
        self.YYT = np.dot(self.Y,self.Y.T)
        self.precision = self.tau_tilde
        self.covariance_matrix = np.diag(1./self.precision)

    def fit_full(self,K):
        """
        The expectation-propagation algorithm.
        For nomenclature see Rasmussen & Williams 2006.
        """
        #Prior distribution parameters: p(f|X) = N(f|0,K)
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        mu = np.zeros(self.N)
        Sigma = K.copy()

        """
        Initial values - Cavity distribution parameters:
        q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = self.epsilon + 1.
        epsilon_np2 = self.epsilon + 1.
        self.iterations = 0
        self.np1 = [self.tau_tilde.copy()]
        self.np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
                self.v_tilde[i] = self.v_tilde[i] + Delta_v
                #Posterior distribution parameters update
                si = Sigma[:,i].reshape(self.N,1)
                Sigma = Sigma - Delta_tau/(1.+ Delta_tau*Sigma[i,i])*np.dot(si,si.T)
                mu = np.dot(Sigma,self.v_tilde)
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
            B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
            L = jitchol(B)
            V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
            Sigma = K - np.dot(V.T,V)
            mu = np.dot(Sigma,self.v_tilde)
            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
            self.np1.append(self.tau_tilde.copy())
            self.np2.append(self.v_tilde.copy())

        return self._compute_GP_variables()

    def fit_DTC(self, Knn_diag, Kmn, Kmm):
        """
        The expectation-propagation algorithm with sparse pseudo-input.
        For nomenclature see ... 2013.
        """
        #TODO: this doesn't work with uncertain inputs!

        """
        Prior approximation parameters:
        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
        Sigma0 = Qnn = Knm*Kmmi*Kmn
        """
        Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
        KmnKnm = np.dot(Kmn, Kmn.T)
        KmmiKmn = np.dot(Kmmi,Kmn)
        Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
        LLT0 = Kmm.copy()

        """
        Posterior approximation: q(f|y) = N(f| mu, Sigma)
        Sigma = Diag + P*R.T*R*P.T + K
        mu = w + P*gamma
        """
        mu = np.zeros(self.N)
        LLT = Kmm.copy()
        Sigma_diag = Qnn_diag.copy()

        """
        Initial values - Cavity distribution parameters:
        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        tau_ = np.empty(self.N,dtype=float)
        v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = 1
        epsilon_np2 = 1
        self.iterations = 0
        tau_tilde = np.zeros(self.N) # site precisions start at zero
        v_tilde = np.zeros(self.N)   # site scaled means start at zero
        np1 = [tau_tilde.copy()]
        np2 = [v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
                v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
                #Marginal moments
                Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                tau_tilde[i] = tau_tilde[i] + Delta_tau
                v_tilde[i] = v_tilde[i] + Delta_v
                #Posterior distribution parameters update
                LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
                L = jitchol(LLT)
                V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
                Sigma_diag = np.sum(V*V,-2)
                si = np.sum(V.T*V[:,i],-1)
                mu = mu + (Delta_v-Delta_tau*mu[i])*si
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
            L = jitchol(LLT)
            V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
            V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
            Sigma_diag = np.sum(V*V,-2)
            Knmv_tilde = np.dot(Kmn,v_tilde)
            mu = np.dot(V2.T,Knmv_tilde)
            epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N
            epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N
            np1.append(tau_tilde.copy())
            np2.append(v_tilde.copy())

        # expose the site and cavity parameters used by _compute_GP_variables
        self.tau_tilde, self.v_tilde = tau_tilde, v_tilde
        self.tau_, self.v_, self.Z_hat = tau_, v_, Z_hat
        return self._compute_GP_variables()

    def fit_FITC(self, Knn_diag, Kmn, Kmm):
        """
        The expectation-propagation algorithm with sparse pseudo-input.
        For nomenclature see Naish-Guzman and Holden, 2008.
        """
        """
        Prior approximation parameters:
        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
        Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
        """
        Kmmi, self.Lm, self.Lmi, Kmm_logdet = pdinv(Kmm)
        self.M = Kmm.shape[0] # number of inducing points
        P0 = Kmn.T
        KmnKnm = np.dot(P0.T, P0)
        KmmiKmn = np.dot(Kmmi,P0.T)
        Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
        Diag0 = Knn_diag - Qnn_diag
        R0 = jitchol(Kmmi).T

        """
        Posterior approximation: q(f|y) = N(f| mu, Sigma)
        Sigma = Diag + P*R.T*R*P.T + K
        mu = w + P*gamma
        """
        self.w = np.zeros(self.N)
        self.gamma = np.zeros(self.M)
        mu = np.zeros(self.N)
        P = P0.copy()
        R = R0.copy()
        Diag = Diag0.copy()
        Sigma_diag = Knn_diag

        """
        Initial values - Cavity distribution parameters:
        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = 1
        epsilon_np2 = 1
        self.iterations = 0
        self.np1 = [self.tau_tilde.copy()]
        self.np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
                self.v_tilde[i] = self.v_tilde[i] + Delta_v
                #Posterior distribution parameters update
                dtd1 = Delta_tau*Diag[i] + 1.
                dii = Diag[i]
                Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
                pi_ = P[i,:].reshape(1,self.M)
                P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
                Rp_i = np.dot(R,pi_.T)
                RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
                R = jitchol(RTR).T
                self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
                self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
                RPT = np.dot(R,P.T)
                Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
                mu = self.w + np.dot(P,self.gamma)
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
            P = (Diag / Diag0)[:,None] * P0
            RPT0 = np.dot(R0,P0.T)
            L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
            R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
            RPT = np.dot(R,P.T)
            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
            self.w = Diag * self.v_tilde
            self.gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
            mu = self.w + np.dot(P,self.gamma)
            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
            self.np1.append(self.tau_tilde.copy())
            self.np2.append(self.v_tilde.copy())

        return self._compute_GP_variables()
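fit_full is the standard EP sweep from Rasmussen & Williams (2006): update cavities and sites in random order, then rebuild Sigma from a Cholesky factor for numerical stability. A hedged usage sketch on toy classification data (the kernel matrix K here is hand-rolled rather than built through GPy.kern, and the import path assumes the new likelihoods __init__.py below):

    import numpy as np
    from GPy.likelihoods import EP, functions

    # toy binary classification data with labels in {-1, 1}
    np.random.seed(0)
    X = np.random.randn(20, 1)
    Y = np.where(X[:, 0] > 0., 1., -1.)

    # any positive-definite covariance will do for the sketch
    K = np.exp(-0.5 * (X - X.T) ** 2) + 1e-6 * np.eye(20)

    lik = EP(Y, functions.probit())
    lik.fit_full(K)   # run EP until the site updates stabilise
    print lik.Z       # log-normaliser term consumed by GP.log_likelihood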
GPy/likelihoods/Gaussian.py (new file, 56 lines)

import numpy as np
from likelihood import likelihood

class Gaussian(likelihood):
    def __init__(self,data,variance=1.,normalize=False):
        self.is_heteroscedastic = False
        self.Nparams = 1
        self.data = data
        self.N, D = data.shape
        self.Z = 0. # a correction factor which accounts for the approximation made

        #normalisation
        if normalize:
            self._mean = data.mean(0)[None,:]
            self._std = data.std(0)[None,:]
            self.Y = (self.data - self._mean)/self._std
        else:
            self._mean = np.zeros((1,D))
            self._std = np.ones((1,D))
            self.Y = self.data

        #TODO: make this work efficiently (only compute YYT if D>>N)
        self.YYT = np.dot(self.Y,self.Y.T)
        self.trYYT = np.trace(self.YYT)
        self._set_params(np.asarray(variance))

    def _get_params(self):
        return np.asarray(self._variance)

    def _get_param_names(self):
        return ["noise variance"]

    def _set_params(self,x):
        self._variance = float(x)
        self.covariance_matrix = np.eye(self.N)*self._variance
        self.precision = 1./self._variance

    def predictive_values(self,mu,var):
        """
        Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval
        """
        mean = mu*self._std + self._mean
        true_var = (var + self._variance)*self._std**2
        _5pc = mean - 2.*np.sqrt(true_var)
        _95pc = mean + 2.*np.sqrt(true_var)
        return mean, _5pc, _95pc

    def fit_full(self):
        """
        No approximations needed
        """
        pass

    def _gradients(self,partial):
        return np.sum(partial)
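A quick numeric check of the un-normalisation arithmetic in predictive_values: the latent prediction is mapped back through the stored mean and standard deviation, and the noise variance is added before the interval is formed. Synthetic numbers, not GPy API:

    import numpy as np

    std, mean_shift, noise_var = 2.0, 5.0, 0.1
    mu, var = 0.5, 0.04                     # latent prediction, normalised units

    mean = mu * std + mean_shift            # 6.0
    true_var = (var + noise_var) * std ** 2 # 0.56
    lower = mean - 2. * np.sqrt(true_var)   # approx 4.503
    upper = mean + 2. * np.sqrt(true_var)   # approx 7.497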
GPy/likelihoods/__init__.py (new file, 4 lines)

from EP import EP
from Gaussian import Gaussian
# TODO: from Laplace import Laplace
import likelihood_functions as functions
GPy/likelihoods/likelihood.py (new file, 35 lines)

import numpy as np

class likelihood:
    """
    The atom for a likelihood class

    This object interfaces the GP and the data. The most basic likelihood
    (Gaussian) inherits directly from this, as does the EP algorithm

    Some things must be defined for this to work properly:
      self.Y : the effective Gaussian target of the GP
      self.N, self.D : Y.shape
      self.covariance_matrix : the effective (noise) covariance of the GP targets
      self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
      self.is_heteroscedastic : enables significant computational savings in GP
      self.precision : a scalar or vector representation of the effective target precision
      self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
    """
    def __init__(self,data):
        raise ValueError, "this class is not to be instantiated"

    def _get_params(self):
        raise NotImplementedError

    def _get_param_names(self):
        raise NotImplementedError

    def _set_params(self,x):
        raise NotImplementedError

    def fit(self):
        raise NotImplementedError

    def _gradients(self,partial):
        raise NotImplementedError
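The contract above is duck-typed rather than enforced, so the smallest conforming subclass simply sets the listed attributes. A hedged sketch (fixed_noise is hypothetical, for illustration only):

    import numpy as np
    from likelihood import likelihood

    class fixed_noise(likelihood):
        """A hypothetical likelihood with a known, fixed noise variance."""
        def __init__(self, data, variance):
            self.data = self.Y = data
            self.N, self.D = data.shape
            self.Z = 0.                       # no approximation correction
            self.is_heteroscedastic = False
            self.precision = 1. / variance
            self.covariance_matrix = np.eye(self.N) * variance
            self.YYT = None                   # optional shortcut left unset

        def _get_params(self): return np.zeros(0)
        def _get_param_names(self): return []
        def _set_params(self, x): pass
        def _gradients(self, partial): return np.zeros(0)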
GPy/likelihoods/likelihood_functions.py (new file, 134 lines)

# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
import scipy as sp
import scipy.optimize
import pylab as pb
from ..util.plot import gpplot

class likelihood_function:
    """
    Likelihood class for doing Expectation propagation

    :param Y: observed output (Nx1 numpy.ndarray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
    def __init__(self,location=0,scale=1):
        self.location = location
        self.scale = scale

class probit(likelihood_function):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
    -----
    $$
    L(x) = \\Phi (Y_i*f_i)
    $$
    """

    def moments_match(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param i: number of observation (int)
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        # TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1"
        if data_i == 0: data_i = -1 #NOTE Binary classification works better with classes {-1,1}; 1D-plotting works better with classes {0,1}.
        z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
        Z_hat = stats.norm.cdf(z)
        phi = stats.norm.pdf(z)
        mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
        sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
        return Z_hat, mu_hat, sigma2_hat

    def predictive_values(self,mu,var):
        """
        Compute mean, and confidence interval (percentiles 5 and 95) of the prediction
        """
        mu = mu.flatten()
        var = var.flatten()
        mean = stats.norm.cdf(mu/np.sqrt(1+var))
        p_025 = np.zeros(mu.shape)
        p_975 = np.ones(mu.shape)
        return mean, p_025, p_975

class Poisson(likelihood_function):
    """
    Poisson likelihood
    Y is expected to take values in {0,1,2,...}
    -----
    $$
    L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
    $$
    """
    def moments_match(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param i: number of observation (int)
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        mu = v_i/tau_i
        sigma = np.sqrt(1./tau_i)
        def poisson_norm(f):
            """
            Product of the likelihood and the cavity distribution
            """
            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
            rate = np.exp( (f*self.scale)+self.location)
            poisson = stats.poisson.pmf(float(data_i),rate)
            return pdf_norm_f*poisson

        def log_pnm(f):
            """
            Negative log of poisson_norm (up to a constant); minimised by the golden search
            """
            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*data_i)

        """
        Golden Search and Simpson's Rule
        --------------------------------
        Simpson's Rule is used to calculate the moments numerically; it needs a grid of points as input.
        Golden Search is used to find the mode of poisson_norm and define around it the grid for Simpson's Rule
        """
        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.

        #Golden search
        golden_A = -1 if data_i == 0 else np.array([np.log(data_i),mu]).min() #Lower limit
        golden_B = np.array([np.log(data_i),mu]).max() #Upper limit
        golden_A = (golden_A - self.location)/self.scale
        golden_B = (golden_B - self.location)/self.scale
        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm

        # Simpson's approximation
        width = 3./np.log(max(data_i,2))
        A = opt - width #Lower limit
        B = opt + width #Upper limit
        K = 10*int(np.log(max(data_i,150))) #Number of points in the grid; we DON'T want K to be the same number for every case
        h = (B-A)/K # length of the intervals
        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
        first = zeroth*x
        second = first*x
        Z_hat = sum(zeroth)*h/3 # zero-th moment
        mu_hat = sum(first)*h/(3*Z_hat) # first moment
        m2 = sum(second)*h/(3*Z_hat) # second moment
        sigma2_hat = m2 - mu_hat**2 # second central moment
        return float(Z_hat), float(mu_hat), float(sigma2_hat)

    def predictive_values(self,mu,var):
        """
        Compute mean, and confidence interval (percentiles 5 and 95) of the prediction
        """
        mean = np.exp(mu*self.scale + self.location)
        tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
        p_025 = tmp[:,0]
        p_975 = tmp[:,1]
        return mean,p_025,p_975
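The probit moments are the closed forms from Rasmussen & Williams (2006, section 3.6) in the (tau_i, v_i) natural parametrisation; they can be cross-checked by brute-force quadrature of the tilted distribution N(f | v_i/tau_i, 1/tau_i) * Phi(y*f). A hedged verification sketch:

    import numpy as np
    from scipy import stats
    from scipy.integrate import quad

    y, tau_i, v_i = 1.0, 2.0, 0.6             # cavity precision and scaled mean
    mu_c, s2_c = v_i / tau_i, 1. / tau_i      # cavity mean and variance

    tilted = lambda f: stats.norm.pdf(f, mu_c, np.sqrt(s2_c)) * stats.norm.cdf(y * f)
    Z,  _ = quad(tilted, -10, 10)
    m1, _ = quad(lambda f: f * tilted(f), -10, 10)

    # compare with the closed forms used in probit.moments_match
    z = y * v_i / np.sqrt(tau_i ** 2 + tau_i)
    Z_hat = stats.norm.cdf(z)
    mu_hat = mu_c + y * stats.norm.pdf(z) / (Z_hat * np.sqrt(tau_i ** 2 + tau_i))
    assert np.allclose([Z, m1 / Z], [Z_hat, mu_hat], atol=1e-6)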
@@ -5,10 +5,12 @@ import numpy as np
 import pylab as pb
 import sys, pdb
 from GPLVM import GPLVM
-from sparse_GP_regression import sparse_GP_regression
+from sparse_GP import sparse_GP
 from GPy.util.linalg import pdinv
+from ..likelihoods import Gaussian
+from .. import kern

-class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+class Bayesian_GPLVM(sparse_GP, GPLVM):
     """
     Bayesian Gaussian Process Latent Variable Model

@@ -20,18 +22,24 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
     :type init: 'PCA'|'random'

     """
-    def __init__(self, Y, Q, X = None, S = None, init='PCA', **kwargs):
-        if X == None:
-            X = self.initialise_latent(init, Q, Y)
-        if S == None:
-            S = np.ones_like(X) * 1e-2
-
-        sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
+    def __init__(self, Y, Q, init='PCA', M=10, Z=None, kernel=None, **kwargs):
+        X = self.initialise_latent(init, Q, Y)
+
+        if Z is None:
+            Z = np.random.permutation(X.copy())[:M]
+        else:
+            assert Z.shape[1]==X.shape[1]
+
+        if kernel is None:
+            kernel = kern.rbf(Q) + kern.white(Q)
+
+        S = np.ones_like(X) * 1e-2
+        sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_uncertainty=S, **kwargs)

     def _get_param_names(self):
         X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
         S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-        return (X_names + S_names + sparse_GP_regression._get_param_names(self))
+        return (X_names + S_names + sparse_GP._get_param_names(self))

     def _get_params(self):
         """

@@ -39,17 +47,17 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         The resulting 1-D array has this structure:

         ===============================================================
-        | mu | S | Z | beta | theta |
+        | mu | S | Z | theta | beta |
         ===============================================================

         """
-        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression._get_params(self)))
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP._get_params(self)))

     def _set_params(self,x):
         N, Q = self.N, self.Q
         self.X = x[:self.X.size].reshape(N,Q).copy()
         self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
-        sparse_GP_regression._set_params(self, x[(2*N*Q):])
+        sparse_GP._set_params(self, x[(2*N*Q):])

     def dL_dmuS(self):
         dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)

@@ -58,17 +66,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
         dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2

-        dKL_dS = (1. - (1./self.X_uncertainty))*0.5
-        dKL_dmu = self.X
-        return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
-
-    def KL_divergence(self):
-        var_mean = np.square(self.X).sum()
-        var_S = np.sum(self.X_uncertainty - np.log(self.X_uncertainty))
-        return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
-
-    def log_likelihood(self):
-        return sparse_GP_regression.log_likelihood(self) - self.KL_divergence()
+        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))

     def _log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
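The new constructor draws its M inducing inputs as a random subset of the initial latent points. A minimal sketch of that selection step in isolation (plain numpy; X and M stand in for the latent matrix and the number of inducing points):

    import numpy as np

    np.random.seed(0)
    X = np.random.randn(100, 2)   # latent means, e.g. from PCA
    M = 10                        # number of inducing points

    # permute the rows and keep the first M as inducing inputs
    Z = np.random.permutation(X.copy())[:M]
    assert Z.shape == (M, X.shape[1])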
GPy/models/GP.py (new file, 274 lines)

# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
import pylab as pb
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
from ..likelihoods import EP

class GP(model):
    """
    Gaussian Process model for regression and EP

    :param X: input observations
    :param kernel: a GPy kernel, defaults to rbf+white
    :param likelihood: a GPy likelihood
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
    :rtype: model object
    :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
    :param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
    :type powerep: list

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """
    #FIXME normalize vs normalise
    def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):

        # parse arguments
        self.Xslices = Xslices
        self.X = X
        assert len(self.X.shape)==2
        self.N, self.Q = self.X.shape
        assert isinstance(kernel, kern.kern)
        self.kern = kernel

        #here's some simple normalisation for the inputs
        if normalize_X:
            self._Xmean = X.mean(0)[None,:]
            self._Xstd = X.std(0)[None,:]
            self.X = (X.copy() - self._Xmean) / self._Xstd
            if hasattr(self,'Z'):
                self.Z = (self.Z - self._Xmean) / self._Xstd
        else:
            self._Xmean = np.zeros((1,self.X.shape[1]))
            self._Xstd = np.ones((1,self.X.shape[1]))

        self.likelihood = likelihood
        #assert self.X.shape[0] == self.likelihood.Y.shape[0]
        #self.N, self.D = self.likelihood.Y.shape
        assert self.X.shape[0] == self.likelihood.data.shape[0]
        self.N, self.D = self.likelihood.data.shape

        model.__init__(self)

    def _set_params(self,p):
        self.kern._set_params_transformed(p[:self.kern.Nparam])
        #self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
        self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas

        self.K = self.kern.K(self.X,slices1=self.Xslices)
        self.K += self.likelihood.covariance_matrix

        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)

        #the gradient of the likelihood wrt the covariance matrix
        if self.likelihood.YYT is None:
            alpha = np.dot(self.Ki,self.likelihood.Y)
            self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
        else:
            tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
            self.dL_dK = 0.5*(tmp - self.D*self.Ki)

    def _get_params(self):
        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))

    def _get_param_names(self):
        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

    def update_likelihood_approximation(self):
        """
        Approximates a non-Gaussian likelihood using Expectation Propagation

        For a Gaussian (or direct: TODO) likelihood, no iteration is required:
        this function does nothing
        """
        self.likelihood.fit_full(self.kern.K(self.X))
        self._set_params(self._get_params()) # update the GP

    def _model_fit_term(self):
        """
        Computes the model fit using YYT if it's available
        """
        if self.likelihood.YYT is None:
            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
        else:
            return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))

    def log_likelihood(self):
        """
        The log marginal likelihood of the GP.

        For an EP model, can be written as the log likelihood of a regression
        model for a new variable Y* = v_tilde/tau_tilde, with a covariance
        matrix K* = K + diag(1./tau_tilde) plus a normalization term.
        """
        return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z

    def _log_likelihood_gradients(self):
        """
        The gradient of all parameters.

        For the kernel parameters, use the chain rule via dL_dK

        For the likelihood parameters, pass in alpha = K^-1 y
        """
        return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))

    def _raw_predict(self,_Xnew,slices=None, full_cov=False):
        """
        Internal helper function for making predictions; does not account
        for normalisation or likelihood
        """
        Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
        KiKx = np.dot(self.Ki,Kx)
        if full_cov:
            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
            var = Kxx - np.dot(KiKx.T,Kx) #NOTE this won't work for plotting
        else:
            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
            var = var[:,None]
        return mu, var

    def predict(self,Xnew, slices=None, full_cov=False):
        """
        Predict the function(s) at the new point(s) Xnew.

        Arguments
        ---------
        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.Q
        :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
        :type slices: (None, list of slice objects, list of ints)
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
        :rtype: posterior mean, a Numpy array, Nnew x self.D
        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D

        .. Note:: "slices" specifies how the points X_new co-vary with the training points.

          - If None, the new points covary through every kernel part (default)
          - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
          - If a list of booleans, specifying which kernel parts are active

        If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
        This is to allow for different normalisations of the output dimensions.

        """
        #normalise X values
        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
        mu, var = self._raw_predict(Xnew, slices, full_cov)

        #now push through likelihood TODO
        mean, _025pm, _975pm = self.likelihood.predictive_values(mu, var)

        return mean, var, _025pm, _975pm

    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
        """
        Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian

        :param samples: the number of a posteriori samples to plot
        :param which_data: which of the training data to plot (default all)
        :type which_data: 'all' or a slice object to slice self.X, self.Y
        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
        :param which_functions: which of the kernel functions to plot (additively)
        :type which_functions: list of bools
        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D

        Plot the posterior of the GP.
          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
          - In two dimensions, a contour-plot shows the mean predicted function
          - In higher dimensions, we've not implemented this yet !TODO!

        Can plot only part of the data and part of the posterior functions using which_data and which_functions
        Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
        """
        if which_functions=='all':
            which_functions = [True]*self.kern.Nparts
        if which_data=='all':
            which_data = slice(None)

        if self.X.shape[1] == 1:
            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
            if samples == 0:
                m,v = self._raw_predict(Xnew, slices=which_functions)
                gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
                pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
            else:
                m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
                Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
                gpplot(Xnew,m,m-2*np.sqrt(np.diag(v))[:,None],m+2*np.sqrt(np.diag(v))[:,None])
                for i in range(samples):
                    pb.plot(Xnew,Ysim[i,:],Tango.coloursHex['darkBlue'],linewidth=0.25)
                pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
            pb.xlim(xmin,xmax)
            ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v))[:,None])), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v))[:,None]))
            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
            pb.ylim(ymin,ymax)
            if hasattr(self,'Z'):
                pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)

        elif self.X.shape[1] == 2:
            resolution = resolution or 50
            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
            m,v = self._raw_predict(Xnew, slices=which_functions)
            m = m.reshape(resolution,resolution).T
            pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
            pb.scatter(self.X[:,0],self.X[:,1],40,self.likelihood.Y,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
            pb.xlim(xmin[0],xmax[0])
            pb.ylim(xmin[1],xmax[1])
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
        # TODO include samples
        if which_functions=='all':
            which_functions = [True]*self.kern.Nparts
        if which_data=='all':
            which_data = slice(None)

        if self.X.shape[1] == 1:

            Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now

            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
            m, var, lower, upper = self.predict(Xnew, slices=which_functions)
            gpplot(Xnew,m, lower, upper)
            pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
            ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
            ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
            pb.xlim(xmin,xmax)
            pb.ylim(ymin,ymax)
            if hasattr(self,'Z'):
                Zu = self.Z*self._Xstd + self._Xmean
                pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)

        elif self.X.shape[1]==2: #FIXME
            resolution = resolution or 50
            Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
            x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
            m, var, lower, upper = self.predict(Xnew, slices=which_functions)
            m = m.reshape(resolution,resolution).T
            pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
            Yf = self.likelihood.Y.flatten()
            pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
            pb.xlim(xmin[0],xmax[0])
            pb.ylim(xmin[1],xmax[1])
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
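Putting the new pieces together, regression now means composing a kernel, a Gaussian likelihood and the GP core. A hedged end-to-end sketch against the interfaces introduced in this diff (assuming GPy.models re-exports GP):

    import numpy as np
    import GPy

    np.random.seed(0)
    X = np.random.uniform(-3., 3., (40, 1))
    Y = np.sin(X) + np.random.randn(40, 1) * 0.05

    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
    likelihood = GPy.likelihoods.Gaussian(Y, normalize=True)
    m = GPy.models.GP(X, likelihood, kernel)

    Xnew = np.linspace(-3., 3., 100)[:, None]
    mean, var, lower, upper = m.predict(Xnew)   # mean plus 95% bounds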
@@ -8,9 +8,10 @@ import sys, pdb
 from .. import kern
 from ..core import model
 from ..util.linalg import pdinv, PCA
-from GP_regression import GP_regression
+from GP import GP
+from ..likelihoods import Gaussian

-class GPLVM(GP_regression):
+class GPLVM(GP):
     """
     Gaussian Process Latent Variable Model

@@ -22,10 +23,13 @@ class GPLVM(GP):
     :type init: 'PCA'|'random'

     """
-    def __init__(self, Y, Q, init='PCA', X = None, **kwargs):
+    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
         if X is None:
             X = self.initialise_latent(init, Q, Y)
-        GP_regression.__init__(self, X, Y, **kwargs)
+        if kernel is None:
+            kernel = kern.rbf(Q) + kern.bias(Q)
+        likelihood = Gaussian(Y)
+        GP.__init__(self, X, likelihood, kernel, **kwargs)

     def initialise_latent(self, init, Q, Y):
         if init == 'PCA':

@@ -34,23 +38,19 @@ class GPLVM(GP):
         return np.random.randn(Y.shape[0], Q)

     def _get_param_names(self):
-        return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-                + self.kern._get_param_names_transformed())
+        return sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[]) + GP._get_param_names(self)

     def _get_params(self):
-        return np.hstack((self.X.flatten(), self.kern._get_params_transformed()))
+        return np.hstack((self.X.flatten(), GP._get_params(self)))

     def _set_params(self,x):
         self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
-        GP_regression._set_params(self, x[self.X.size:])
+        GP._set_params(self, x[self.X.size:])

     def _log_likelihood_gradients(self):
-        dL_dK = self.dL_dK()
-        dL_dtheta = self.kern.dK_dtheta(dL_dK,self.X)
-        dL_dX = 2*self.kern.dK_dX(dL_dK,self.X)
-        return np.hstack((dL_dX.flatten(),dL_dtheta))
+        dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X)
+        return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self)))

     def plot(self):
         assert self.Y.shape[1]==2
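With GPLVM delegating to GP, construction needs only the data and a latent dimensionality; the rbf+bias kernel and Gaussian likelihood defaults are filled in. A hedged sketch (again assuming GPy.models re-exports the class):

    import numpy as np
    import GPy

    Y = np.random.randn(50, 5)     # 50 observations in a 5-d output space
    m = GPy.models.GPLVM(Y, 2)     # rbf + bias kernel, Gaussian likelihood by default
    print m.log_likelihood()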
@ -1,160 +0,0 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pylab as pb
|
|
||||||
from scipy import stats, linalg
|
|
||||||
from .. import kern
|
|
||||||
from ..inference.Expectation_Propagation import Full
|
|
||||||
from ..inference.likelihoods import likelihood,probit#,poisson,gaussian
|
|
||||||
from ..core import model
|
|
||||||
from ..util.linalg import pdinv,jitchol
|
|
||||||
from ..util.plot import gpplot
|
|
||||||
|
|
||||||
class GP_EP(model):
|
|
||||||
def __init__(self,X,likelihood,kernel=None,epsilon_ep=1e-3,epsion_em=.1,powerep=[1.,1.]):
|
|
||||||
"""
|
|
||||||
Simple Gaussian Process with Non-Gaussian likelihood
|
|
||||||
|
|
||||||
Arguments
|
|
||||||
---------
|
|
||||||
:param X: input observations (NxD numpy.darray)
|
|
||||||
:param likelihood: a GPy likelihood (likelihood class)
|
|
||||||
:param kernel: a GPy kernel (kern class)
|
|
||||||
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 (float)
|
|
||||||
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.] (list)
|
|
||||||
:rtype: GPy model class.
|
|
||||||
"""
|
|
||||||
if kernel is None:
|
|
||||||
kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
|
|
||||||
|
|
||||||
assert isinstance(kernel,kern.kern), 'kernel is not a kern instance'
|
|
||||||
self.likelihood = likelihood
|
|
||||||
self.Y = self.likelihood.Y
|
|
||||||
self.kernel = kernel
|
|
||||||
self.X = X
|
|
||||||
self.N, self.D = self.X.shape
|
|
||||||
self.eta,self.delta = powerep
|
|
||||||
self.epsilon_ep = epsilon_ep
|
|
||||||
self.jitter = 1e-12
|
|
||||||
self.K = self.kernel.K(self.X)
|
|
||||||
model.__init__(self)
|
|
||||||
|
|
||||||
def _set_params(self,p):
|
|
||||||
self.kernel._set_params_transformed(p)
|
|
||||||
|
|
||||||
def _get_params(self):
|
|
||||||
return self.kernel._get_params_transformed()
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
|
||||||
return self.kernel._get_param_names_transformed()
|
|
||||||
|
|
||||||
def approximate_likelihood(self):
|
|
||||||
self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
|
|
||||||
self.ep_approx.fit_EP()
|
|
||||||
|
|
||||||
def posterior_param(self):
|
|
||||||
self.K = self.kernel.K(self.X)
|
|
||||||
self.Sroot_tilde_K = np.sqrt(self.ep_approx.tau_tilde)[:,None]*self.K
|
|
||||||
B = np.eye(self.N) + np.sqrt(self.ep_approx.tau_tilde)[None,:]*self.Sroot_tilde_K
|
|
||||||
#self.L = np.linalg.cholesky(B)
|
|
||||||
self.L = jitchol(B)
|
|
||||||
V,info = linalg.flapack.dtrtrs(self.L,self.Sroot_tilde_K,lower=1)
|
|
||||||
self.Sigma = self.K - np.dot(V.T,V)
|
|
||||||
self.mu = np.dot(self.Sigma,self.ep_approx.v_tilde)
|
|
||||||
|
|
||||||
def log_likelihood(self):
|
|
||||||
"""
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
The EP approximation to the log-marginal likelihood
|
|
||||||
"""
|
|
||||||
self.posterior_param()
|
|
||||||
mu_ = self.ep_approx.v_/self.ep_approx.tau_
|
|
||||||
L1 =.5*sum(np.log(1+self.ep_approx.tau_tilde*1./self.ep_approx.tau_))-sum(np.log(np.diag(self.L)))
|
|
||||||
L2A =.5*np.sum((self.Sigma-np.diag(1./(self.ep_approx.tau_+self.ep_approx.tau_tilde))) * np.dot(self.ep_approx.v_tilde[:,None],self.ep_approx.v_tilde[None,:]))
|
|
||||||
L2B = .5*np.dot(mu_*(self.ep_approx.tau_/(self.ep_approx.tau_tilde+self.ep_approx.tau_)),self.ep_approx.tau_tilde*mu_ - 2*self.ep_approx.v_tilde)
|
|
||||||
L3 = sum(np.log(self.ep_approx.Z_hat))
|
|
||||||
return L1 + L2A + L2B + L3
|
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
|
||||||
dK_dp = self.kernel.dK_dtheta(self.X)
|
|
||||||
self.dK_dp = dK_dp
|
|
||||||
aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
|
|
||||||
b = self.ep_approx.v_tilde - np.sqrt(self.ep_approx.tau_tilde)*linalg.flapack.dtrtrs(self.L.T,aux1)[0]
|
|
||||||
U,info_u = linalg.flapack.dtrtrs(self.L,np.diag(np.sqrt(self.ep_approx.tau_tilde)),lower=1)
|
|
||||||
dL_dK = 0.5*(np.outer(b,b)-np.dot(U.T,U))
|
|
||||||
self.dL_dK = dL_dK
|
|
||||||
return np.array([np.sum(dK_dpi*dL_dK) for dK_dpi in dK_dp.T])
|
|
||||||
|
|
||||||
def predict(self,X):
|
|
||||||
#TODO: check output dimensions
|
|
||||||
self.posterior_param()
|
|
||||||
K_x = self.kernel.K(self.X,X)
|
|
||||||
Kxx = self.kernel.K(X)
|
|
||||||
aux1,info = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
|
|
||||||
aux2,info = linalg.flapack.dtrtrs(self.L.T, aux1,lower=0)
|
|
||||||
zeta = np.sqrt(self.ep_approx.tau_tilde)*aux2
|
|
||||||
f = np.dot(K_x.T,self.ep_approx.v_tilde-zeta)
|
|
||||||
v,info = linalg.flapack.dtrtrs(self.L,np.sqrt(self.ep_approx.tau_tilde)[:,None]*K_x,lower=1)
|
|
||||||
variance = Kxx - np.dot(v.T,v)
|
|
||||||
vdiag = np.diag(variance)
|
|
||||||
y=self.likelihood.predictive_mean(f,vdiag)
|
|
||||||
return f,vdiag,y
|
|
    def plot(self):
        """
        Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals.
        """
        if self.X.shape[1]==1:
            pb.figure()
            xmin,xmax = self.X.min(),self.X.max()
            xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
            Xnew = np.linspace(xmin,xmax,100)[:,None]
            mu_f, var_f, mu_phi = self.predict(Xnew)
            pb.subplot(211)
            self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.X,Mean_u=self.mu,Var_u=np.diag(self.Sigma))
            pb.subplot(212)
            self.likelihood.plot1Db(self.X,Xnew,mu_phi)
        elif self.X.shape[1]==2:
            pb.figure()
            x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0)
            x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0)
            x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min)
            x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x2max-x2min)
            axis1 = np.linspace(x1min,x1max,50)
            axis2 = np.linspace(x2min,x2max,50)
            XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)]
            Xnew = np.c_[XX1.flatten(),XX2.flatten()]
            f,v,p = self.predict(Xnew)
            self.likelihood.plot2D(self.X,Xnew,p)
        else:
            raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"

    def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense
        """
        Fits EP and optimizes the hyperparameters iteratively until convergence is achieved.
        """
        self.epsilon_em = epsilon
        log_likelihood_change = self.epsilon_em + 1.
        self.parameters_path = [self.kernel._get_params()]
        self.approximate_likelihood()
        self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
        self.log_likelihood_path = [self.log_likelihood()]
        iteration = 0
        while log_likelihood_change > self.epsilon_em:
            print 'EM iteration', iteration
            self.optimize(max_f_eval = max_f_eval)
            log_likelihood_new = self.log_likelihood()
            log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
            if log_likelihood_change < 0:
                print 'log_likelihood decrement'
                self.kernel._set_params_transformed(self.parameters_path[-1])
                #self.kernM._set_params_transformed(self.parameters_path[-1]) # NOTE: kernM is only defined on the sparse (FITC) variant
            else:
                self.approximate_likelihood()
                self.log_likelihood_path.append(self.log_likelihood())
                self.parameters_path.append(self.kernel._get_params())
                self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
            iteration += 1
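A minimal sketch of how this EP-EM loop was driven, using the (now superseded) GP_EP interface exercised by the old test suite further down this diff; the data here are made up:

    import numpy as np
    import GPy

    X = np.random.randn(20,1)
    Y = np.where(X > 0, 1., -1.)
    likelihood = GPy.inference.likelihoods.probit(Y)
    m = GPy.models.GP_EP(X, likelihood, GPy.kern.rbf(1) + GPy.kern.white(1))
    m.constrain_positive('(var|len)')
    m.em(max_f_eval=1e4, epsilon=.1)   # alternate EP fits with hyperparameter optimization
    f, vdiag, y = m.predict(X)         # latent mean, latent variance, predictive mean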
@@ -1,18 +1,18 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


 import numpy as np
-import pylab as pb
+from GP import GP
+from .. import likelihoods
 from .. import kern
-from ..core import model
-from ..util.linalg import pdinv,mdot
-from ..util.plot import gpplot, Tango

-class GP_regression(model):
+class GP_regression(GP):
     """
     Gaussian Process model for regression

+    This is a thin wrapper around the GP class, with a set of sensible defaults
+
     :param X: input observations
     :param Y: observed values
     :param kernel: a GPy kernel, defaults to rbf+white

@@ -29,199 +29,8 @@ class GP_regression(model):

     def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1]) + kern.bias(X.shape[1]) + kern.white(X.shape[1])
+            kernel = kern.rbf(X.shape[1])

-        # parse arguments
-        self.Xslices = Xslices
-        assert isinstance(kernel, kern.kern)
-        self.kern = kernel
-        self.X = X
-        self.Y = Y
-        assert len(self.X.shape)==2
-        assert len(self.Y.shape)==2
-        assert self.X.shape[0] == self.Y.shape[0]
-        self.N, self.D = self.Y.shape
-        self.N, self.Q = self.X.shape
-
-        #here's some simple normalisation
-        if normalize_X:
-            self._Xmean = X.mean(0)[None,:]
-            self._Xstd = X.std(0)[None,:]
-            self.X = (X.copy() - self._Xmean) / self._Xstd
-            if hasattr(self,'Z'):
-                self.Z = (self.Z - self._Xmean) / self._Xstd
-        else:
-            self._Xmean = np.zeros((1,self.X.shape[1]))
-            self._Xstd = np.ones((1,self.X.shape[1]))
-
-        if normalize_Y:
-            self._Ymean = Y.mean(0)[None,:]
-            self._Ystd = Y.std(0)[None,:]
-            self.Y = (Y.copy() - self._Ymean) / self._Ystd
-        else:
-            self._Ymean = np.zeros((1,self.Y.shape[1]))
-            self._Ystd = np.ones((1,self.Y.shape[1]))
-
-        if self.D > self.N:
-            # then it's more efficient to store YYT
-            self.YYT = np.dot(self.Y, self.Y.T)
-        else:
-            self.YYT = None
-
-        model.__init__(self)
+        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
+        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)

-    def _set_params(self,p):
-        self.kern._set_params_transformed(p)
-        self.K = self.kern.K(self.X,slices1=self.Xslices)
-        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
-
-    def _get_params(self):
-        return self.kern._get_params_transformed()
-
-    def _get_param_names(self):
-        return self.kern._get_param_names_transformed()
-
-    def _model_fit_term(self):
-        """
-        Computes the model fit using YYT if it's available
-        """
-        if self.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
-        else:
-            return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
-
-    def log_likelihood(self):
-        complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
-        return complexity_term + self._model_fit_term()
-
-    def dL_dK(self):
-        if self.YYT is None:
-            alpha = np.dot(self.Ki,self.Y)
-            dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
-        else:
-            dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
-        return dL_dK
-
-    def _log_likelihood_gradients(self):
-        return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
-
-    def predict(self,Xnew, slices=None, full_cov=False):
-        """
-        Predict the function(s) at the new point(s) Xnew.
-
-        Arguments
-        ---------
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.Q
-        :param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
-        :type slices: (None, list of slice objects, list of ints)
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        :type full_cov: bool
-        :rtype: posterior mean, a Numpy array, Nnew x self.D
-        :rtype: posterior variance, a Numpy array, Nnew x Nnew x (self.D)
-
-        .. Note:: "slices" specifies how the points X_new co-vary with the training points.
-
-            - If None, the new points covary through every kernel part (default)
-            - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
-            - If a list of booleans, specifying which kernel parts are active
-
-        If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
-        This is to allow for different normalisations of the output dimensions.
-        """
-        #normalise X values
-        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
-        mu, var = self._raw_predict(Xnew, slices, full_cov)
-
-        #un-normalise
-        mu = mu*self._Ystd + self._Ymean
-        if full_cov:
-            if self.D==1:
-                var *= np.square(self._Ystd)
-            else:
-                var = var[:,:,None] * np.square(self._Ystd)
-        else:
-            if self.D==1:
-                var *= np.square(np.squeeze(self._Ystd))
-            else:
-                var = var[:,None] * np.square(self._Ystd)
-        return mu,var
-
-    def _raw_predict(self,_Xnew,slices, full_cov=False):
-        """Internal helper function for making predictions, does not account for normalisation"""
-        Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        mu = np.dot(np.dot(Kx.T,self.Ki),self.Y)
-        KiKx = np.dot(self.Ki,Kx)
-        if full_cov:
-            Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
-            var = Kxx - np.dot(KiKx.T,Kx)
-        else:
-            Kxx = self.kern.Kdiag(_Xnew, slices=slices)
-            var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
-        return mu, var
-
-    def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None):
-        """
-        :param samples: the number of a posteriori samples to plot
-        :param which_data: which of the training data to plot (default all)
-        :type which_data: 'all' or a slice object to slice self.X, self.Y
-        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
-        :param which_functions: which of the kernel functions to plot (additively)
-        :type which_functions: list of bools
-        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-
-        Plot the posterior of the GP.
-          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
-          - In two dimensions, a contour-plot shows the mean predicted function
-          - In higher dimensions, we've not implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions using which_data and which_functions
-        """
-        if which_functions=='all':
-            which_functions = [True]*self.kern.Nparts
-        if which_data=='all':
-            which_data = slice(None)
-
-        X = self.X[which_data,:]
-        Y = self.Y[which_data,:]
-
-        Xorig = X*self._Xstd + self._Xmean
-        Yorig = Y*self._Ystd + self._Ymean
-        if plot_limits is None:
-            xmin,xmax = Xorig.min(0),Xorig.max(0)
-            xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
-        elif len(plot_limits)==2:
-            xmin, xmax = plot_limits
-        else:
-            raise ValueError, "Bad limits for plotting"
-
-        if self.X.shape[1]==1:
-            Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
-            m,v = self.predict(Xnew,slices=which_functions)
-            gpplot(Xnew,m,v)
-            if samples:
-                s = np.random.multivariate_normal(m.flatten(),v,samples)
-                pb.plot(Xnew.flatten(),s.T, alpha=0.4, c='#3465a4', linewidth=0.8)
-            pb.plot(Xorig,Yorig,'kx',mew=1.5)
-            pb.xlim(xmin,xmax)
-
-        elif self.X.shape[1]==2:
-            resolution = resolution or 50
-            xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
-            Xtest = np.vstack((xx.flatten(),yy.flatten())).T
-            zz,vv = self.predict(Xtest,slices=which_functions)
-            zz = zz.reshape(resolution,resolution)
-            pb.contour(xx,yy,zz,vmin=zz.min(),vmax=zz.max(),cmap=pb.cm.jet)
-            pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=zz.min(),vmax=zz.max())
-            pb.xlim(xmin[0],xmax[0])
-            pb.ylim(xmin[1],xmax[1])
-
-        else:
-            raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"
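The removed body implemented exact Gaussian-process regression directly; for the record, its _raw_predict computed the standard GP predictive equations

\begin{align}
\mu_* = K_{x*}^\top K^{-1} Y, \qquad v_* = K_{**} - K_{x*}^\top K^{-1} K_{x*},
\end{align}

with K = K(X,X) factorised once per parameter setting via pdinv. All of this now lives in the shared GP base class introduced by this changeset, so GP_regression reduces to choosing a default kernel and a Gaussian likelihood.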
@@ -2,12 +2,12 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

+from GP import GP
 from GP_regression import GP_regression
+from sparse_GP import sparse_GP
 from sparse_GP_regression import sparse_GP_regression
 from GPLVM import GPLVM
 from warped_GP import warpedGP
-from GP_EP import GP_EP
-from generalized_FITC import generalized_FITC
 from sparse_GPLVM import sparse_GPLVM
-from uncollapsed_sparse_GP import uncollapsed_sparse_GP
+#from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from BGPLVM import Bayesian_GPLVM
@@ -1,241 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
import pylab as pb
from scipy import stats, linalg
from .. import kern
from ..core import model
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot
from ..inference.Expectation_Propagation import FITC
from ..inference.likelihoods import likelihood,probit

class generalized_FITC(model):
    def __init__(self,X,likelihood,kernel=None,inducing=10,epsilon_ep=1e-3,powerep=[1.,1.]):
        """
        Implementation of EP with FITC, following Naish-Guzman and Holden (2008).

        :param X: input observations
        :param likelihood: the output's likelihood (likelihood class)
        :param kernel: a GPy kernel
        :param inducing: either an array specifying the inducing points' locations, or a scalar defining their number
        :param epsilon_ep: EP convergence criterion, the maximum squared difference allowed between mean updates before iterations stop (float)
        :param powerep: Power-EP parameters (eta,delta) - 2x1 numpy array (floats)
        """
        assert isinstance(kernel,kern.kern)
        self.likelihood = likelihood
        self.Y = self.likelihood.Y
        self.kernel = kernel
        self.X = X
        self.N, self.D = self.X.shape
        assert self.Y.shape[0] == self.N
        if type(inducing) == int:
            self.M = inducing
            self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1)
        elif type(inducing) == np.ndarray:
            self.Z = inducing
            self.M = self.Z.shape[0]
        self.eta,self.delta = powerep
        self.epsilon_ep = epsilon_ep
        self.jitter = 1e-12
        model.__init__(self)

    def _set_params(self,p):
        self.kernel._set_params_transformed(p[0:-self.Z.size])
        self.Z = p[-self.Z.size:].reshape(self.M,self.D)

    def _get_params(self):
        return np.hstack([self.kernel._get_params_transformed(),self.Z.flatten()])

    def _get_param_names(self):
        return self.kernel._get_param_names_transformed()+['iip_%i'%i for i in range(self.Z.size)]

    def approximate_likelihood(self):
        self.Kmm = self.kernel.K(self.Z)
        self.Knm = self.kernel.K(self.X,self.Z)
        self.Knn_diag = self.kernel.Kdiag(self.X)
        self.ep_approx = FITC(self.Kmm,self.likelihood,self.Knm.T,self.Knn_diag,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
        self.ep_approx.fit_EP()

    def posterior_param(self):
        self.Knn_diag = self.kernel.Kdiag(self.X)
        self.Kmm = self.kernel.K(self.Z)
        self.Kmmi, self.Lmm, self.Lmmi, self.Kmm_logdet = pdinv(self.Kmm)
        self.Knm = self.kernel.K(self.X,self.Z)
        self.KmmiKmn = np.dot(self.Kmmi,self.Knm.T)
        self.Qnn = np.dot(self.Knm,self.KmmiKmn)
        self.Diag0 = self.Knn_diag - np.diag(self.Qnn)
        self.R0 = np.linalg.cholesky(self.Kmmi).T

        self.Taut = self.ep_approx.tau_tilde/(1.+ self.ep_approx.tau_tilde*self.Diag0)
        self.KmnTaut = self.Knm.T*self.Taut[None,:]
        self.KmnTautKnm = np.dot(self.KmnTaut, self.Knm)
        self.Woodbury_inv, self.Wood_L, self.Wood_Li, self.Woodbury_logdet = pdinv(self.Kmm + self.KmnTautKnm)
        self.Qnn_diag = self.Knn_diag - np.diag(self.Qnn) + 1./self.ep_approx.tau_tilde
        self.Qi = -np.dot(self.KmnTaut.T, np.dot(self.Woodbury_inv,self.KmnTaut)) + np.diag(self.Taut)
        self.hld = 0.5*np.sum(np.log(self.Diag0 + 1./self.ep_approx.tau_tilde)) - 0.5*self.Kmm_logdet + 0.5*self.Woodbury_logdet

        self.Diag = self.Diag0/(1.+ self.Diag0 * self.ep_approx.tau_tilde)
        self.P = (self.Diag / self.Diag0)[:,None] * self.Knm
        self.RPT0 = np.dot(self.R0,self.Knm.T)
        self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
        self.R,info = linalg.flapack.dtrtrs(self.L,self.R0,lower=1)
        self.RPT = np.dot(self.R,self.P.T)
        self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
        self.w = self.Diag * self.ep_approx.v_tilde
        self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.ep_approx.v_tilde))
        self.mu = self.w + np.dot(self.P,self.gamma)
        self.mu_tilde = (self.ep_approx.v_tilde/self.ep_approx.tau_tilde)[:,None]

    def log_likelihood(self):
        self.posterior_param()
        self.YYT = np.dot(self.mu_tilde,self.mu_tilde.T)
        A = -self.hld
        B = -.5*np.sum(self.Qi*self.YYT)
        C = sum(np.log(self.ep_approx.Z_hat))
        D = .5*np.sum(np.log(1./self.ep_approx.tau_tilde + 1./self.ep_approx.tau_))
        E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde))
        return A + B + C + D + E

    def _log_likelihood_gradients(self):
        dKmm_dtheta = self.kernel.dK_dtheta(self.Z)
        dKnn_dtheta = self.kernel.dK_dtheta(self.X)
        dKmn_dtheta = self.kernel.dK_dtheta(self.Z,self.X)
        dKmm_dZ = -self.kernel.dK_dX(self.Z)
        dKnm_dZ = -self.kernel.dK_dX(self.X,self.Z)
        tmp = [np.dot(dKmn_dtheta_i,self.KmmiKmn) for dKmn_dtheta_i in dKmn_dtheta.T]
        dQnn_dtheta = [tmp_i + tmp_i.T - np.dot(np.dot(self.KmmiKmn.T,dKmm_dtheta_i),self.KmmiKmn) for tmp_i,dKmm_dtheta_i in zip(tmp,dKmm_dtheta.T)]
        dDiag0_dtheta = [np.diag(dKnn_dtheta_i) - np.diag(dQnn_dtheta_i) for dKnn_dtheta_i,dQnn_dtheta_i in zip(dKnn_dtheta.T,dQnn_dtheta)]
        dQ_dtheta = [np.diag(dDiag0_dtheta_i) + dQnn_dtheta_i for dDiag0_dtheta_i,dQnn_dtheta_i in zip(dDiag0_dtheta,dQnn_dtheta)]
        dW_dtheta = [dKmm_dtheta_i + 2*np.dot(self.KmnTaut,dKmn_dtheta_i) - np.dot(self.KmnTaut*dDiag0_dtheta_i,self.KmnTaut.T) for dKmm_dtheta_i,dDiag0_dtheta_i,dKmn_dtheta_i in zip(dKmm_dtheta.T,dDiag0_dtheta,dKmn_dtheta.T)]

        QiY = np.dot(self.Qi, self.mu_tilde)
        QiYYQi = np.outer(QiY,QiY)
        WiKmnTaut = np.dot(self.Woodbury_inv,self.KmnTaut)
        K_Y = np.dot(self.KmmiKmn,QiY)
        # gradient - theta
        Atheta = [-0.5*np.dot(self.Taut,dDiag0_dtheta_i) + 0.5*np.sum(self.Kmmi*dKmm_dtheta_i) - 0.5*np.sum(self.Woodbury_inv*dW_dtheta_i) for dDiag0_dtheta_i,dKmm_dtheta_i,dW_dtheta_i in zip(dDiag0_dtheta,dKmm_dtheta.T,dW_dtheta)]
        Btheta = np.array([0.5*np.sum(QiYYQi*dQ_dtheta_i) for dQ_dtheta_i in dQ_dtheta])
        dL_dtheta = Atheta + Btheta
        # gradient - Z
        # Az
        dQnn_dZ_diag_a2 = (np.array([d[:,:,None]*self.KmmiKmn[:,:,None] for d in dKnm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0)
        dQnn_dZ_diag_b2 = (np.array([(self.KmmiKmn*np.sum(d[:,:,None]*self.KmmiKmn,-2))[:,:,None] for d in dKmm_dZ.transpose(2,0,1)]).reshape(self.D,self.M,self.N)).transpose(1,2,0)
        dQnn_dZ_diag = dQnn_dZ_diag_a2 - dQnn_dZ_diag_b2
        d_hld_Diag1_dZ = -np.sum(np.dot(self.KmmiKmn*self.Taut,self.KmmiKmn.T)[:,:,None]*dKmm_dZ,-2) + np.sum((self.KmmiKmn*self.Taut)[:,:,None]*dKnm_dZ,-2)
        d_hld_Kmm_dZ = np.sum(self.Kmmi[:,:,None]*dKmm_dZ,-2)
        d_hld_W_dZ1 = np.sum(WiKmnTaut[:,:,None]*dKnm_dZ,-2)
        d_hld_W_dZ3 = np.sum(self.Woodbury_inv[:,:,None]*dKmm_dZ,-2)
        d_hld_W_dZ2 = np.array([np.sum(np.sum(WiKmnTaut.T*d[:,:,None]*self.KmnTaut.T,-2),-1) for d in dQnn_dZ_diag.transpose(2,0,1)]).T
        Az = d_hld_Diag1_dZ + d_hld_Kmm_dZ - d_hld_W_dZ1 - d_hld_W_dZ2 - d_hld_W_dZ3
        # Bz
        Bz2 = np.sum(np.dot(K_Y,QiY.T)[:,:,None]*dKnm_dZ,-2)
        Bz3 = - np.sum(np.dot(K_Y,K_Y.T)[:,:,None]*dKmm_dZ,-2)
        Bz1 = -np.array([np.sum((QiY**2)*d[:,:,None],-2) for d in dQnn_dZ_diag.transpose(2,0,1)]).reshape(self.D,self.M).T
        Bz = Bz1 + Bz2 + Bz3
        dL_dZ = (Az + Bz).flatten()
        return np.hstack([dL_dtheta, dL_dZ])

    def predict(self,X):
        """
        Make a prediction with the model.

        Arguments
        ---------
        X : input prediction data - Nx1 numpy array (floats)
        """
        #TODO: check output dimensions
        K_x = self.kernel.K(self.Z,X)
        Kxx = self.kernel.K(X)
        #K_x = self.kernM.cross.K(X)
        # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)

        # Ci = I + (RPT0)Di(RPT0).T
        # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
        #   = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
        #   = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
        #   = I - V.T * V
        U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
        V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
        C = np.eye(self.M) - np.dot(V.T,V)
        mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
        # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
        mu_H = np.dot(mu_u,self.mu)
        self.mu_H = mu_H
        Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
        # q(f_star|y) = N(f_star|mu_star,sigma2_star)
        KR0T = np.dot(K_x.T,self.R0.T)
        mu_star = np.dot(KR0T,mu_H)
        sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
        vdiag = np.diag(sigma2_star)
        # q(y_star|y) = non-gaussian posterior probability of class membership
        p = self.likelihood.predictive_mean(mu_star,vdiag)
        return mu_star,vdiag,p

    def plot(self):
        """
        Plot the fitted model: training function values, inducing points used, mean estimate and confidence intervals.
        """
        if self.X.shape[1]==1:
            pb.figure()
            xmin,xmax = np.r_[self.X,self.Z].min(),np.r_[self.X,self.Z].max()
            xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
            Xnew = np.linspace(xmin,xmax,100)[:,None]
            mu_f, var_f, mu_phi = self.predict(Xnew)
            self.mu_inducing,self.var_diag_inducing,self.phi_inducing = self.predict(self.Z)
            pb.subplot(211)
            self.likelihood.plot1Da(X_new=Xnew,Mean_new=mu_f,Var_new=var_f,X_u=self.Z,Mean_u=self.mu_inducing,Var_u=self.var_diag_inducing)
            pb.subplot(212)
            self.likelihood.plot1Db(self.X,Xnew,mu_phi,self.Z)
        elif self.X.shape[1]==2:
            pb.figure()
            x1min,x1max = self.X[:,0].min(0),self.X[:,0].max(0)
            x2min,x2max = self.X[:,1].min(0),self.X[:,1].max(0)
            x1min, x1max = x1min-0.2*(x1max-x1min), x1max+0.2*(x1max-x1min)
            x2min, x2max = x2min-0.2*(x2max-x2min), x2max+0.2*(x2max-x2min)
            axis1 = np.linspace(x1min,x1max,50)
            axis2 = np.linspace(x2min,x2max,50)
            XX1, XX2 = [e.flatten() for e in np.meshgrid(axis1,axis2)]
            Xnew = np.c_[XX1.flatten(),XX2.flatten()]
            f,v,p = self.predict(Xnew)
            self.likelihood.plot2D(self.X,Xnew,p,self.Z)
        else:
            raise NotImplementedError, "Cannot plot GPs with more than two input dimensions"

    def em(self,max_f_eval=1e4,epsilon=.1,plot_all=False): #TODO check this makes sense
        """
        Fits sparse EP and optimizes the hyperparameters iteratively until convergence is achieved.
        """
        self.epsilon_em = epsilon
        log_likelihood_change = self.epsilon_em + 1.
        self.parameters_path = [self.kernel._get_params()]
        self.approximate_likelihood()
        self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
        self.inducing_inputs_path = [self.Z]
        self.log_likelihood_path = [self.log_likelihood()]
        iteration = 0
        while log_likelihood_change > self.epsilon_em:
            print 'EM iteration', iteration
            self.optimize(max_f_eval = max_f_eval)
            log_likelihood_new = self.log_likelihood()
            log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
            if log_likelihood_change < 0:
                print 'log_likelihood decrement'
                self.kernel._set_params_transformed(self.parameters_path[-1])
                self.kernM = self.kernel.copy()
                self.kernM.expand_X(self.inducing_inputs_path[-1])
                # NOTE: this re-initialisation does not match the constructor signature above
                self.__init__(self.kernel,self.likelihood,kernM=self.kernM,powerep=[self.eta,self.delta],epsilon_ep = self.epsilon_ep, epsilon_em = self.epsilon_em)
            else:
                self.approximate_likelihood()
                self.log_likelihood_path.append(self.log_likelihood())
                self.parameters_path.append(self.kernel._get_params())
                self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
                self.inducing_inputs_path.append(self.Z)
            iteration += 1
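The removed class implements the FITC prior approximation used throughout posterior_param above: the full covariance is replaced by

\begin{align}
K_{nn} \approx Q_{nn} + \mathrm{diag}\left(K_{nn} - Q_{nn}\right), \qquad Q_{nn} = K_{nm} K_{mm}^{-1} K_{mn},
\end{align}

so only the diagonal correction Diag0 = diag(K_nn - Q_nn) and the M x M Woodbury matrix K_mm + K_mn T K_nm (with T = diag(Taut)) need to be stored and decomposed.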
GPy/models/sparse_GP.py (new file)
@@ -0,0 +1,217 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.plot import gpplot
from .. import kern
from GP import GP

#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)

class sparse_GP(GP):
    """
    Variational sparse GP model

    :param X: inputs
    :type X: np.ndarray (N x Q)
    :param likelihood: a likelihood instance, containing the observed data
    :type likelihood: GPy.likelihood.(Gaussian | EP)
    :param kernel: the kernel/covariance function. See link kernels
    :type kernel: a GPy kernel
    :param X_uncertainty: the uncertainty in the measurements of X (Gaussian variance)
    :type X_uncertainty: np.ndarray (N x Q) | None
    :param Z: inducing inputs (optional, see note)
    :type Z: np.ndarray (M x Q) | None
    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
    :param M: number of inducing points (optional, default 10. Ignored if Z is not None)
    :type M: int
    :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
    :type normalize_(X|Y): bool
    """

    def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
        self.scale_factor = 1.0 # a scaling factor to help keep the algorithm stable
        self.Z = Z
        self.Zslices = Zslices
        self.Xslices = Xslices
        self.M = Z.shape[0]
        self.likelihood = likelihood

        if X_uncertainty is None:
            self.has_uncertain_inputs = False
        else:
            assert X_uncertainty.shape == X.shape
            self.has_uncertain_inputs = True
            self.X_uncertainty = X_uncertainty

        GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)

        #normalise X uncertainty also
        if self.has_uncertain_inputs:
            self.X_uncertainty /= np.square(self._Xstd)

    def _computations(self):
        # TODO find routine to multiply triangular matrices
        #TODO: slices for psi statistics (easy enough)

        sf = self.scale_factor
        sf2 = sf**2

        # kernel computations, using BGPLVM notation
        self.Kmm = self.kern.K(self.Z)
        if self.has_uncertain_inputs:
            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)
            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
            if self.likelihood.is_heteroscedastic:
                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
                #TODO: what if the likelihood is heteroscedastic and there are multiple independent outputs?
            else:
                self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
        else:
            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
            self.psi1 = self.kern.K(self.Z,self.X)
            if self.likelihood.is_heteroscedastic:
                tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(self.N,1))/sf)
            else:
                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
            self.psi2_beta_scaled = np.dot(tmp,tmp.T)
            self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] # TODO: remove me for efficiency and stability

        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)

        self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
        self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
        self.B = np.eye(self.M)/sf2 + self.A

        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)

        self.psi1V = np.dot(self.psi1, self.V)
        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
        self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
        self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)

        # Compute dL_dpsi # FIXME: this is untested for the het. case
        self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N)
        self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
        if self.likelihood.is_heteroscedastic:
            self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
            self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
        else:
            self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi[None,:,:] # dB
            self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C[None,:,:] # dC
            self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E[None,:,:] # dD

        # Compute dL_dKmm
        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
        self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
        self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD

        #the partial derivative vector for the likelihood
        if self.likelihood.Nparams == 0:
            #save computation here.
            self.partial_for_likelihood = None
        elif self.likelihood.is_heteroscedastic:
            raise NotImplementedError, "heteroscedastic derivatives not implemented"
            #self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
            #self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
            #self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
            #self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
        else:
            #likelihood is not heteroscedastic
            beta = self.likelihood.precision
            dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
            dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
            dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
            dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
            self.partial_for_likelihood = -dbeta*self.likelihood.precision**2

    def _set_params(self, p):
        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
        self._computations()

    def _get_params(self):
        return np.hstack([self.Z.flatten(),GP._get_params(self)])

    def _get_param_names(self):
        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + GP._get_param_names(self)

    def log_likelihood(self):
        """ Compute the (lower bound on the) log marginal likelihood """
        sf2 = self.scale_factor**2
        if self.likelihood.is_heteroscedastic:
            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
        else:
            A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
        B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
        D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
        return A+B+C+D
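In the Gaussian, homoscedastic, certain-inputs case, the quantity assembled from A, B, C and D above is (up to the scale-factor bookkeeping) the variational lower bound of Titsias (2009):

\begin{align}
\log p(Y) \geq \log \mathcal{N}\left(Y \mid 0,\; Q_{nn} + \beta^{-1} I\right) - \frac{\beta}{2}\,\mathrm{tr}\left(K_{nn} - Q_{nn}\right), \qquad Q_{nn} = K_{nm} K_{mm}^{-1} K_{mn},
\end{align}

with beta the noise precision; with uncertain inputs, K_nm and its products are replaced by the expected psi-statistics psi0, psi1, psi2 computed in _computations.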
    def _log_likelihood_gradients(self):
        return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))

    def dL_dtheta(self):
        """
        Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
        """
        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
        if self.has_uncertain_inputs:
            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
        else:
            #re-cast computations in psi2 back to psi1:
            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
            dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
            dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)

        return dL_dtheta

    def dL_dZ(self):
        """
        The derivative of the bound wrt the inducing inputs Z
        """
        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) #factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
        if self.has_uncertain_inputs:
            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
        else:
            #re-cast computations in psi2 back to psi1:
            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
            dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
        return dL_dZ

    def _raw_predict(self, Xnew, slices, full_cov=False):
        """Internal helper function for making predictions, does not account for normalisation"""
        Kx = self.kern.K(self.Z, Xnew)
        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
        if full_cov:
            Kxx = self.kern.K(Xnew)
            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
        else:
            Kxx = self.kern.Kdiag(Xnew)
            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)

        return mu,var[:,None]

    def plot(self, *args, **kwargs):
        """
        Plot the fitted model: just call the GP plot function and then add inducing inputs
        """
        GP.plot(self,*args,**kwargs)
        if self.Q==1:
            if self.has_uncertain_inputs:
                pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
        if self.Q==2:
            pb.plot(self.Z[:,0],self.Z[:,1],'wo')
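With the quantities cached by _computations (writing sf for the scale factor), _raw_predict above evaluates

\begin{align}
\mu_* = K_{*m}\,\frac{C}{\mathrm{sf}}\,\Psi_1 V, \qquad v_* = K_{**} - K_{*m}\left(K_{mm}^{-1} - C/\mathrm{sf}^2\right) K_{m*},
\end{align}

where C = L_m^{-\top} B^{-1} L_m^{-1} and V = (\beta/\mathrm{sf})\,Y. Note that the \beta^{-1} observation-noise term is no longer added here; compare the old sparse_GP_regression._raw_predict in the diff below, which added it with a TODO.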
@@ -1,205 +1,46 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


 import numpy as np
-import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
-from ..util.plot import gpplot
+from sparse_GP import sparse_GP
+from .. import likelihoods
 from .. import kern
 from ..inference.likelihoods import likelihood
 from GP_regression import GP_regression

-#Still TODO:
-# make use of slices properly (kernel can now do this)
-# enable heteroscedastic noise (kernel will need to compute psi2 as a (NxMxM) array)
-
-class sparse_GP_regression(GP_regression):
+class sparse_GP_regression(sparse_GP):
     """
-    Variational sparse GP model (Regression)
-
-    :param X: inputs
-    :type X: np.ndarray (N x Q)
-    :param Y: observed data
-    :type Y: np.ndarray of observations (N x D)
-    :param kernel: the kernel/covariance function. See link kernels
-    :type kernel: a GPy kernel
-    :param Z: inducing inputs (optional, see note)
-    :type Z: np.ndarray (M x Q) | None
-    :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
-    :type X_uncertainty: np.ndarray (N x Q) | None
-    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
-    :param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
-    :type M: int
-    :param beta: noise precision. TODO> ignore beta if doing EP
-    :type beta: float
-    :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
-    :type normalize_(X|Y): bool
+    Gaussian Process model for regression
+
+    This is a thin wrapper around the GP class, with a set of sensible defaults
+
+    :param X: input observations
+    :param Y: observed values
+    :param kernel: a GPy kernel, defaults to rbf+white
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_Y: False|True
+    :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
+    :rtype: model object
+
+    .. Note:: Multiple independent outputs are allowed using columns of Y
     """

-    def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
-        self.scale_factor = 100.0
-        self.beta = beta
-        if Z is None:
-            self.Z = np.random.permutation(X.copy())[:M]
-            self.M = M
-        else:
-            assert Z.shape[1]==X.shape[1]
-            self.Z = Z
-            self.M = Z.shape[0]
-        if X_uncertainty is None:
-            self.has_uncertain_inputs=False
-        else:
-            assert X_uncertainty.shape==X.shape
-            self.has_uncertain_inputs=True
-            self.X_uncertainty = X_uncertainty
-
-        GP_regression.__init__(self, X, Y, kernel=kernel, normalize_X=normalize_X, normalize_Y=normalize_Y)
-        self.trYYT = np.sum(np.square(self.Y))
-
-        #normalise X uncertainty also
-        if self.has_uncertain_inputs:
-            self.X_uncertainty /= np.square(self._Xstd)
+    def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
+        #kern defaults to rbf
+        if kernel is None:
+            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
+
+        #Z defaults to a subset of the data
+        if Z is None:
+            Z = np.random.permutation(X.copy())[:M]
+        else:
+            assert Z.shape[1]==X.shape[1]
+
+        #likelihood defaults to Gaussian
+        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
+
+        sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)

-    def _computations(self):
-        # TODO find routine to multiply triangular matrices
-        #TODO: slices for psi statistics (easy enough)
-
-        # kernel computations, using BGPLVM notation
-        self.Kmm = self.kern.K(self.Z)
-        if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
-            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
-            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-            self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
-        else:
-            self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
-            self.psi1 = self.kern.K(self.Z,self.X)
-            #self.psi2 = np.dot(self.psi1,self.psi1.T)
-            #self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
-            tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta))
-            self.psi2_beta_scaled = np.dot(tmp,tmp.T)
-
-        sf = self.scale_factor
-        sf2 = sf**2
-
-        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) #+np.eye(self.M)*1e-3)
-
-        self.V = (self.beta/self.scale_factor)*self.Y
-        self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
-        self.B = np.eye(self.M)/sf2 + self.A
-
-        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
-
-        self.psi1V = np.dot(self.psi1, self.V)
-        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
-
-        # Compute dL_dpsi
-        self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
-        self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
-        self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
-        self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
-
-        # Compute dL_dKmm
-        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
-        self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
-
-    def _set_params(self, p):
-        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
-        self.beta = p[self.M*self.Q]
-        self.kern._set_params(p[self.Z.size + 1:])
-        self._computations()
-
-    def _get_params(self):
-        return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
-
-    def _get_param_names(self):
-        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
-
-    def log_likelihood(self):
-        """ Compute the (lower bound on the) log marginal likelihood """
-        sf2 = self.scale_factor**2
-        A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT
-        B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)
-        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
-        D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
-        return A+B+C+D
-
-    def _log_likelihood_gradients(self):
-        return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
-
-    def dL_dbeta(self):
-        """
-        Compute the gradient of the log likelihood wrt beta.
-        """
-        #TODO: support heteroscedastic noise
-        sf2 = self.scale_factor**2
-        dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
-        dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
-        dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
-        dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
-
-        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
-
-    def dL_dtheta(self):
-        """
-        Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
-        """
-        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
-        if self.has_uncertain_inputs:
-            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
-            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
-        else:
-            #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
-            dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
-            dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
-
-        return dL_dtheta
-
-    def dL_dZ(self):
-        """
-        The derivative of the bound wrt the inducing inputs Z
-        """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) #factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
-        if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
-            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
-        else:
-            #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
-            dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
-        return dL_dZ
-
-    def _raw_predict(self, Xnew, slices, full_cov=False):
-        """Internal helper function for making predictions, does not account for normalisation"""
-        Kx = self.kern.K(self.Z, Xnew)
-        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
-
-        if full_cov:
-            Kxx = self.kern.K(Xnew)
-            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
-        else:
-            Kxx = self.kern.Kdiag(Xnew)
-            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
-
-        return mu,var
-
-    def plot(self, *args, **kwargs):
-        """
-        Plot the fitted model: just call the GP_regression plot function and then add inducing inputs
-        """
-        GP_regression.plot(self,*args,**kwargs)
-        if self.Q==1:
-            pb.plot(self.Z,self.Z*0+pb.ylim()[0],'k|',mew=1.5,markersize=12)
-            if self.has_uncertain_inputs:
-                pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
-        if self.Q==2:
-            pb.plot(self.Z[:,0],self.Z[:,1],'wo')
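A minimal usage sketch of the rewritten class, assuming the constructor signature above; ensure_default_constraints and optimize are the calls used elsewhere in this changeset, and the data here are made up:

    import numpy as np
    import GPy

    X = np.random.rand(500,1)
    Y = np.sin(10*X) + np.random.randn(500,1)*0.1
    m = GPy.models.sparse_GP_regression(X, Y, M=10)   # Z defaults to a random subset of X
    m.ensure_default_constraints()
    m.optimize()                                      # assumed callable without arguments
    m.plot()                                          # inducing inputs are overlaid on the GP plot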
@@ -6,7 +6,7 @@ import pylab as pb
 from ..util.linalg import mdot, jitchol, chol_inv, pdinv
 from ..util.plot import gpplot
 from .. import kern
-from ..inference.likelihoods import likelihood
+from ..likelihoods import likelihood
 from sparse_GP_regression import sparse_GP_regression

 class uncollapsed_sparse_GP(sparse_GP_regression):

@@ -136,8 +136,8 @@ class uncollapsed_sparse_GP(sparse_GP_regression):
         #dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
         dL_dm = np.dot(self.Kmmi,self.psi1V) - self.q_u_canonical[0]

         #dL_dSim =
         #dL_dmhSi =

         return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO
@@ -154,17 +154,16 @@ class GradientTests(unittest.TestCase):
         m.constrain_positive('(linear|bias|white)')
         self.assertTrue(m.checkgrad())

-    def test_GP_EP(self):
-        return # Disabled TODO
-        N = 20
-        X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None]
-        k = GPy.kern.rbf(1) + GPy.kern.white(1)
-        Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
-        likelihood = GPy.inference.likelihoods.probit(Y)
-        m = GPy.models.GP_EP(X,likelihood,k)
-        m.constrain_positive('(var|len)')
-        m.approximate_likelihood()
-        self.assertTrue(m.checkgrad())
+    def test_GP_EP_probit(self):
+        N = 20
+        X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
+        Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
+        kernel = GPy.kern.rbf(1)
+        distribution = GPy.likelihoods.likelihood_functions.probit()
+        likelihood = GPy.likelihoods.EP(Y, distribution)
+        m = GPy.models.GP(X, likelihood, kernel)
+        m.ensure_default_constraints()
+        self.assertTrue(m.EPEM)

     @unittest.skip("FITC will be broken for a while")
     def test_generalized_FITC(self):
@@ -3,7 +3,6 @@

 import matplotlib as mpl

 import pylab as pb
 import sys
 #sys.path.append('/home/james/mlprojects/sitran_cluster/')

@@ -15,12 +14,12 @@ def removeRightTicks(ax=None):
     ax = ax or pb.gca()
     for i, line in enumerate(ax.get_yticklines()):
         if i%2 == 1: # odd indices
             line.set_visible(False)

 def removeUpperTicks(ax=None):
     ax = ax or pb.gca()
     for i, line in enumerate(ax.get_xticklines()):
         if i%2 == 1: # odd indices
             line.set_visible(False)

 def fewerXticks(ax=None,divideby=2):
     ax = ax or pb.gca()
     ax.set_xticks(ax.get_xticks()[::divideby])

@@ -126,8 +125,6 @@ cdict_RB = {'red' :((0.,coloursRGB['mediumRed'][0]/256.,coloursRGB['mediumRed'][
     'blue':((0.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.),
            (.5,coloursRGB['mediumPurple'][2]/256.,coloursRGB['mediumPurple'][2]/256.),
            (1.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.))}
-cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)


 cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.),
              (.5,coloursRGB['mediumGreen'][0]/256.,coloursRGB['mediumGreen'][0]/256.),

@@ -138,7 +135,7 @@ cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue
     'blue':((0.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.),
            (.5,coloursRGB['mediumGreen'][2]/256.,coloursRGB['mediumGreen'][2]/256.),
            (1.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.))}
-cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)

 cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminium1'][0]/256.),
              (1./5,coloursRGB['Aluminium2'][0]/256.,coloursRGB['Aluminium2'][0]/256.),

@@ -158,13 +155,12 @@ cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminiu
            (3./5,coloursRGB['Aluminium4'][2]/256.,coloursRGB['Aluminium4'][2]/256.),
            (4./5,coloursRGB['Aluminium5'][2]/256.,coloursRGB['Aluminium5'][2]/256.),
            (5./5,coloursRGB['Aluminium6'][2]/256.,coloursRGB['Aluminium6'][2]/256.))}
-cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256)
+# cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256)
+# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
+# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
 if __name__=='__main__':
     import pylab as pb
     pb.figure()
     pb.pcolor(pb.rand(10,10),cmap=cmap_RB)
     pb.colorbar()
     pb.show()
@@ -6,30 +6,26 @@ import Tango
 import pylab as pb
 import numpy as np

-def gpplot(x,mu,var,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
+def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
     if axes is None:
         axes = pb.gca()
     mu = mu.flatten()
     x = x.flatten()
+    lower = lower.flatten()
+    upper = upper.flatten()

     #here's the mean
     axes.plot(x,mu,color=edgecol,linewidth=2)

-    #ensure variance is a vector
-    if len(var.shape)>1:
-        err = 2*np.sqrt(np.diag(var))
-    else:
-        err = 2*np.sqrt(var)
-
-    #here's the 2*std box
+    #here's the box
     kwargs['linewidth']=0.5
     if not 'alpha' in kwargs.keys():
         kwargs['alpha'] = 0.3
-    axes.fill(np.hstack((x,x[::-1])),np.hstack((mu+err,mu[::-1]-err[::-1])),color=fillcol,**kwargs)
+    axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs)

     #this is the edge:
-    axes.plot(x,mu+err,color=edgecol,linewidth=0.2)
-    axes.plot(x,mu-err,color=edgecol,linewidth=0.2)
+    axes.plot(x,upper,color=edgecol,linewidth=0.2)
+    axes.plot(x,lower,color=edgecol,linewidth=0.2)

 def removeRightTicks(ax=None):
     ax = ax or pb.gca()
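Usage sketch for the new ``gpplot`` signature above: the 2*std bounds are now computed by the caller and passed in as ``lower``/``upper`` (hypothetical data; assumes ``gpplot`` is in scope from this module)::

    import numpy as np
    import pylab as pb

    x = np.linspace(0., 10., 200)
    mu = np.sin(x)                      # hypothetical predictive mean
    var = 0.05*np.ones_like(x)          # hypothetical predictive variance
    err = 2.*np.sqrt(var)               # the caller forms the bounds...
    gpplot(x, mu, mu - err, mu + err)   # ...and passes lower/upper explicitly
    pb.show()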
@@ -74,4 +70,36 @@ def align_subplots(N,M,xlim=None, ylim=None):
     else:
         removeUpperTicks()

+def x_frame1D(X,plot_limits=None,resolution=None):
+    """
+    Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
+    """
+    assert X.shape[1] == 1, "x_frame1D is defined for one-dimensional inputs"
+    if plot_limits is None:
+        xmin,xmax = X.min(0),X.max(0)
+        xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
+    elif len(plot_limits) == 2:
+        xmin, xmax = plot_limits
+    else:
+        raise ValueError, "Bad limits for plotting"
+
+    Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None]
+    return Xnew, xmin, xmax
+
+def x_frame2D(X,plot_limits=None,resolution=None):
+    """
+    Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits
+    """
+    assert X.shape[1] == 2, "x_frame2D is defined for two-dimensional inputs"
+    if plot_limits is None:
+        xmin,xmax = X.min(0),X.max(0)
+        xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin)
+    elif len(plot_limits) == 2:
+        xmin, xmax = plot_limits
+    else:
+        raise ValueError, "Bad limits for plotting"
+
+    resolution = resolution or 50
+    xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
+    Xnew = np.vstack((xx.flatten(),yy.flatten())).T
+    return Xnew, xx, yy, xmin, xmax
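Usage sketch for the two helpers added above (hypothetical inputs; shapes as asserted in the code)::

    import numpy as np

    X1 = np.random.uniform(-3., 3., (20, 1))
    Xnew, xmin, xmax = x_frame1D(X1)            # 200 points, padded 20% beyond the data

    X2 = np.random.uniform(-3., 3., (20, 2))
    Xgrid, xx, yy, xmin, xmax = x_frame2D(X2)   # 50x50 grid for two-dimensional inputs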
@@ -1,7 +1,7 @@
 GPy
 ===

-A Gaussian processes framework in python.
+A Gaussian processes framework in python

 * [Online documentation](https://gpy.readthedocs.org/en/latest/)
 * [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)
BIN  doc/Figures/kern-def.png  (new file, 38 KiB)
BIN  [figure, name not captured]  (30 KiB -> 32 KiB)
BIN  [figure, name not captured]  (45 KiB -> 51 KiB)
BIN  [figure, name not captured]  (78 KiB -> 77 KiB)
BIN  doc/Figures/tuto_kern_overview_add_orth.png  (new file, 63 KiB)
BIN  doc/Figures/tuto_kern_overview_allkern.png  (new file, 129 KiB)
BIN  doc/Figures/tuto_kern_overview_basicdef.png  (new file, 38 KiB)
BIN  doc/Figures/tuto_kern_overview_mANOVA.png  (new file, 55 KiB)
BIN  doc/Figures/tuto_kern_overview_mANOVAdec.png  (new file, 84 KiB)
@@ -33,6 +33,14 @@ examples Package
     :undoc-members:
     :show-inheritance:

+:mod:`poisson` Module
+---------------------
+
+.. automodule:: GPy.examples.poisson
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 :mod:`regression` Module
 ------------------------

@@ -57,6 +65,14 @@ examples Package
     :undoc-members:
     :show-inheritance:

+:mod:`sparse_ep_fix` Module
+---------------------------
+
+.. automodule:: GPy.examples.sparse_ep_fix
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 :mod:`uncertain_input_GP_regression_demo` Module
 ------------------------------------------------
@@ -1,22 +1,6 @@
 inference Package
 =================

-:mod:`Expectation_Propagation` Module
--------------------------------------
-
-.. automodule:: GPy.inference.Expectation_Propagation
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`likelihoods` Module
--------------------------
-
-.. automodule:: GPy.inference.likelihoods
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
 :mod:`optimization` Module
 --------------------------
43  doc/GPy.likelihoods.rst  (new file)

likelihoods Package
===================

:mod:`likelihoods` Package
--------------------------

.. automodule:: GPy.likelihoods
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`EP` Module
----------------

.. automodule:: GPy.likelihoods.EP
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`Gaussian` Module
----------------------

.. automodule:: GPy.likelihoods.Gaussian
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`likelihood` Module
------------------------

.. automodule:: GPy.likelihoods.likelihood
    :members:
    :undoc-members:
    :show-inheritance:

:mod:`likelihood_functions` Module
----------------------------------

.. automodule:: GPy.likelihoods.likelihood_functions
    :members:
    :undoc-members:
    :show-inheritance:
@@ -17,18 +17,18 @@ models Package
     :undoc-members:
     :show-inheritance:

-:mod:`GPLVM` Module
--------------------
+:mod:`GP` Module
+----------------

-.. automodule:: GPy.models.GPLVM
+.. automodule:: GPy.models.GP
     :members:
     :undoc-members:
     :show-inheritance:

-:mod:`GP_EP` Module
+:mod:`GPLVM` Module
 -------------------

-.. automodule:: GPy.models.GP_EP
+.. automodule:: GPy.models.GPLVM
     :members:
     :undoc-members:
     :show-inheritance:

@@ -41,10 +41,10 @@ models Package
     :undoc-members:
     :show-inheritance:

-:mod:`generalized_FITC` Module
-------------------------------
+:mod:`sparse_GP` Module
+-----------------------

-.. automodule:: GPy.models.generalized_FITC
+.. automodule:: GPy.models.sparse_GP
     :members:
     :undoc-members:
     :show-inheritance:
@@ -18,6 +18,7 @@ Subpackages
     GPy.examples
     GPy.inference
     GPy.kern
+    GPy.likelihoods
     GPy.models
     GPy.util
@@ -41,6 +41,7 @@ help:
 clean:
 	-rm -rf $(BUILDDIR)/*

 html:
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
136  doc/conf.py

@@ -11,12 +11,50 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.

-import sys, os
+import sys
+import os
+
+print "python exec:", sys.executable
+print "sys.path:", sys.path
+try:
+    import numpy
+    print "numpy: %s, %s" % (numpy.__version__, numpy.__file__)
+except ImportError:
+    print "no numpy"
+try:
+    import matplotlib
+    print "matplotlib: %s, %s" % (matplotlib.__version__, matplotlib.__file__)
+except ImportError:
+    print "no matplotlib"
+try:
+    import ipython
+    print "ipython: %s, %s" % (ipython.__version__, ipython.__file__)
+except ImportError:
+    print "no ipython"
+try:
+    import sphinx
+    print "sphinx: %s, %s" % (sphinx.__version__, sphinx.__file__)
+except ImportError:
+    print "no sphinx"
+
+print "sys.path:", sys.path

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+#sys.path.insert(0, os.path.abspath('../GPy'))
+
+#print "sys.path.after:", sys.path
+
+# If your extensions are in another directory, add it here. If the directory
+# is relative to the documentation root, use os.path.abspath to make it
+# absolute, like shown here.
+sys.path.append(os.path.abspath('sphinxext'))
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('./sphinxext'))

 # -- General configuration -----------------------------------------------------

@@ -25,15 +63,77 @@ import sys, os

 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
+print "Importing extensions"
+
+extensions = ['sphinx.ext.autodoc',
+              #'sphinx.ext.doctest'
+              'sphinx.ext.viewcode',
+              'sphinx.ext.pngmath',
+              'ipython_directive',
+              'ipython_console_highlighting'
+              #'matplotlib.sphinxext.plot_directive'
+              ]
+plot_formats = [('png', 80), ('pdf', 50)]
+
+print "finished importing"
+
+##############################################################################
+##
+## Mock out imports with C dependencies because ReadTheDocs can't build them.
+#############################################################################
+
+class Mock(object):
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def __call__(self, *args, **kwargs):
+        return Mock()
+
+    @classmethod
+    def __getattr__(cls, name):
+        if name in ('__file__', '__path__'):
+            return '/dev/null'
+        elif name[0] == name[0].upper():
+            mockType = type(name, (), {})
+            mockType.__module__ = __name__
+            return mockType
+        else:
+            return Mock()
+
+#import mock
+
+print "Mocking"
+MOCK_MODULES = ['pylab', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'matplotlib']
+#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ]
+for mod_name in MOCK_MODULES:
+    sys.modules[mod_name] = Mock()

 # ----------------------- READTHEDOCS ------------------
 on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+on_rtd = True
 if on_rtd:
-    sys.path.append("../GPy")
-    os.system("pwd")
-    os.system("sphinx-apidoc -f -o . ../GPy")
+    sys.path.append(os.path.abspath('../GPy'))
+    import subprocess
+
+    proc = subprocess.Popen("pwd", stdout=subprocess.PIPE, shell=True)
+    (out, err) = proc.communicate()
+    print "program output:", out
+    proc = subprocess.Popen("ls ../", stdout=subprocess.PIPE, shell=True)
+    (out, err) = proc.communicate()
+    print "program output:", out
+    proc = subprocess.Popen("sphinx-apidoc -f -o . ../GPy", stdout=subprocess.PIPE, shell=True)
+    (out, err) = proc.communicate()
+    print "program output:", out
+    #proc = subprocess.Popen("whereis numpy", stdout=subprocess.PIPE, shell=True)
+    #(out, err) = proc.communicate()
+    #print "program output:", out
+    #proc = subprocess.Popen("whereis matplotlib", stdout=subprocess.PIPE, shell=True)
+    #(out, err) = proc.communicate()
+    #print "program output:", out
+
+    print "Compiled files"

 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
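A standalone sketch of what the ``Mock`` fallback above buys on ReadTheDocs: once a name is routed through ``sys.modules``, importing it and touching its attributes succeeds without the real C-backed package being installed (hypothetical module name)::

    import sys
    sys.modules['heavy_c_module'] = Mock()    # as in the MOCK_MODULES loop above

    import heavy_c_module                     # resolves to the mock, no ImportError
    val = heavy_c_module.some_function(1, 2)  # lowercase names return further Mocks
    cls = heavy_c_module.SomeClass            # capitalized names become fresh types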
@@ -181,21 +281,21 @@ htmlhelp_basename = 'GPydoc'
 # -- Options for LaTeX output --------------------------------------------------

 latex_elements = {
 # The paper size ('letterpaper' or 'a4paper').
 #'papersize': 'letterpaper',

 # The font size ('10pt', '11pt' or '12pt').
 #'pointsize': '10pt',

 # Additional stuff for the LaTeX preamble.
 #'preamble': '',
 }

 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
   ('index', 'GPy.tex', u'GPy Documentation',
    u'Author', 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of

@@ -238,9 +338,9 @@ man_pages = [
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
   ('index', 'GPy', u'GPy Documentation',
    u'Author', 'GPy', 'One line description of project.',
    'Miscellaneous'),
 ]

 # Documents to append as an appendix to all manuals.

@@ -294,3 +394,5 @@ epub_copyright = u'2013, Author'

 # Allow duplicate toc entries.
 #epub_tocdup = True
+
+autodoc_member_order = "source"
3  doc/doc-requirements.txt  (new file)

ipython
numpy
scipy
@@ -8,8 +8,8 @@ Welcome to GPy's documentation!
 For a quick start, you can have a look at one of the tutorials:

 * `Basic Gaussian process regression <tuto_GP_regression.html>`_
+* `A kernel overview <tuto_kernel_overview.html>`_
 * Advanced GP regression (Forthcoming)
-* Kernel manipulation (Forthcoming)
 * Writing kernels (Forthcoming)

 You may also be interested in some examples in the GPy/examples folder.

@@ -28,4 +28,3 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
115  doc/sphinxext/ipython_console_highlighting.py  (new file)

"""reST directive for syntax-highlighting ipython interactive sessions.

XXX - See what improvements can be made based on the new (as of Sept 2009)
'pycon' lexer for the python console.  At the very least it will give better
highlighted tracebacks.
"""

#-----------------------------------------------------------------------------
# Needed modules

# Standard library
import re

# Third party
from pygments.lexer import Lexer, do_insertions
from pygments.lexers.agile import (PythonConsoleLexer, PythonLexer,
                                   PythonTracebackLexer)
from pygments.token import Comment, Generic

from sphinx import highlighting

#-----------------------------------------------------------------------------
# Global constants
line_re = re.compile('.*?\n')

#-----------------------------------------------------------------------------
# Code begins - classes and functions

class IPythonConsoleLexer(Lexer):
    """
    For IPython console output or doctests, such as:

    .. sourcecode:: ipython

      In [1]: a = 'foo'

      In [2]: a
      Out[2]: 'foo'

      In [3]: print a
      foo

      In [4]: 1 / 0

    Notes:

      - Tracebacks are not currently supported.

      - It assumes the default IPython prompts, not customized ones.
    """

    name = 'IPython console session'
    aliases = ['ipython']
    mimetypes = ['text/x-ipython-console']
    input_prompt = re.compile("(In \[[0-9]+\]: )|(   \.\.\.+:)")
    output_prompt = re.compile("(Out\[[0-9]+\]: )|(   \.\.\.+:)")
    continue_prompt = re.compile("   \.\.\.+:")
    tb_start = re.compile("\-+")

    def get_tokens_unprocessed(self, text):
        pylexer = PythonLexer(**self.options)
        tblexer = PythonTracebackLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            input_prompt = self.input_prompt.match(line)
            continue_prompt = self.continue_prompt.match(line.rstrip())
            output_prompt = self.output_prompt.match(line)
            if line.startswith("#"):
                insertions.append((len(curcode),
                                   [(0, Comment, line)]))
            elif input_prompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, input_prompt.group())]))
                curcode += line[input_prompt.end():]
            elif continue_prompt is not None:
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, continue_prompt.group())]))
                curcode += line[continue_prompt.end():]
            elif output_prompt is not None:
                # Use the 'error' token for output.  We should probably make
                # our own token, but error is typically in a bright color like
                # red, so it works fine for our output prompts.
                insertions.append((len(curcode),
                                   [(0, Generic.Error, output_prompt.group())]))
                curcode += line[output_prompt.end():]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                              pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item


def setup(app):
    """Setup as a sphinx extension."""

    # This is only a lexer, so adding it below to pygments appears sufficient.
    # But if somebody knows what the right API usage should be to do that via
    # sphinx, by all means fix it here.  At least having this setup() here
    # suppresses the sphinx warning we'd get without it.
    pass

#-----------------------------------------------------------------------------
# Register the extension as a valid pygments lexer
highlighting.lexers['ipython'] = IPythonConsoleLexer()
835  doc/sphinxext/ipython_directive.py  (new file)

# -*- coding: utf-8 -*-
"""Sphinx directive to support embedded IPython code.

This directive allows pasting of entire interactive IPython sessions, prompts
and all, and their code will actually get re-executed at doc build time, with
all prompts renumbered sequentially. It also allows you to input code as a pure
python input by giving the argument python to the directive. The output looks
like an interactive ipython section.

To enable this directive, simply list it in your Sphinx ``conf.py`` file
(making sure the directory where you placed it is visible to sphinx, as is
needed for all Sphinx directives).

By default this directive assumes that your prompts are unchanged IPython ones,
but this can be customized. The configurable options that can be placed in
conf.py are

ipython_savefig_dir:
    The directory in which to save the figures. This is relative to the
    Sphinx source directory. The default is `html_static_path`.
ipython_rgxin:
    The compiled regular expression to denote the start of IPython input
    lines. The default is re.compile('In \[(\d+)\]:\s?(.*)\s*'). You
    shouldn't need to change this.
ipython_rgxout:
    The compiled regular expression to denote the start of IPython output
    lines. The default is re.compile('Out\[(\d+)\]:\s?(.*)\s*'). You
    shouldn't need to change this.
ipython_promptin:
    The string to represent the IPython input prompt in the generated ReST.
    The default is 'In [%d]:'. This expects that the line numbers are used
    in the prompt.
ipython_promptout:
    The string to represent the IPython prompt in the generated ReST. The
    default is 'Out [%d]:'. This expects that the line numbers are used
    in the prompt.

ToDo
----

- Turn the ad-hoc test() function into a real test suite.
- Break up ipython-specific functionality from matplotlib stuff into better
  separated code.

Authors
-------

- John D Hunter: original author.
- Fernando Perez: refactoring, documentation, cleanups, port to 0.11.
- Václav Šmilauer <eudoxos-AT-arcig.cz>: Prompt generalizations.
- Skipper Seabold, refactoring, cleanups, pure python addition
"""

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------

# Stdlib
import cStringIO
import os
import re
import sys
import tempfile
import ast

# To keep compatibility with various python versions
try:
    from hashlib import md5
except ImportError:
    from md5 import md5

# Third-party
try:
    import matplotlib
    matplotlib.use('Agg')
except ImportError:
    print "Couldn't find matplotlib"

import sphinx
from docutils.parsers.rst import directives
from docutils import nodes
from sphinx.util.compat import Directive

# Our own
from IPython import Config, InteractiveShell
from IPython.core.profiledir import ProfileDir
from IPython.utils import io

#-----------------------------------------------------------------------------
# Globals
#-----------------------------------------------------------------------------
# for tokenizing blocks
COMMENT, INPUT, OUTPUT = range(3)

#-----------------------------------------------------------------------------
# Functions and class declarations
#-----------------------------------------------------------------------------
def block_parser(part, rgxin, rgxout, fmtin, fmtout):
    """
    part is a string of ipython text, comprised of at most one
    input, one output, comments, and blank lines.  The block parser
    parses the text into a list of::

      blocks = [ (TOKEN0, data0), (TOKEN1, data1), ...]

    where TOKEN is one of [COMMENT | INPUT | OUTPUT ] and
    data is, depending on the type of token::

      COMMENT : the comment string

      INPUT: the (DECORATOR, INPUT_LINE, REST) where
         DECORATOR: the input decorator (or None)
         INPUT_LINE: the input as string (possibly multi-line)
         REST : any stdout generated by the input line (not OUTPUT)

      OUTPUT: the output string, possibly multi-line
    """

    block = []
    lines = part.split('\n')
    N = len(lines)
    i = 0
    decorator = None
    while 1:

        if i==N:
            # nothing left to parse -- the last line
            break

        line = lines[i]
        i += 1
        line_stripped = line.strip()
        if line_stripped.startswith('#'):
            block.append((COMMENT, line))
            continue

        if line_stripped.startswith('@'):
            # we're assuming at most one decorator -- may need to
            # rethink
            decorator = line_stripped
            continue

        # does this look like an input line?
        matchin = rgxin.match(line)
        if matchin:
            lineno, inputline = int(matchin.group(1)), matchin.group(2)

            # the ....: continuation string
            continuation = '   %s:'%''.join(['.']*(len(str(lineno))+2))
            Nc = len(continuation)
            # input lines can continue on for more than one line, if
            # we have a '\' line continuation char or a function call
            # echo line 'print'.  The input line can only be
            # terminated by the end of the block or an output line, so
            # we parse out the rest of the input line if it is
            # multiline as well as any echo text

            rest = []
            while i<N:

                # look ahead; if the next line is blank, or a comment, or
                # an output line, we're done

                nextline = lines[i]
                matchout = rgxout.match(nextline)
                #print "nextline=%s, continuation=%s, starts=%s"%(nextline, continuation, nextline.startswith(continuation))
                if matchout or nextline.startswith('#'):
                    break
                elif nextline.startswith(continuation):
                    inputline += '\n' + nextline[Nc:]
                else:
                    rest.append(nextline)
                i+= 1

            block.append((INPUT, (decorator, inputline, '\n'.join(rest))))
            continue

        # if it looks like an output line grab all the text to the end
        # of the block
        matchout = rgxout.match(line)
        if matchout:
            lineno, output = int(matchout.group(1)), matchout.group(2)
            if i<N-1:
                output = '\n'.join([output] + lines[i:])

            block.append((OUTPUT, output))
            break

    return block

class EmbeddedSphinxShell(object):
    """An embedded IPython instance to run inside Sphinx"""

    def __init__(self):

        self.cout = cStringIO.StringIO()

        # Create config object for IPython
        config = Config()
        config.Global.display_banner = False
        config.Global.exec_lines = ['import numpy as np',
                                    'from pylab import *'
                                    ]
        config.InteractiveShell.autocall = False
        config.InteractiveShell.autoindent = False
        config.InteractiveShell.colors = 'NoColor'

        # create a profile so instance history isn't saved
        tmp_profile_dir = tempfile.mkdtemp(prefix='profile_')
        profname = 'auto_profile_sphinx_build'
        pdir = os.path.join(tmp_profile_dir,profname)
        profile = ProfileDir.create_profile_dir(pdir)

        # Create and initialize ipython, but don't start its mainloop
        IP = InteractiveShell.instance(config=config, profile_dir=profile)
        # io.stdout redirect must be done *after* instantiating InteractiveShell
        io.stdout = self.cout
        io.stderr = self.cout

        # For debugging, so we can see normal output, use this:
        #from IPython.utils.io import Tee
        #io.stdout = Tee(self.cout, channel='stdout') # dbg
        #io.stderr = Tee(self.cout, channel='stderr') # dbg

        # Store a few parts of IPython we'll need.
        self.IP = IP
        self.user_ns = self.IP.user_ns
        self.user_global_ns = self.IP.user_global_ns

        self.input = ''
        self.output = ''

        self.is_verbatim = False
        self.is_doctest = False
        self.is_suppress = False

        # on the first call to the savefig decorator, we'll import
        # pyplot as plt so we can make a call to the plt.gcf().savefig
        self._pyplot_imported = False

    def clear_cout(self):
        self.cout.seek(0)
        self.cout.truncate(0)

    def process_input_line(self, line, store_history=True):
        """process the input, capturing stdout"""
        #print "input='%s'"%self.input
        stdout = sys.stdout
        splitter = self.IP.input_splitter
        try:
            sys.stdout = self.cout
            splitter.push(line)
            more = splitter.push_accepts_more()
            if not more:
                source_raw = splitter.source_raw_reset()[1]
                self.IP.run_cell(source_raw, store_history=store_history)
        finally:
            sys.stdout = stdout

    def process_image(self, decorator):
        """
        # build out an image directive like
        # .. image:: somefile.png
        #    :width 4in
        #
        # from an input like
        # savefig somefile.png width=4in
        """
        savefig_dir = self.savefig_dir
        source_dir = self.source_dir
        saveargs = decorator.split(' ')
        filename = saveargs[1]
        # insert relative path to image file in source
        outfile = os.path.relpath(os.path.join(savefig_dir,filename),
                                  source_dir)

        imagerows = ['.. image:: %s'%outfile]

        for kwarg in saveargs[2:]:
            arg, val = kwarg.split('=')
            arg = arg.strip()
            val = val.strip()
            imagerows.append('   :%s: %s'%(arg, val))

        image_file = os.path.basename(outfile) # only return file name
        image_directive = '\n'.join(imagerows)
        return image_file, image_directive

    # Callbacks for each type of token
    def process_input(self, data, input_prompt, lineno):
        """Process data block for INPUT token."""
        decorator, input, rest = data
        image_file = None
        image_directive = None
        #print 'INPUT:', data  # dbg
        is_verbatim = decorator=='@verbatim' or self.is_verbatim
        is_doctest = decorator=='@doctest' or self.is_doctest
        is_suppress = decorator=='@suppress' or self.is_suppress
        is_savefig = decorator is not None and \
                     decorator.startswith('@savefig')

        input_lines = input.split('\n')
        if len(input_lines) > 1:
            if input_lines[-1] != "":
                input_lines.append('') # make sure there's a blank line
                                       # so splitter buffer gets reset

        continuation = '   %s:'%''.join(['.']*(len(str(lineno))+2))
        Nc = len(continuation)

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        for i, line in enumerate(input_lines):
            if line.endswith(';'):
                is_semicolon = True

            if i==0:
                # process the first input line
                if is_verbatim:
                    self.process_input_line('')
                    self.IP.execution_count += 1 # increment it anyway
                else:
                    # only submit the line in non-verbatim mode
                    self.process_input_line(line, store_history=True)
                formatted_line = '%s %s'%(input_prompt, line)
            else:
                # process a continuation line
                if not is_verbatim:
                    self.process_input_line(line, store_history=True)

                formatted_line = '%s %s'%(continuation, line)

            if not is_suppress:
                ret.append(formatted_line)

        if not is_suppress and len(rest.strip()) and is_verbatim:
            # the "rest" is the standard output of the
            # input, which needs to be added in
            # verbatim mode
            ret.append(rest)

        self.cout.seek(0)
        output = self.cout.read()
        if not is_suppress and not is_semicolon:
            ret.append(output)
        elif is_semicolon: # get spacing right
            ret.append('')

        self.cout.truncate(0)
        return (ret, input_lines, output, is_doctest, image_file,
                image_directive)
        #print 'OUTPUT', output  # dbg

    def process_output(self, data, output_prompt,
                       input_lines, output, is_doctest, image_file):
        """Process data block for OUTPUT token."""
        if is_doctest:
            submitted = data.strip()
            found = output
            if found is not None:
                found = found.strip()

                # XXX - fperez: in 0.11, 'output' never comes with the prompt
                # in it, just the actual output text.  So I think all this code
                # can be nuked...

                # the above comment does not appear to be accurate... (minrk)

                ind = found.find(output_prompt)
                if ind<0:
                    e='output prompt="%s" does not match out line=%s' % \
                       (output_prompt, found)
                    raise RuntimeError(e)
                found = found[len(output_prompt):].strip()

                if found!=submitted:
                    e = ('doctest failure for input_lines="%s" with '
                         'found_output="%s" and submitted output="%s"' %
                         (input_lines, found, submitted) )
                    raise RuntimeError(e)
                #print 'doctest PASSED for input_lines="%s" with found_output="%s" and submitted output="%s"'%(input_lines, found, submitted)

    def process_comment(self, data):
        """Process data block for COMMENT token."""
        if not self.is_suppress:
            return [data]

    def save_image(self, image_file):
        """
        Saves the image file to disk.
        """
        self.ensure_pyplot()
        command = 'plt.gcf().savefig("%s")'%image_file
        #print 'SAVEFIG', command  # dbg
        self.process_input_line('bookmark ipy_thisdir', store_history=False)
        self.process_input_line('cd -b ipy_savedir', store_history=False)
        self.process_input_line(command, store_history=False)
        self.process_input_line('cd -b ipy_thisdir', store_history=False)
        self.process_input_line('bookmark -d ipy_thisdir', store_history=False)
        self.clear_cout()

    def process_block(self, block):
        """
        process block from the block_parser and return a list of processed lines
        """
        ret = []
        output = None
        input_lines = None
        lineno = self.IP.execution_count

        input_prompt = self.promptin%lineno
        output_prompt = self.promptout%lineno
        image_file = None
        image_directive = None

        for token, data in block:
            if token==COMMENT:
                out_data = self.process_comment(data)
            elif token==INPUT:
                (out_data, input_lines, output, is_doctest, image_file,
                 image_directive) = \
                          self.process_input(data, input_prompt, lineno)
            elif token==OUTPUT:
                out_data = \
                    self.process_output(data, output_prompt,
                                        input_lines, output, is_doctest,
                                        image_file)
            if out_data:
                ret.extend(out_data)

        # save the image files
        if image_file is not None:
            self.save_image(image_file)

        return ret, image_directive

    def ensure_pyplot(self):
        if self._pyplot_imported:
            return
        self.process_input_line('import matplotlib.pyplot as plt',
                                store_history=False)

    def process_pure_python(self, content):
        """
        content is a list of strings. it is unedited directive content

        This runs it line by line in the InteractiveShell, prepends
        prompts as needed capturing stderr and stdout, then returns
        the content as a list as if it were ipython code
        """
        output = []
        savefig = False # keep up with this to clear figure
        multiline = False # to handle line continuation
        multiline_start = None
        fmtin = self.promptin

        ct = 0

        for lineno, line in enumerate(content):

            line_stripped = line.strip()
            if not len(line):
                output.append(line)
                continue

            # handle decorators
            if line_stripped.startswith('@'):
                output.extend([line])
                if 'savefig' in line:
                    savefig = True # and need to clear figure
                continue

            # handle comments
            if line_stripped.startswith('#'):
                output.extend([line])
                continue

            # deal with lines checking for multiline
            continuation = u'   %s:'% ''.join(['.']*(len(str(ct))+2))
            if not multiline:
                modified = u"%s %s" % (fmtin % ct, line_stripped)
                output.append(modified)
                ct += 1
                try:
                    ast.parse(line_stripped)
                    output.append(u'')
                except Exception: # on a multiline
                    multiline = True
                    multiline_start = lineno
            else: # still on a multiline
                modified = u'%s %s' % (continuation, line)
                output.append(modified)
                try:
                    mod = ast.parse(
                            '\n'.join(content[multiline_start:lineno+1]))
                    if isinstance(mod.body[0], ast.FunctionDef):
                        # check to see if we have the whole function
                        for element in mod.body[0].body:
                            if isinstance(element, ast.Return):
                                multiline = False
                    else:
                        output.append(u'')
                        multiline = False
                except Exception:
                    pass

            if savefig: # clear figure if plotted
                self.ensure_pyplot()
                self.process_input_line('plt.clf()', store_history=False)
                self.clear_cout()
                savefig = False

        return output

class IpythonDirective(Directive):

    has_content = True
    required_arguments = 0
    optional_arguments = 4 # python, suppress, verbatim, doctest
    final_argumuent_whitespace = True
    option_spec = { 'python': directives.unchanged,
                    'suppress' : directives.flag,
                    'verbatim' : directives.flag,
                    'doctest' : directives.flag,
                  }

    shell = EmbeddedSphinxShell()

    def get_config_options(self):
        # contains sphinx configuration variables
        config = self.state.document.settings.env.config

        # get config variables to set figure output directory
        confdir = self.state.document.settings.env.app.confdir
        savefig_dir = config.ipython_savefig_dir
        source_dir = os.path.dirname(self.state.document.current_source)
        if savefig_dir is None:
            savefig_dir = config.html_static_path
        if isinstance(savefig_dir, list):
            savefig_dir = savefig_dir[0] # safe to assume only one path?
        savefig_dir = os.path.join(confdir, savefig_dir)

        # get regex and prompt stuff
        rgxin = config.ipython_rgxin
        rgxout = config.ipython_rgxout
        promptin = config.ipython_promptin
        promptout = config.ipython_promptout

        return savefig_dir, source_dir, rgxin, rgxout, promptin, promptout

    def setup(self):
        # reset the execution count if we haven't processed this doc
        #NOTE: this may be borked if there are multiple seen_doc tmp files
        #check time stamp?
        seen_docs = [i for i in os.listdir(tempfile.tempdir)
                     if i.startswith('seen_doc')]
        if seen_docs:
            fname = os.path.join(tempfile.tempdir, seen_docs[0])
            docs = open(fname).read().split('\n')
            if not self.state.document.current_source in docs:
                self.shell.IP.history_manager.reset()
                self.shell.IP.execution_count = 1
        else: # haven't processed any docs yet
            docs = []

        # get config values
        (savefig_dir, source_dir, rgxin,
         rgxout, promptin, promptout) = self.get_config_options()

        # and attach to shell so we don't have to pass them around
        self.shell.rgxin = rgxin
        self.shell.rgxout = rgxout
        self.shell.promptin = promptin
        self.shell.promptout = promptout
        self.shell.savefig_dir = savefig_dir
        self.shell.source_dir = source_dir

        # setup bookmark for saving figures directory

        self.shell.process_input_line('bookmark ipy_savedir %s'%savefig_dir,
                                      store_history=False)
        self.shell.clear_cout()

        # write the filename to a tempfile because it's been "seen" now
        if not self.state.document.current_source in docs:
            fd, fname = tempfile.mkstemp(prefix="seen_doc", text=True)
            fout = open(fname, 'a')
            fout.write(self.state.document.current_source+'\n')
            fout.close()

        return rgxin, rgxout, promptin, promptout

    def teardown(self):
        # delete last bookmark
        self.shell.process_input_line('bookmark -d ipy_savedir',
                                      store_history=False)
        self.shell.clear_cout()

    def run(self):
        debug = False

        #TODO, any reason block_parser can't be a method of embeddable shell
        # then we wouldn't have to carry these around
        rgxin, rgxout, promptin, promptout = self.setup()

        options = self.options
        self.shell.is_suppress = 'suppress' in options
        self.shell.is_doctest = 'doctest' in options
        self.shell.is_verbatim = 'verbatim' in options

        # handle pure python code
        if 'python' in self.arguments:
            content = self.content
            self.content = self.shell.process_pure_python(content)

        parts = '\n'.join(self.content).split('\n\n')

        lines = ['.. code-block:: ipython','']
        figures = []

        for part in parts:

            block = block_parser(part, rgxin, rgxout, promptin, promptout)

            if len(block):
                rows, figure = self.shell.process_block(block)
                for row in rows:
                    lines.extend(['   %s'%line for line in row.split('\n')])

                if figure is not None:
                    figures.append(figure)

        #text = '\n'.join(lines)
        #figs = '\n'.join(figures)

        for figure in figures:
            lines.append('')
            lines.extend(figure.split('\n'))
            lines.append('')

        #print lines
        if len(lines)>2:
            if debug:
                print '\n'.join(lines)
            else: #NOTE: this raises some errors, what's it for?
                #print 'INSERTING %d lines'%len(lines)
                self.state_machine.insert_input(
                    lines, self.state_machine.input_lines.source(0))

        text = '\n'.join(lines)
        txtnode = nodes.literal_block(text, text)
        txtnode['language'] = 'ipython'
        #imgnode = nodes.image(figs)

        # cleanup
        self.teardown()

        return []#, imgnode]

# Enable as a proper Sphinx directive
def setup(app):
    setup.app = app

    app.add_directive('ipython', IpythonDirective)
    app.add_config_value('ipython_savefig_dir', None, True)
    app.add_config_value('ipython_rgxin',
                         re.compile('In \[(\d+)\]:\s?(.*)\s*'), True)
    app.add_config_value('ipython_rgxout',
                         re.compile('Out\[(\d+)\]:\s?(.*)\s*'), True)
    app.add_config_value('ipython_promptin', 'In [%d]:', True)
    app.add_config_value('ipython_promptout', 'Out[%d]:', True)


# Simple smoke test, needs to be converted to a proper automatic test.
def test():

    examples = [
        r"""
In [9]: pwd
Out[9]: '/home/jdhunter/py4science/book'

In [10]: cd bookdata/
/home/jdhunter/py4science/book/bookdata

In [2]: from pylab import *

In [2]: ion()

In [3]: im = imread('stinkbug.png')

@savefig mystinkbug.png width=4in
In [4]: imshow(im)
Out[4]: <matplotlib.image.AxesImage object at 0x39ea850>

""",
        r"""

In [1]: x = 'hello world'

# string methods can be
# used to alter the string
@doctest
In [2]: x.upper()
Out[2]: 'HELLO WORLD'

@verbatim
In [3]: x.st<TAB>
x.startswith  x.strip
""",
        r"""

In [130]: url = 'http://ichart.finance.yahoo.com/table.csv?s=CROX\
   .....: &d=9&e=22&f=2009&g=d&a=1&br=8&c=2006&ignore=.csv'

In [131]: print url.split('&')
['http://ichart.finance.yahoo.com/table.csv?s=CROX', 'd=9', 'e=22', 'f=2009', 'g=d', 'a=1', 'b=8', 'c=2006', 'ignore=.csv']

In [60]: import urllib

""",
        r"""\

In [133]: import numpy.random

@suppress
In [134]: numpy.random.seed(2358)

@doctest
In [135]: numpy.random.rand(10,2)
Out[135]:
array([[ 0.64524308,  0.59943846],
       [ 0.47102322,  0.8715456 ],
       [ 0.29370834,  0.74776844],
       [ 0.99539577,  0.1313423 ],
       [ 0.16250302,  0.21103583],
       [ 0.81626524,  0.1312433 ],
       [ 0.67338089,  0.72302393],
       [ 0.7566368 ,  0.07033696],
       [ 0.22591016,  0.77731835],
       [ 0.0072729 ,  0.34273127]])

""",
        r"""
In [106]: print x
jdh

In [109]: for i in range(10):
   .....:     print i
   .....:
   .....:
0
1
2
3
4
5
6
7
8
9
""",
        r"""

In [144]: from pylab import *

In [145]: ion()

# use a semicolon to suppress the output
@savefig test_hist.png width=4in
In [151]: hist(np.random.randn(10000), 100);


@savefig test_plot.png width=4in
In [151]: plot(np.random.randn(10000), 'o');
""",
        r"""
# use a semicolon to suppress the output
In [151]: plt.clf()

@savefig plot_simple.png width=4in
In [151]: plot([1,2,3])

@savefig hist_simple.png width=4in
In [151]: hist(np.random.randn(10000), 100);

""",
        r"""
# update the current fig
In [151]: ylabel('number')

In [152]: title('normal distribution')


@savefig hist_with_text.png
In [153]: grid(True)

""",
        ]
    # skip local-file depending first example:
    examples = examples[1:]

    #ipython_directive.DEBUG = True  # dbg
    #options = dict(suppress=True)  # dbg
    options = dict()
    for example in examples:
        content = example.split('\n')
        ipython_directive('debug', arguments=None, options=options,
                          content=content, lineno=0,
                          content_offset=None, block_text=None,
                          state=None, state_machine=None,
                          )

# Run test suite as a script
if __name__=='__main__':
    if not os.path.isdir('_static'):
        os.mkdir('_static')
    test()
    print 'All OK? Check figures in _static/'
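The config values registered in ``setup(app)`` above are ordinary Sphinx settings; a conf.py sketch wiring the directive in (values illustrative, matching the defaults documented in the module docstring)::

    # conf.py (sketch)
    import os, sys
    sys.path.append(os.path.abspath('sphinxext'))   # make the extension importable

    extensions = ['ipython_directive', 'ipython_console_highlighting']

    ipython_savefig_dir = '_static'                 # where @savefig figures are written
    ipython_promptin = 'In [%d]:'
    ipython_promptout = 'Out[%d]:'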
@@ -1,4 +1,3 @@
 *************************************
 Gaussian process regression tutorial
 *************************************

@@ -12,7 +11,7 @@ We first import the libraries we will need: ::

     import numpy as np
     import GPy

-1 dimensional model
+1-dimensional model
 ===================

 For this toy example, we assume we have the following inputs and outputs::
@ -22,13 +21,11 @@ For this toy example, we assume we have the following inputs and outputs::
|
||||||
|
|
||||||
Note that the observations Y include some noise.
|
Note that the observations Y include some noise.
|
||||||
|
|
||||||
The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential) plus some white noise::
|
The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential)::
|
||||||
|
|
||||||
Gaussian = GPy.kern.rbf(D=1)
|
kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
|
||||||
noise = GPy.kern.white(D=1)
|
|
||||||
kernel = Gaussian + noise
|
|
||||||
|
|
||||||
The parameter ``D`` stands for the dimension of the input space. Note that many other kernels are implemented such as:
|
The parameter ``D`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Note that many other kernels are implemented such as:
|
||||||
|
|
||||||
* linear (``GPy.kern.linear``)
|
* linear (``GPy.kern.linear``)
|
||||||
* exponential kernel (``GPy.kern.exponential``)
|
* exponential kernel (``GPy.kern.exponential``)
|
||||||
|
|
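For comparison, a minimal sketch of the old and new kernel constructions side by side (the old style uses ``GPy.kern.white`` as in the removed lines)::

    import GPy

    # old style: observation noise modelled by an explicit white kernel
    old_kernel = GPy.kern.rbf(D=1) + GPy.kern.white(D=1)

    # new style: rbf alone; noise is now handled by the model's likelihood
    new_kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)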
@@ -41,19 +38,19 @@ The inputs required for building the model are the observations and the kernel::
 
     m = GPy.models.GP_regression(X,Y,kernel)
 
-The functions ``print`` and ``plot`` give an insight of the model we have just build. The code::
+By default, some observation noise is added to the model. The functions ``print`` and ``plot`` give insight into the model we have just built. The code::
 
     print m
     m.plot()
 
 gives the following output: ::
 
-    Marginal log-likelihood: -2.281e+01
+    Marginal log-likelihood: -4.479e+00
     Name            | Value    | Constraints | Ties | Prior
     -----------------------------------------------------------------
     rbf_variance    | 1.0000   |             |      |
     rbf_lengthscale | 1.0000   |             |      |
-    white_variance  | 1.0000   |             |      |
+    noise variance  | 1.0000   |             |      |
 
 .. figure:: Figures/tuto_GP_regression_m1.png
    :align: center
@@ -75,24 +72,24 @@ but it is also possible to set a range on to constrain one parameter to be fixed
 
     m.unconstrain('')               # Required to remove the previous constraints
     m.constrain_positive('rbf_variance')
     m.constrain_bounded('lengthscale',1.,10. )
-    m.constrain_fixed('white',0.0025)
+    m.constrain_fixed('noise',0.0025)
 
-Once the constrains have been imposed, the model can be optimized::
+Once the constraints have been imposed, the model can be optimized::
 
     m.optimize()
 
-If we want to perform some restarts to try to improve the result of the optimization, we can use the optimize_restart function::
+If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restarts`` function::
 
     m.optimize_restarts(Nrestarts = 10)
 
-Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model resulting model::
+Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model::
 
-    Marginal log-likelihood: 2.001e+01
+    Marginal log-likelihood: 3.603e+01
     Name            | Value    | Constraints | Ties | Prior
     -----------------------------------------------------------------
-    rbf_variance    | 0.8033   | (+ve)       |      |
-    rbf_lengthscale | 1.8033   | (1.0, 10.0) |      |
-    white_variance  | 0.0025   | Fixed       |      |
+    rbf_variance    | 0.8151   | (+ve)       |      |
+    rbf_lengthscale | 1.8037   | (1.0, 10.0) |      |
+    noise variance  | 0.0025   | Fixed       |      |
 
 .. figure:: Figures/tuto_GP_regression_m2.png
    :align: center
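Taken together, the hunks above describe the whole 1-dimensional workflow under the updated API. A minimal end-to-end sketch (the toy data here is invented for illustration)::

    import numpy as np
    import GPy

    # toy 1-D data: a noisy sine (illustrative only)
    X = np.random.uniform(-3., 3., (20, 1))
    Y = np.sin(X) + np.random.randn(20, 1) * 0.05

    kernel = GPy.kern.rbf(D=1)
    m = GPy.models.GP_regression(X, Y, kernel)

    m.unconstrain('')                            # clear any previous constraints
    m.constrain_positive('rbf_variance')
    m.constrain_bounded('lengthscale', 1., 10.)
    m.constrain_fixed('noise', 0.0025)

    m.optimize_restarts(Nrestarts=10)            # optimize, with restarts
    print m
    m.plot()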
@@ -101,7 +98,7 @@ Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting mo
    GP regression model after optimization of the parameters.
 
 
-2 dimensional example
+2-dimensional example
 =====================
 
 Here is a 2 dimensional example::
@@ -131,15 +128,16 @@ Here is a 2 dimensional example::
     m.plot()
     print(m)
 
-The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (ie the GP is not isotropic). The output of the last 2 lines is::
+The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (i.e. the GP is not isotropic). The output of the last two lines is::
 
-    Marginal log-likelihood: 2.893e+01
+    Marginal log-likelihood: 6.682e+01
     Name                | Value    | Constraints | Ties | Prior
-    -------------------------------------------------------------------------
-    Mat52_ARD_variance      | 0.4094 | (+ve)     |      |
-    Mat52_ARD_lengthscale_0 | 2.1060 | (+ve)     |      |
-    Mat52_ARD_lengthscale_1 | 2.0546 | (+ve)     |      |
-    white_variance          | 0.0012 | (+ve)     |      |
+    ---------------------------------------------------------------------
+    Mat52_variance      | 0.3860   | (+ve)       |      |
+    Mat52_lengthscale_0 | 2.0578   | (+ve)       |      |
+    Mat52_lengthscale_1 | 1.8542   | (+ve)       |      |
+    white_variance      | 0.0023   | (+ve)       |      |
+    noise variance      | 0.0000   | (+ve)       |      |
 
 .. figure:: Figures/tuto_GP_regression_m3.png
    :align: center
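The construction of the 2-D model itself is outside this hunk; a sketch consistent with the ``ARD`` flag mentioned above and the parameter names in the printout (the exact call is assumed, not shown in the diff)::

    # one lengthscale per input dimension (ARD) plus an explicit white-noise term
    kernel = GPy.kern.Matern52(2, ARD=True) + GPy.kern.white(2)
    m = GPy.models.GP_regression(X, Y, kernel)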
doc/tuto_kernel_overview.rst | 177 lines (new file)

@@ -0,0 +1,177 @@
+****************************
+Tutorial: a kernel overview
+****************************
+
+First we import the libraries we will need::
+
+    import pylab as pb
+    import numpy as np
+    import GPy
+    pb.ion()
+
+For most kernels, the dimension is the only mandatory parameter for defining a kernel object. However, it is also possible to specify the values of the parameters. For example, the three following commands are all valid for defining a squared exponential kernel (i.e. rbf or Gaussian)::
+
+    ker1 = GPy.kern.rbf(1)          # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+    ker2 = GPy.kern.rbf(D=1, variance=1.5, lengthscale=2.)
+    ker3 = GPy.kern.rbf(1, .5, .5)
+
+``plot`` and ``print`` functions are implemented to represent kernel objects::
+
+    print ker1
+
+    ker1.plot()
+    ker2.plot()
+    ker3.plot()
+
+.. figure:: Figures/tuto_kern_overview_basicdef.png
+    :align: center
+    :height: 350px
+
+Implemented kernels
+===================
+
+Many kernels are already implemented in GPy. Here is a summary of most of them:
+
+.. figure:: Figures/tuto_kern_overview_allkern.png
+    :align: center
+    :height: 800px
+
+It is also possible to use the ``sympy`` package to build new kernels; this will be the subject of another tutorial.
+
+Operations to combine kernels
+=============================
+
+In ``GPy``, kernel objects can be combined with the usual ``+`` and ``*`` operators::
+
+    k1 = GPy.kern.rbf(1, variance=1., lengthscale=2.)
+    k2 = GPy.kern.Matern32(1, variance=1., lengthscale=2.)
+
+    ker_add = k1 + k2
+    print ker_add
+
+    ker_prod = k1 * k2
+    print ker_prod
+
+Note that by default, the operator ``+`` adds kernels defined on the same input space, whereas ``*`` assumes that the kernels are defined on different input spaces. Here, for example, ``ker_add.D`` will return ``1`` whereas ``ker_prod.D`` will return ``2``.
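A quick sketch of the dimension bookkeeping just described::

    print ker_add.D    # 1: the sum acts on a single shared input dimension
    print ker_prod.D   # 2: the product acts on two separate input dimensions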
+
+In order to add kernels defined on different input spaces, the required command is::
+
+    ker_add_orth = k1.add_orthogonal(k2)
+
+.. figure:: Figures/tuto_kern_overview_add_orth.png
+    :align: center
+    :height: 350px
+
+    Output of ``ker_add_orth.plot(plot_limits=[[-10,-10],[10,10]])``.
+
+Example: building an ANOVA kernel
+=================================
+
+In two dimensions, ANOVA kernels have the following form:
+
+.. math::
+
+    k_{ANOVA}(x,y) = \prod_{i=1}^2 (1 + k_i(x_i,y_i)) = 1 + k_1(x_1,y_1) + k_2(x_2,y_2) + k_1(x_1,y_1) \times k_2(x_2,y_2).
+
+Let us assume that we want to define an ANOVA kernel with a Matern 5/2 kernel for :math:`k_i`. As seen previously, we can define this kernel as follows::
+
+    k_cst = GPy.kern.bias(1, variance=1.)
+    k_mat = GPy.kern.Matern52(1, variance=1., lengthscale=3.)
+    Kanova = (k_cst + k_mat) * (k_cst + k_mat)
+    print Kanova
+
+Printing the resulting kernel outputs the following::
+
+    Name                                | Value   | Constraints | Ties
+    ---------------------------------------------------------------------------
+    bias<times>bias_variance            | 1.0000  |             |
+    bias<times>Mat52_variance           | 1.0000  |             |
+    bias<times>Mat52_Mat52_lengthscale  | 3.0000  |             | (1)
+    Mat52<times>bias_variance           | 1.0000  |             |
+    Mat52<times>bias_Mat52_lengthscale  | 3.0000  |             | (0)
+    Mat52<times>Mat52_variance          | 1.0000  |             |
+    Mat52<times>Mat52_Mat52_lengthscale | 3.0000  |             | (0)
+    Mat52<times>Mat52_Mat52_lengthscale | 3.0000  |             | (1)
+
+Note the ties between the lengthscales of ``Kanova``, which keep the number of lengthscales equal to 2. On the other hand, there are four variance terms in the new parameterization: one for each term of the right-hand side of the above equation. We can illustrate the use of this kernel on a toy example::
+
+    # sample inputs and outputs
+    X = np.random.uniform(-3.,3.,(40,2))
+    Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
+
+    # Create GP regression model
+    m = GPy.models.GP_regression(X,Y,Kanova)
+    m.plot()
+
+.. figure:: Figures/tuto_kern_overview_mANOVA.png
+    :align: center
+    :height: 350px
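The ties reported in the printout above can be sanity-checked by moving one lengthscale and printing again; a sketch relying on the regular-expression ``set`` used elsewhere in this changeset::

    m.set('lengthscale', 2.5)   # tied lengthscales marked (0) and (1) move together
    print Kanova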
+
+As :math:`k_{ANOVA}` corresponds to the sum of 4 kernels, the best predictor can be split into a sum of 4 functions:
+
+.. math::
+
+    bp(x) & = k(x)^t K^{-1} Y \\
+          & = (1 + k_1(x_1) + k_2(x_2) + k_1(x_1)k_2(x_2))^t K^{-1} Y \\
+          & = 1^t K^{-1} Y + k_1(x_1)^t K^{-1} Y + k_2(x_2)^t K^{-1} Y + (k_1(x_1)k_2(x_2))^t K^{-1} Y
+
+The submodels can be represented with the option ``which_functions`` of ``plot``::
+
+    pb.figure(figsize=(20,5))
+    pb.subplots_adjust(wspace=0.5)
+    pb.subplot(1,5,1)
+    m.plot()
+    pb.subplot(1,5,2)
+    pb.ylabel("= ", rotation='horizontal', fontsize='30')
+    pb.subplot(1,5,3)
+    m.plot(which_functions=[False,True,False,False])
+    pb.ylabel("cst +", rotation='horizontal', fontsize='30')
+    pb.subplot(1,5,4)
+    m.plot(which_functions=[False,False,True,False])
+    pb.ylabel("+ ", rotation='horizontal', fontsize='30')
+    pb.subplot(1,5,5)
+    pb.ylabel("+ ", rotation='horizontal', fontsize='30')
+    m.plot(which_functions=[False,False,False,True])
+
+.. figure:: Figures/tuto_kern_overview_mANOVAdec.png
+    :align: center
+    :height: 200px
+
+.. import pylab as pb
+   import numpy as np
+   import GPy
+   pb.ion()
+
+   ker1 = GPy.kern.rbf(D=1)   # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
+   ker2 = GPy.kern.rbf(D=1, variance=.75, lengthscale=3.)
+   ker3 = GPy.kern.rbf(1, .5, .25)
+
+   ker1.plot()
+   ker2.plot()
+   ker3.plot()
+   #pb.savefig("Figures/tuto_kern_overview_basicdef.png")
+
+   kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
+   kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
+
+   pb.figure(figsize=(16,12))
+   pb.subplots_adjust(wspace=.5, hspace=.5)
+   for i, kern in enumerate(kernels):
+       pb.subplot(3,4,i+1)
+       kern.plot(x=7.5, plot_limits=[0.00001,15.])
+       pb.title(kernel_names[i] + '\n')
+   #pb.axes([.1,.1,.8,.7])
+   #pb.figtext(.5,.9,'Foo Bar', fontsize=18, ha='center')
+   #pb.figtext(.5,.85,'Lorem ipsum dolor sit amet, consectetur adipiscing elit', fontsize=10, ha='center')
+
+   # actual plot for the white-noise kernel
+   i = 11
+   X = np.linspace(0.,15.,201)
+   WN = 0*X
+   WN[100] = 1.
+   pb.subplot(3,4,i+1)
+   pb.plot(X,WN,'b')
@@ -1,64 +0,0 @@
-import numpy as np
-import pylab as pb
-pb.ion()
-import sys
-import GPy
-
-pb.close('all')
-
-N = 200
-M = 15
-resolution=5
-
-X = np.linspace(0,12,N)[:,None]
-Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
-Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
-#k = GPy.kern.rbf(1)
-k = GPy.kern.Matern32(1) + GPy.kern.white(1)
-
-models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
-         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)]
-models[0].scale_factor = 1.
-models[1].scale_factor = 10.
-models[2].scale_factor = 100.
-models[3].scale_factor = 1000.
-#GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
-#GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
-#GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
-
-[m.constrain_fixed('white',0.1) for m in models]
-
-#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
-xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j]
-
-lls = []
-cgs = []
-grads = []
-count = 0
-for l,v in zip(xx.flatten(),yy.flatten()):
-    count += 1
-    print count, 'of', resolution**2
-    sys.stdout.flush()
-
-    [m.set('lengthscale',l) for m in models]
-    [m.set('_variance',10.**v) for m in models]
-    lls.append([m.log_likelihood() for m in models])
-    grads.append([m.log_likelihood_gradients() for m in models])
-    cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
-
-lls = np.array(zip(*lls)).reshape(-1,resolution,resolution)
-cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution)
-
-for ll,cg in zip(lls,cgs):
-    pb.figure()
-    pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray)
-    pb.colorbar()
-    try:
-        pb.contour(xx,yy,np.exp(ll),colors='k')
-    except:
-        pass
-    pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
-    pb.colorbar()
setup.py | 11 lines changed

@@ -3,7 +3,7 @@
 import os
 from numpy.distutils.core import Extension, setup
-from sphinx.setup_command import BuildDoc
+#from sphinx.setup_command import BuildDoc
 
 # Version number
 version = '0.1.3'
@@ -19,7 +19,7 @@ setup(name = 'GPy',
       license = "BSD 3-clause",
       keywords = "machine-learning gaussian-processes kernels",
      url = "http://ml.sheffield.ac.uk/GPy/",
-      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples'],
+      packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'],
       package_dir={'GPy': 'GPy'},
       package_data = {'GPy': ['GPy/examples']},
       py_modules = ['GPy.__init__'],
@@ -27,8 +27,11 @@ setup(name = 'GPy',
       #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
       #                         sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
       install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9', 'matplotlib>=1.1'],
-      setup_requires=['sphinx'],
-      cmdclass = {'build_sphinx': BuildDoc},
+      extras_require = {
+          'docs': ['Sphinx', 'ipython'],
+      },
+      #setup_requires=['sphinx'],
+      #cmdclass = {'build_sphinx': BuildDoc},
       classifiers=[
           "Development Status :: 1 - Alpha",
           "Topic :: Machine Learning",
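With the new ``extras_require`` group, the documentation dependencies would be installed with something like this (usage sketch)::

    pip install .[docs]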