Merge branch 'master' into new_warping

This commit is contained in:
Nicolo Fusi 2013-03-13 13:50:00 +00:00
commit d057825f43
76 changed files with 2781 additions and 999 deletions

View file

@ -9,3 +9,10 @@ import util
import examples
from core import priors
import likelihoods
import testing
from numpy.testing import Tester
from nose.tools import nottest
@nottest
def tests():
Tester(testing).test(verbose=10)

View file

@ -121,9 +121,6 @@ class model(parameterised):
else:
raise AttributeError, "no parameter matches %s"%name
def log_prior(self):
"""evaluate the prior"""
return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
@ -135,12 +132,11 @@ class model(parameterised):
[np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
return ret
def _log_likelihood_gradients_transformed(self):
def _transform_gradients(self, g):
"""
Use self.log_likelihood_gradients and self.prior_gradients to get the gradients of the model.
Adjust the gradient for constraints and ties, return.
Takes a list of gradients and return an array of transformed gradients (positive/negative/tied/and so on)
"""
g = self._log_likelihood_gradients() + self._log_prior_gradients()
x = self._get_params()
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
@ -152,6 +148,7 @@ class model(parameterised):
else:
return g
def randomize(self):
"""
Randomize the model.
@ -241,6 +238,27 @@ class model(parameterised):
print "Warning! constraining %s postive"%name
def objective_function(self, x):
"""
The objective function passed to the optimizer. It combines the likelihood and the priors.
"""
self._set_params_transformed(x)
return -self.log_likelihood() - self.log_prior()
def objective_function_gradients(self, x):
"""
Gets the gradients from the likelihood and the priors.
"""
self._set_params_transformed(x)
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
return -LL_gradients - prior_gradients
def objective_and_gradients(self, x):
obj_f = self.objective_function(x)
obj_grads = self.objective_function_gradients(x)
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
@ -254,22 +272,12 @@ class model(parameterised):
if optimizer is None:
optimizer = self.preferred_optimizer
def f(x):
self._set_params_transformed(x)
return -self.log_likelihood()-self.log_prior()
def fp(x):
self._set_params_transformed(x)
return -self._log_likelihood_gradients_transformed()
def f_fp(x):
self._set_params_transformed(x)
return -self.log_likelihood()-self.log_prior(),-self._log_likelihood_gradients_transformed()
if start == None:
start = self._get_params_transformed()
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model = self, **kwargs)
opt.run(f_fp=f_fp, f=f, fp=fp)
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
self.optimization_runs.append(opt)
self._set_params_transformed(opt.x_opt)
@ -357,12 +365,9 @@ class model(parameterised):
dx = step*np.sign(np.random.uniform(-1,1,x.size))
#evaulate around the point x
self._set_params_transformed(x+dx)
f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
self._set_params_transformed(x-dx)
f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
self._set_params_transformed(x)
gradient = self._log_likelihood_gradients_transformed()
f1, g1 = self.objective_and_gradients(x+dx)
f2, g2 = self.objective_and_gradients(x-dx)
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1-f2)/(2*dx)
global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
@ -398,14 +403,10 @@ class model(parameterised):
for i in param_list:
xx = x.copy()
xx[i] += step
self._set_params_transformed(xx)
f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
f1, g1 = self.objective_and_gradients(xx)
xx[i] -= 2.*step
self._set_params_transformed(xx)
f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
self._set_params_transformed(x)
gradient = self._log_likelihood_gradients_transformed()[i]
f2, g2 = self.objective_and_gradients(xx)
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1-f2)/(2*step)
ratio = (f1-f2)/(2*step*gradient)

View file

@ -56,7 +56,7 @@ class parameterised(object):
return copy.deepcopy(self)
def tie_param(self, which):
def tie_params(self, which):
matches = self.grep_param_names(which)
assert matches.size > 0, "need at least something to tie together"
if len(self.tied_indices):

View file

@ -1,37 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import GPy
np.random.seed(123344)
N = 10
M = 3
Q = 2
D = 4
#generate GPLVM-like data
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1)
# pb.figure()
# m.plot()
# pb.title('PCA initialisation')
# pb.figure()
# m.optimize(messages = 1)
# m.plot()
# pb.title('After optimisation')
m.ensure_default_constraints()
m.randomize()
m.checkgrad(verbose = 1)

View file

@ -1,8 +1,8 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Please don't delete this without explaining to Neil the right way of doing this. I want to be able to run:
# GPy.examples.regression.toy_rbf_1D() from ipython having imported GPy, and this seems to be the way to do it!
import classification
import regression
import unsupervised
import dimensionality_reduction
import non_gaussian
import tutorials

View file

@ -10,8 +10,7 @@ import numpy as np
import GPy
default_seed=10000
def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
def crescent_data(seed=default_seed): #FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@ -31,11 +30,8 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
likelihood = GPy.likelihoods.EP(data['Y'],distribution)
if model_type=='Full':
m = GPy.models.GP(data['X'],likelihood,kernel)
else:
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m = GPy.models.GP(data['X'],likelihood,kernel)
m.ensure_default_constraints()
m.update_likelihood_approximation()
print(m)
@ -65,7 +61,7 @@ def oil():
# Contrain all parameters to be positive
m.constrain_positive('')
m.tie_param('lengthscale')
m.tie_params('lengthscale')
m.update_likelihood_approximation()
# Optimize
@ -94,16 +90,13 @@ def toy_linear_1d_classification(seed=default_seed):
# Model definition
m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
m.ensure_default_constraints()
# Optimize
"""
EPEM runs a loop that consists of two steps:
1) EP likelihood approximation:
m.update_likelihood_approximation()
2) Parameters optimization:
m.optimize()
"""
m.EPEM()
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.EPEM() #FIXME
# Plot
pb.subplot(211)
@ -113,3 +106,81 @@ def toy_linear_1d_classification(seed=default_seed):
print(m)
return m
def sparse_toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y,distribution)
Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
# Model definition
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=False)
m.set('len',2.)
m.ensure_default_constraints()
# Optimize
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.EPEM() #FIXME
# Plot
pb.subplot(211)
m.plot_f()
pb.subplot(212)
m.plot()
print(m)
return m
def sparse_crescent_data(inducing=10, seed=default_seed):
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
# Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'],distribution)
sample = np.random.randint(0,data['X'].shape[0],inducing)
Z = data['X'][sample,:]
#Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
# create sparse GP EP model
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
m.set('len',10.)
m.update_likelihood_approximation()
# optimize
m.optimize()
print(m)
# plot
m.plot()
return m

View file

@ -0,0 +1,57 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import GPy
default_seed = np.random.seed(123344)
def BGPLVM(seed = default_seed):
N = 10
M = 3
Q = 2
D = 4
#generate GPLVM-like data
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1)
# pb.figure()
# m.plot()
# pb.title('PCA initialisation')
# pb.figure()
# m.optimize(messages = 1)
# m.plot()
# pb.title('After optimisation')
m.ensure_default_constraints()
m.randomize()
m.checkgrad(verbose = 1)
return m
def GPLVM_oil_100():
data = GPy.util.datasets.oil_100()
# create simple GP model
kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6)
m = GPy.models.GPLVM(data['X'], 6, kernel = kernel)
# optimize
m.ensure_default_constraints()
m.optimize(messages=1)
# plot
print(m)
m.plot_latent(labels=data['Y'].argmax(axis=1))
return m

View file

@ -11,7 +11,7 @@ import GPy
default_seed=10000
def toy_1d(seed=default_seed):
def toy_poisson_1d(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 4).

View file

@ -1,57 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import cPickle as pickle
import numpy as np
import pylab as pb
import GPy
import pylab as plt
np.random.seed(3)
def plot_oil(X, theta, labels, label):
plt.figure()
X = X[:,np.argsort(theta)[:2]]
flow_type = (X[labels[:,0]==1])
plt.plot(flow_type[:,0], flow_type[:,1], 'rx')
flow_type = (X[labels[:,1]==1])
plt.plot(flow_type[:,0], flow_type[:,1], 'gx')
flow_type = (X[labels[:,2]==1])
plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
plt.title(label)
data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
Y = data['DataTrn']
N, D = Y.shape
selected = np.random.permutation(N)[:350]
labels = data['DataTrnLbls'][selected]
Y = Y[selected]
N, D = Y.shape
Y -= Y.mean(axis=0)
# Y /= Y.std(axis=0)
Q = 5
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 20)
m.constrain_positive('(rbf|bias|S|linear|white|noise)')
# m.unconstrain('noise')
# m.constrain_fixed('noise_precision', 50.0)
# m.unconstrain('white')
# m.constrain_bounded('white', 1e-6, 10.0)
# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
m.optimize(messages = True)
# m.optimize('tnc', messages = True)
# plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
# # pb.figure()
# m.plot()
# pb.title('PCA initialisation')
# pb.figure()
# m.optimize(messages = 1)
# m.plot()
# pb.title('After optimisation')
# m = GPy.models.GPLVM(Y, Q)
# m.constrain_positive('(white|rbf|bias|noise)')
# m.optimize()
# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')

View file

@ -41,10 +41,6 @@ def rogers_girolami_olympics():
print(m)
return m
def della_gatta_TRP63_gene_expression(number=942):
"""Run a standard Gaussian process regression on the della Gatta et al TRP63 Gene Expression data set for a given gene number."""
def toy_rbf_1d_50():
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d_50()
@ -75,6 +71,113 @@ def silhouette():
print(m)
return m
def coregionalisation_toy2():
"""
A simple demonstration of coregionalisation on two sinusoidal functions
"""
X1 = np.random.rand(50,1)*8
X2 = np.random.rand(30,1)*5
index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
X = np.hstack((np.vstack((X1,X2)),index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2,1)
k = k1.prod_orthogonal(k2)
m = GPy.models.GP_regression(X,Y,kernel=k)
m.constrain_fixed('rbf_var',1.)
m.constrain_positive('kappa')
m.ensure_default_constraints()
m.optimize('sim',max_f_eval=5000,messages=1)
#m.optimize()
pb.figure()
Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1))))
mean, var,low,up = m.predict(Xtest1)
GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up)
mean, var,low,up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up)
pb.plot(X1[:,0],Y1[:,0],'rx',mew=2)
pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
return m
def coregionalisation_toy():
"""
A simple demonstration of coregionalisation on two sinusoidal functions
"""
X1 = np.random.rand(50,1)*8
X2 = np.random.rand(30,1)*5
index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
X = np.hstack((np.vstack((X1,X2)),index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05
Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2,2)
k = k1.prod_orthogonal(k2)
m = GPy.models.GP_regression(X,Y,kernel=k)
m.constrain_fixed('rbf_var',1.)
m.constrain_positive('kappa')
m.ensure_default_constraints()
m.optimize()
pb.figure()
Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1))))
mean, var,low,up = m.predict(Xtest1)
GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up)
mean, var,low,up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up)
pb.plot(X1[:,0],Y1[:,0],'rx',mew=2)
pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
return m
def coregionalisation_sparse():
"""
A simple demonstration of coregionalisation on two sinusoidal functions
"""
X1 = np.random.rand(500,1)*8
X2 = np.random.rand(300,1)*5
index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
X = np.hstack((np.vstack((X1,X2)),index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05
Y = np.vstack((Y1,Y2))
M = 40
Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None]))
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2,2)
k = k1.prod_orthogonal(k2) + GPy.kern.white(2,0.001)
m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z)
m.scale_factor = 10000.
m.constrain_fixed('rbf_var',1.)
m.constrain_positive('kappa')
m.constrain_fixed('iip')
m.ensure_default_constraints()
m.optimize_restarts(5,robust=True,messages=1)
pb.figure()
Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1))))
mean, var,low,up = m.predict(Xtest1)
GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up)
mean, var,low,up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up)
pb.plot(X1[:,0],Y1[:,0],'rx',mew=2)
pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
y = pb.ylim()[0]
pb.plot(Z[:,0][Z[:,1]==0],np.zeros(np.sum(Z[:,1]==0))+y,'r|',mew=2)
pb.plot(Z[:,0][Z[:,1]==1],np.zeros(np.sum(Z[:,1]==1))+y,'g|',mew=2)
return m
def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000):
"""Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher."""
@ -91,7 +194,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
# Remove the mean (no bias kernel to ensure signal/noise is in RBF/white)
data['Y'] = data['Y'] - np.mean(data['Y'])
lls = GPy.examples.regression.contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
pb.contour(length_scales, log_SNRs, np.exp(lls), 20)
ax = pb.gca()
pb.xlabel('length scale')
@ -99,7 +202,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# Now run a few optimizations
models = []
optim_point_x = np.empty(2)
@ -107,18 +210,18 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
np.random.seed(seed=seed)
for i in range(0, model_restarts):
kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.))
m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern)
optim_point_x[0] = m.get('rbf_lengthscale')
optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
# optimize
m.ensure_default_constraints()
m.optimize(xtol=1e-6,ftol=1e-6)
optim_point_x[1] = m.get('rbf_lengthscale')
optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
models.append(m)
@ -126,7 +229,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
ax.set_ylim(ylim)
return (models, lls)
def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf):
def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf):
"""Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
:data_set: A data set from the utils.datasets director.
@ -152,7 +255,7 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0]
noise_var *= total_var
signal_var *= total_var
kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
@ -160,3 +263,71 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
length_scale_lls.append(model.log_likelihood())
lls.append(length_scale_lls)
return np.array(lls)
def sparse_GP_regression_1D(N = 400, M = 5):
"""Run a 1D example of a sparse GP regression."""
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
Y = np.sin(X)+np.random.randn(N,1)*0.05
# construct kernel
rbf = GPy.kern.rbf(1)
noise = GPy.kern.white(1)
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.constrain_positive('(variance|lengthscale|precision)')
m.checkgrad(verbose=1)
m.optimize('tnc', messages = 1)
m.plot()
return m
def sparse_GP_regression_2D(N = 400, M = 50):
"""Run a 2D example of a sparse GP regression."""
X = np.random.uniform(-3.,3.,(N,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
# construct kernel
rbf = GPy.kern.rbf(2)
noise = GPy.kern.white(2)
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M)
# contrain all parameters to be positive (but not inducing inputs)
m.constrain_positive('(variance|lengthscale|precision)')
m.set('len',2.)
m.checkgrad()
# optimize and plot
pb.figure()
m.optimize('tnc', messages = 1)
m.plot()
print(m)
return m
def uncertain_inputs_sparse_regression():
"""Run a 1D example of a sparse GP regression with uncertain inputs."""
# sample inputs and outputs
S = np.ones((20,1))
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X)+np.random.randn(20,1)*0.05
likelihood = GPy.likelihoods.Gaussian(Y)
Z = np.random.uniform(-3.,3.,(7,1))
k = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model
m = GPy.models.sparse_GP(X, likelihood, kernel=k, Z=Z, X_uncertainty=S)
# contrain all parameters to be positive
m.constrain_positive('(variance|prec)')
# optimize and plot
m.optimize('tnc', max_f_eval = 1000, messages=1)
m.plot()
print(m)
return m

View file

@ -1,30 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import GPy
np.random.seed(1)
print "sparse GPLVM with RBF kernel"
N = 100
M = 8
Q = 1
D = 2
#generate GPLVM-like data
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
m = GPy.models.sparse_GPLVM(Y, Q, M=M)
m.constrain_positive('(rbf|bias|noise|white)')
pb.figure()
m.plot()
pb.title('PCA initialisation')
pb.figure()
m.optimize(messages = 1)
m.plot()
pb.title('After optimisation')

View file

@ -1,64 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
"""
Sparse Gaussian Processes regression with an RBF kernel
"""
import pylab as pb
import numpy as np
import GPy
np.random.seed(2)
pb.ion()
N = 400
M = 5
######################################
## 1 dimensional example
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
Y = np.sin(X)+np.random.randn(N,1)*0.05
# construct kernel
rbf = GPy.kern.rbf(1)
noise = GPy.kern.white(1)
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.constrain_positive('(variance|lengthscale|precision)')
m.checkgrad(verbose=1)
m.optimize('tnc', messages = 1)
m.plot()
######################################
## 2 dimensional example
# # sample inputs and outputs
# X = np.random.uniform(-3.,3.,(N,2))
# Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
# # construct kernel
# rbf = GPy.kern.rbf(2)
# noise = GPy.kern.white(2)
# kernel = rbf + noise
# # create simple GP model
# m2 = GPy.models.sparse_GP_regression(X,Y,kernel, M = 50)
# create simple GP model
# # contrain all parameters to be positive (but not inducing inputs)
# m2.constrain_positive('(variance|lengthscale|precision)')
# #check gradient FIXME unit test please
# m2.checkgrad()
# # optimize and plot
# pb.figure()
# m2.optimize('tnc', messages = 1)
# m2.plot()
# print(m2)

View file

@ -1,60 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
"""
Sparse Gaussian Processes regression with an RBF kernel
"""
import pylab as pb
import numpy as np
import GPy
np.random.seed(2)
pb.ion()
N = 500
M = 5
pb.close('all')
######################################
## 1 dimensional example
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
#Y = np.sin(X)+np.random.randn(N,1)*0.05
F = np.sin(X)+np.random.randn(N,1)*0.05
Y = np.ones([F.shape[0],1])
Y[F<0] = -1
likelihood = GPy.inference.likelihoods.probit(Y)
# construct kernel
rbf = GPy.kern.rbf(1)
noise = GPy.kern.white(1)
kernel = rbf + noise
# create simple GP model
#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
# contrain all parameters to be positive
#m.constrain_fixed('prec',100.)
m = GPy.models.sparse_GP(X, Y, kernel, M=M)
m.ensure_default_constraints()
#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
# m.approximate_likelihood()
print m.checkgrad()
m.optimize('tnc', messages = 1)
m.plot(samples=3)
print m
n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
n.ensure_default_constraints()
if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian):
n.approximate_likelihood()
print n.checkgrad()
pb.figure()
n.plot()
"""
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.ensure_default_constraints()
print m.checkgrad()
"""

200
GPy/examples/tutorials.py Normal file
View file

@ -0,0 +1,200 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Code of Tutorials
"""
import pylab as pb
pb.ion()
import numpy as np
import GPy
def tuto_GP_regression():
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X) + np.random.randn(20,1)*0.05
kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
m = GPy.models.GP_regression(X,Y,kernel)
print m
m.plot()
m.constrain_positive('')
m.unconstrain('') # Required to remove the previous constrains
m.constrain_positive('rbf_variance')
m.constrain_bounded('lengthscale',1.,10. )
m.constrain_fixed('noise',0.0025)
m.optimize()
m.optimize_restarts(Nrestarts = 10)
###########################
# 2-dimensional example #
###########################
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(50,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
# define kernel
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model
m = GPy.models.GP_regression(X,Y,ker)
# contrain all parameters to be positive
m.constrain_positive('')
# optimize and plot
pb.figure()
m.optimize('tnc', max_f_eval = 1000)
m.plot()
print(m)
def tuto_kernel_overview():
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
pb.ion()
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
print ker2
ker1.plot()
ker2.plot()
ker3.plot()
k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels
k_prod = k1.prod(k2)
k_prodorth = k1.prod_orthogonal(k2)
# Sum of kernels
k_add = k1.add(k2)
k_addorth = k1.add_orthogonal(k2)
pb.figure(figsize=(8,8))
pb.subplot(2,2,1)
k_prod.plot()
pb.title('prod')
pb.subplot(2,2,2)
k_prodorth.plot()
pb.title('prod_orthogonal')
pb.subplot(2,2,3)
k_add.plot()
pb.title('add')
pb.subplot(2,2,4)
k_addorth.plot()
pb.title('add_orthogonal')
pb.subplots_adjust(wspace=0.3, hspace=0.3)
k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
k = k1 * k2 # equivalent to k = k1.prod(k2)
print k
# Simulate sample paths
X = np.linspace(-5,5,501)[:,None]
Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
# plot
pb.figure(figsize=(10,4))
pb.subplot(1,2,1)
k.plot()
pb.subplot(1,2,2)
pb.plot(X,Y.T)
pb.ylabel("Sample path")
pb.subplots_adjust(wspace=0.3)
k = (k1+k2)*(k1+k2)
print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.Matern32(1)
k3 = GPy.kern.white(1)
k = k1 + k2 + k3
print k
k.constrain_positive('var')
k.constrain_fixed(np.array([1]),1.75)
k.tie_params('len')
k.unconstrain('white')
k.constrain_bounded('white',lower=1e-5,upper=.5)
print k
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
print Kanova
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GP_regression(X,Y,Kanova)
pb.figure(figsize=(5,5))
m.plot()
pb.figure(figsize=(20,3))
pb.subplots_adjust(wspace=0.5)
pb.subplot(1,5,1)
m.plot()
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True])
ker1 = GPy.kern.rbf(D=1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
ker3 = GPy.kern.rbf(1, .5, .25)
ker1.plot()
ker2.plot()
ker3.plot()
#pb.savefig("Figures/tuto_kern_overview_basicdef.png")
kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
pb.figure(figsize=(16,12))
pb.subplots_adjust(wspace=.5, hspace=.5)
for i, kern in enumerate(kernels):
pb.subplot(3,4,i+1)
kern.plot(x=7.5,plot_limits=[0.00001,15.])
pb.title(kernel_names[i]+ '\n')
# actual plot for the noise
i = 11
X = np.linspace(0.,15.,201)
WN = 0*X
WN[100] = 1.
pb.subplot(3,4,i+1)
pb.plot(X,WN,'b')
def model_interaction():
X = np.random.randn(20,1)
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
return GPy.models.GP_regression(X,Y,kernel=k)

View file

@ -1,27 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import pylab as pb
import numpy as np
import GPy
pb.ion()
pb.close('all')
# sample inputs and outputs
S = np.ones((20,1))
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X)+np.random.randn(20,1)*0.05
k = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model
m = GPy.models.sparse_GP_regression(X,Y,X_uncertainty=S,kernel=k)
# contrain all parameters to be positive
m.constrain_positive('(variance|prec)')
# optimize and plot
m.optimize('tnc', max_f_eval = 1000, messages=1)
m.plot()
print(m)

View file

@ -1,32 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
"""
Sparse Gaussian Processes regression with an RBF kernel,
using the uncollapsed sparse GP (where the distribution of the
inducing points is explicitley represented)
"""
import pylab as pb
import numpy as np
import GPy
np.random.seed(2)
pb.ion()
N = 500
M = 20
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
Y = np.sin(X)+np.random.randn(N,1)*0.05
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model
m = GPy.models.uncollapsed_sparse_GP(X, Y, kernel=kernel, M=M)#, X_uncertainty=np.zeros_like(X)+0.01)
# contrain all parameters to be positive
m.ensure_default_constraints()
m.checkgrad()
# optimize and plot
m.plot()

View file

@ -1,25 +0,0 @@
"""
Usupervised learning with Gaussian Processes.
"""
import pylab as pb
import numpy as np
import GPy
######################################
## Oil data subsampled to 100 points.
def oil_100():
data = GPy.util.datasets.oil_100()
# create simple GP model
m = GPy.models.GPLVM(data['X'], 2)
# optimize
m.ensure_default_constraints()
m.optimize()
# plot
print(m)
return m

141
GPy/inference/SCG.py Normal file
View file

@ -0,0 +1,141 @@
#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
"""
sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0/np.sqrt(kappa)
xplus = x + sigma*d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew))/sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta*kappa
if delta <= 0:
delta = beta*kappa
beta = beta - theta/kappa
alpha = - mu/delta
# Calculate the comparison ratio.
xnew = x + alpha*d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status
Delta = 2.*(fnew - fold)/(alpha*mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta
if success:
# Test for termination
if np.max(np.abs(alpha*d)) < xtol or np.max(np.abs(fnew-fold)) < ftol:
return x, flog, function_eval, status
else:
# Update variables for new position
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
# If the gradient is zero then we are done.
if np.dot(gradnew,gradnew) == 0:
return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0*beta, betamax)
if Delta > 0.75:
beta = max(0.5*beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
nsuccess = 0
elif success:
gamma = np.dot(gradold - gradnew,gradnew)/(mu)
d = gamma*d - gradnew
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
return x, flog, function_eval, status

View file

@ -1,18 +1,18 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import pdb
import pylab as pb
import datetime as dt
from scipy import optimize
import numpy as np
try:
import rasmussens_minimize as rasm
rasm_available = True
except ImportError:
rasm_available = False
import pdb
import pylab as pb
import datetime as dt
from SCG import SCG
class Optimizer():
"""
@ -29,7 +29,7 @@ class Optimizer():
:rtype: optimizer object.
"""
def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, ftol=None, gtol=None, xtol=None):
def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, max_iters = 1e3, ftol=None, gtol=None, xtol=None):
self.opt_name = None
self.x_init = x_init
self.messages = messages
@ -38,6 +38,7 @@ class Optimizer():
self.funct_eval = None
self.status = None
self.max_f_eval = int(max_f_eval)
self.max_iters = int(max_iters)
self.trace = None
self.time = "Not available"
self.xtol = xtol
@ -63,12 +64,13 @@ class Optimizer():
pb.xlabel('Iteration')
pb.ylabel('f(x)')
def diagnostics(self):
print "Optimizer: \t\t\t\t %s" % self.opt_name
print "f(x_opt): \t\t\t\t %.3f" % self.f_opt
print "Number of function evaluations: \t %d" % self.funct_eval
print "Optimization status: \t\t\t %s" % self.status
print "Time elapsed: \t\t\t\t %s" % self.time
def __str__(self):
diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name
diagnostics += "f(x_opt): \t\t\t\t %.3f\n" % self.f_opt
diagnostics += "Number of function evaluations: \t %d\n" % self.funct_eval
diagnostics += "Optimization status: \t\t\t %s\n" % self.status
diagnostics += "Time elapsed: \t\t\t\t %s\n" % self.time
return diagnostics
class opt_tnc(Optimizer):
def __init__(self, *args, **kwargs):
@ -161,7 +163,6 @@ class opt_simplex(Optimizer):
self.f_opt = opt_result[1]
self.funct_eval = opt_result[3]
self.status = statuses[opt_result[4]]
self.trace = None
@ -196,13 +197,28 @@ class opt_rasm(Optimizer):
self.trace = opt_result[1]
class opt_SCG(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Scaled Conjugate Gradients"
def opt(self, f_fp = None, f = None, fp = None):
assert not f is None
assert not fp is None
opt_result = SCG(f,fp,self.x_init, display=self.messages, maxiters=self.max_iters, max_f_eval=self.max_f_eval, xtol=self.xtol, ftol=self.ftol)
self.x_opt = opt_result[0]
self.trace = opt_result[1]
self.f_opt = self.trace[-1]
self.funct_eval = opt_result[2]
self.status = opt_result[3]
def get_optimizer(f_min):
# import rasmussens_minimize as rasm
from SGD import opt_SGD
optimizers = {'fmin_tnc': opt_tnc,
'simplex': opt_simplex,
'lbfgsb': opt_lbfgsb,
'scg': opt_SCG,
'sgd': opt_SGD}
if rasm_available:

View file

@ -76,7 +76,7 @@ class Matern32(kernpart):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@ -84,29 +84,29 @@ class Matern32(kernpart):
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*partial)
target[0] += np.sum(dvar*dL_dK)
if self.ARD == True:
dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
#dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,partial,X,target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial)
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target):
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target):
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def Gram_matrix(self,F,F1,F2,lower,upper):

View file

@ -74,7 +74,7 @@ class Matern52(kernpart):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@ -82,29 +82,29 @@ class Matern52(kernpart):
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*partial)
target[0] += np.sum(dvar*dL_dK)
if self.ARD:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,X,target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial)
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target):
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target):
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def Gram_matrix(self,F,F1,F2,F3,lower,upper):

View file

@ -2,5 +2,5 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic
from kern import kern

View file

@ -35,16 +35,17 @@ class bias(kernpart):
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,partial,X,X2,target):
target += partial.sum()
def dK_dtheta(self,dL_dKdiag,X,X2,target):
target += dL_dKdiag.sum()
def dKdiag_dtheta(self,partial,X,target):
target += partial.sum()
def dK_dX(self, partial,X, X2, target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum()
def dK_dX(self, dL_dK,X, X2, target):
pass
def dKdiag_dX(self,partial,X,target):
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
#---------------------------------------#
@ -60,30 +61,29 @@ class bias(kernpart):
def psi2(self, Z, mu, S, target):
target += self.variance**2
def dpsi0_dtheta(self, partial, Z, mu, S, target):
target += partial.sum()
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
target += dL_dpsi0.sum()
def dpsi1_dtheta(self, partial, Z, mu, S, target):
target += partial.sum()
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
target += dL_dpsi1.sum()
def dpsi2_dtheta(self, partial, Z, mu, S, target):
target += 2.*self.variance*partial.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
target += 2.*self.variance*dL_dpsi2.sum()
def dpsi0_dZ(self, partial, Z, mu, S, target):
def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
pass
def dpsi0_dmuS(self, partial, Z, mu, S, target_mu, target_S):
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, partial, Z, mu, S, target):
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
pass
def dpsi1_dmuS(self, partial, Z, mu, S, target_mu, target_S):
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
pass
def dpsi2_dZ(self, partial, Z, mu, S, target):
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
pass
def dpsi2_dmuS(self, partial, Z, mu, S, target_mu, target_S):
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
pass

View file

@ -18,8 +18,11 @@ from Brownian import Brownian as Brownianpart
from periodic_exponential import periodic_exponential as periodic_exponentialpart
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
from product import product as productpart
from product_orthogonal import product_orthogonal as product_orthogonalpart
from prod import prod as prodpart
from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
#using meta-classes to make the objects construct properly wthout them.
@ -243,7 +246,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part])
def product(k1,k2):
def prod(k1,k2):
"""
Construct a product kernel over D from two kernels over D
@ -251,10 +254,10 @@ def product(k1,k2):
:type k1, k2: kernpart
:rtype: kernel object
"""
part = productpart(k1,k2)
part = prodpart(k1,k2)
return kern(k1.D, [part])
def product_orthogonal(k1,k2):
def prod_orthogonal(k1,k2):
"""
Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
@ -262,5 +265,34 @@ def product_orthogonal(k1,k2):
:type k1, k2: kernpart
:rtype: kernel object
"""
part = product_orthogonalpart(k1,k2)
part = prod_orthogonalpart(k1,k2)
return kern(k1.D+k2.D, [part])
def symmetric(k):
"""
Construct a symmetrical kernel from an existing kernel
"""
k_ = k.copy()
k_.parts = [symmetric_part(p) for p in k.parts]
return k_
def coregionalise(Nout,R=1, W=None, kappa=None):
p = coregionalise_part(Nout,R,W,kappa)
return kern(1,[p])
def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
"""
Construct rational quadratic kernel.
:param D: the number of input dimensions
:type D: int (D=1 is the only value currently supported)
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscale :math:`\ell`
:type lengthscale: float
:rtype: kern object
"""
part = rational_quadraticpart(D,variance, lengthscale, power)
return kern(D, [part])

90
GPy/kern/coregionalise.py Normal file
View file

@ -0,0 +1,90 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
import pdb
class coregionalise(kernpart):
"""
Kernel for Intrisec Corregionalization Models
"""
def __init__(self,Nout,R=1, W=None, kappa=None):
self.D = 1
self.name = 'coregion'
self.Nout = Nout
self.R = R
if W is None:
self.W = np.ones((self.Nout,self.R))
else:
assert W.shape==(self.Nout,self.R)
self.W = W
if kappa is None:
kappa = np.ones(self.Nout)
else:
assert kappa.shape==(self.Nout,)
self.kappa = kappa
self.Nparam = self.Nout*(self.R + 1)
self._set_params(np.hstack([self.W.flatten(),self.kappa]))
def _get_params(self):
return np.hstack([self.W.flatten(),self.kappa])
def _set_params(self,x):
assert x.size == self.Nparam
self.kappa = x[-self.Nout:]
self.W = x[:-self.Nout].reshape(self.Nout,self.R)
self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
def _get_param_names(self):
return sum([['W%i_%i'%(i,j) for j in range(self.R)] for i in range(self.Nout)],[]) + ['kappa_%i'%i for i in range(self.Nout)]
def K(self,index,index2,target):
index = np.asarray(index,dtype=np.int)
if index2 is None:
index2 = index
else:
index2 = np.asarray(index2,dtype=np.int)
ii,jj = np.meshgrid(index,index2)
ii,jj = ii.T, jj.T
target += self.B[ii,jj]
def Kdiag(self,index,target):
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
def dK_dtheta(self,dL_dK,index,index2,target):
index = np.asarray(index,dtype=np.int)
if index2 is None:
index2 = index
else:
index2 = np.asarray(index2,dtype=np.int)
ii,jj = np.meshgrid(index,index2)
ii,jj = ii.T, jj.T
dL_dK_small = np.zeros_like(self.B)
for i in range(self.Nout):
for j in range(self.Nout):
tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
dL_dK_small[i,j] = tmp
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
target += np.hstack([dW.flatten(),dkappa])
def dKdiag_dtheta(self,dL_dKdiag,index,target):
index = np.asarray(index,dtype=np.int).flatten()
dL_dKdiag_small = np.zeros(self.Nout)
for i in range(self.Nout):
dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
dW = 2.*self.W*dL_dKdiag_small[:,None]
dkappa = dL_dKdiag_small
target += np.hstack([dW.flatten(),dkappa])
def dK_dX(self,dL_dK,X,X2,target):
pass

View file

@ -74,35 +74,35 @@ class exponential(kernpart):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = np.exp(-dist)
target[0] += np.sum(dvar*partial)
target[0] += np.sum(dvar*dL_dK)
if self.ARD == True:
dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else:
dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,partial,X,target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
#NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial)
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target):
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target):
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def Gram_matrix(self,F,F1,lower,upper):

View file

@ -7,8 +7,8 @@ import pylab as pb
from ..core.parameterised import parameterised
from kernpart import kernpart
import itertools
from product_orthogonal import product_orthogonal
from product import product
from prod_orthogonal import prod_orthogonal
from prod import prod
class kern(parameterised):
def __init__(self,D,parts=[], input_slices=None):
@ -161,7 +161,7 @@ class kern(parameterised):
K1 = self.copy()
K2 = other.copy()
newkernparts = [product(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
newkernparts = [prod(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
@ -183,7 +183,7 @@ class kern(parameterised):
K1 = self.copy()
K2 = other.copy()
newkernparts = [product_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
newkernparts = [prod_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
@ -237,7 +237,7 @@ class kern(parameterised):
for i in range(K1.Nparam + K2.Nparam):
index = np.where(index_param==i)[0]
if index.size > 1:
self.tie_param(index)
self.tie_params(index)
for i in prev_constr_pos:
self.constrain_positive(np.where(index_param==i)[0])
for i in prev_constr_neg:
@ -271,10 +271,10 @@ class kern(parameterised):
[p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target
def dK_dtheta(self,partial,X,X2=None,slices1=None,slices2=None):
def dK_dtheta(self,dL_dK,X,X2=None,slices1=None,slices2=None):
"""
:param partial: An array of partial derivaties, dL_dK
:type partial: Np.ndarray (N x M)
:param dL_dK: An array of dL_dK derivaties, dL_dK
:type dL_dK: Np.ndarray (N x M)
:param X: Observed data inputs
:type X: np.ndarray (N x D)
:param X2: Observed dara inputs (optional, defaults to X)
@ -288,16 +288,16 @@ class kern(parameterised):
if X2 is None:
X2 = X
target = np.zeros(self.Nparam)
[p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
[p.dK_dtheta(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
return self._transform_gradients(target)
def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None):
def dK_dX(self,dL_dK,X,X2=None,slices1=None,slices2=None):
if X2 is None:
X2 = X
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(X)
[p.dK_dX(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
[p.dK_dX(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
return target
def Kdiag(self,X,slices=None):
@ -307,20 +307,20 @@ class kern(parameterised):
[p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
return target
def dKdiag_dtheta(self,partial,X,slices=None):
def dKdiag_dtheta(self,dL_dKdiag,X,slices=None):
assert X.shape[1]==self.D
assert len(partial.shape)==1
assert partial.size==X.shape[0]
assert len(dL_dKdiag.shape)==1
assert dL_dKdiag.size==X.shape[0]
slices = self._process_slices(slices,False)
target = np.zeros(self.Nparam)
[p.dKdiag_dtheta(partial[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
[p.dKdiag_dtheta(dL_dKdiag[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
return self._transform_gradients(target)
def dKdiag_dX(self, partial, X, slices=None):
def dKdiag_dX(self, dL_dKdiag, X, slices=None):
assert X.shape[1]==self.D
slices = self._process_slices(slices,False)
target = np.zeros_like(X)
[p.dKdiag_dX(partial[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
[p.dKdiag_dX(dL_dKdiag[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
return target
def psi0(self,Z,mu,S,slices=None):
@ -329,16 +329,16 @@ class kern(parameterised):
[p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)]
return target
def dpsi0_dtheta(self,partial,Z,mu,S,slices=None):
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,slices=None):
slices = self._process_slices(slices,False)
target = np.zeros(self.Nparam)
[p.dpsi0_dtheta(partial[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
[p.dpsi0_dtheta(dL_dpsi0[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self,partial,Z,mu,S,slices=None):
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,slices=None):
slices = self._process_slices(slices,False)
target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
[p.dpsi0_dmuS(partial,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
[p.dpsi0_dmuS(dL_dpsi0,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
return target_mu,target_S
def psi1(self,Z,mu,S,slices1=None,slices2=None):
@ -348,96 +348,163 @@ class kern(parameterised):
[p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)]
return target
def dpsi1_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None):
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""N,M,(Ntheta)"""
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros((self.Nparam))
[p.dpsi1_dtheta(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
[p.dpsi1_dtheta(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""N,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(Z)
[p.dpsi1_dZ(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
[p.dpsi1_dZ(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target
def dpsi1_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
[p.dpsi1_dmuS(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
[p.dpsi1_dmuS(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target_mu, target_S
def psi2(self,Z,mu,S,slices1=None,slices2=None):
"""
:Z: np.ndarray of inducing inputs (M x Q)
: mu, S: np.ndarrays of means and variacnes (each N x Q)
:returns psi2: np.ndarray (N,M,M,Q) """
:param Z: np.ndarray of inducing inputs (M x Q)
:param mu, S: np.ndarrays of means and variances (each N x Q)
:returns psi2: np.ndarray (N,M,M)
"""
target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
slices1, slices2 = self._process_slices(slices1,slices2)
[p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
#compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2):
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
target += p1.variance*(p2._psi1[:,:,None]+p2._psi1[:,None,:])
elif p2.name=='bias' and p1.name=='rbf':
target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
tmp = np.zeros((mu.shape[0],Z.shape[0]))
p2.psi1(Z,mu,S,tmp)
target += p1.variance*(tmp[:,:,None] + tmp[:,None,:])
elif p2.name=='bias' and p1.name=='linear':
tmp = np.zeros((mu.shape[0],Z.shape[0]))
p1.psi1(Z,mu,S,tmp)
target += p2.variance*(tmp[:,:,None] + tmp[:,None,:])
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target
# "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
# not really expensive, since it's just a matrix of zeroes.
# psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
# [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
crossterms = 0.0
# for 3 kernels this returns something like
# [(0,1), (0,2), (1,2)]
# in theory, we should also account for (1,0), (2,0) and so on, but
# the transpose deals exactly with that
# for a,b in itertools.combinations(psi1_matrices, 2):
# tmp = np.multiply(a,b)
# crossterms += tmp[:,None,:] + tmp[:, :,None]
return target + crossterms
def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
"""Returns shape (N,M,M,Ntheta)"""
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros(self.Nparam)
[p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
[p.dpsi2_dtheta(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
# # "crossterms"
# # 1. get all the psi1 statistics
# psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
# [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
#compute the "cross" terms
#TODO: better looping
for i1, i2 in itertools.combinations(range(len(self.parts)),2):
p1,p2 = self.parts[i1], self.parts[i2]
ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
# partial1 = np.ones_like(partial1)
# # 2. get all the dpsi1/dtheta gradients
# psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
# [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2.,Z,mu,S,target[ps2])
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1*2.,Z,mu,S,target[ps1])
elif p2.name=='bias' and p1.name=='rbf':
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2.,Z,mu,S,target[ps1])
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1*2.,Z,mu,S,target[ps2])
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2., Z, mu, S, target[ps1])
elif p2.name=='bias' and p1.name=='linear':
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2., Z, mu, S, target[ps1])
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
# # 3. multiply them somehow
# for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
# tmp = (psi1_gradients[a][None, None] * psi1_matrices[b][:,:, None])
# # target += (tmp[None] + tmp[:,None]).sum(0).sum(0).sum(0)
# # gne = (psi1_gradients[a].sum()*psi1_matrices[b].sum())
# # target += gne
# #target += (gne[None] + gne[:, None]).sum(0)
# target += (partial.sum(0)[:,:,None] * (tmp[:, None] + tmp[:,:,None]).sum(0)).sum(0).sum(0)
return self._transform_gradients(target)
def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(Z)
[p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
[p.dpsi2_dZ(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
#compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2):
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
p2.dpsi1_dX(dL_dpsi2.sum(1).T*p1.variance,Z,mu,S,target)
elif p2.name=='bias' and p1.name=='rbf':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance,Z,mu,S,target)
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
p2.dpsi1_dZ(dL_dpsi2.sum(1).T*p1.variance, Z, mu, S, target)
elif p2.name=='bias' and p1.name=='linear':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance, Z, mu, S, target)
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target
def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
[p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
[p.dpsi2_dmuS(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
#compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2):
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2.,Z,mu,S,target_mu,target_S)
elif p2.name=='bias' and p1.name=='rbf':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2.,Z,mu,S,target_mu,target_S)
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2., Z, mu, S, target_mu, target_S)
elif p2.name=='bias' and p1.name=='linear':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2., Z, mu, S, target_mu, target_S)
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
#TODO: there are some extra terms to compute here!
return target_mu, target_S
def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None,*args,**kwargs):

View file

@ -26,31 +26,31 @@ class kernpart(object):
raise NotImplementedError
def Kdiag(self,X,target):
raise NotImplementedError
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
raise NotImplementedError
def dKdiag_dtheta(self,partial,X,target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
raise NotImplementedError
def psi0(self,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dtheta(self,partial,Z,mu,S,target):
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi1(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dtheta(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dZ(self,partial,Z,mu,S,target):
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi2(self,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dZ(self,partial,Z,mu,S,target):
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dtheta(self,partial,Z,mu,S,target):
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def dK_dX(self,X,X2,target):
raise NotImplementedError

View file

@ -74,16 +74,23 @@ class linear(kernpart):
def Kdiag(self,X,target):
np.add(target,np.sum(self.variances*np.square(X),-1),target)
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
if self.ARD:
product = X[:,None,:]*X2[None,:,:]
target += (partial[:,:,None]*product).sum(0).sum(0)
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
else:
self._K_computations(X, X2)
target += np.sum(self._dot_product*partial)
target += np.sum(self._dot_product*dL_dK)
def dK_dX(self,partial,X,X2,target):
target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
def dKdiag_dtheta(self,dL_dKdiag, X, target):
tmp = dL_dKdiag[:,None]*X**2
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dK_dX(self,dL_dK,X,X2,target):
target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
#---------------------------------------#
# PSI statistics #
@ -93,33 +100,33 @@ class linear(kernpart):
self._psi_computations(Z,mu,S)
target += np.sum(self.variances*self.mu2_S,1)
def dpsi0_dtheta(self,partial,Z,mu,S,target):
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
self._psi_computations(Z,mu,S)
tmp = partial[:, None] * self.mu2_S
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S):
target_mu += partial[:, None] * (2.0*mu*self.variances)
target_S += partial[:, None] * self.variances
def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S):
target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances)
target_S += dL_dpsi0[:, None] * self.variances
def psi1(self,Z,mu,S,target):
"""the variance, it does nothing"""
self.K(mu,Z,target)
def dpsi1_dtheta(self,partial,Z,mu,S,target):
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
"""the variance, it does nothing"""
self.dK_dtheta(partial,mu,Z,target)
self.dK_dtheta(dL_dpsi1,mu,Z,target)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
"""Do nothing for S, it does not affect psi1"""
self._psi_computations(Z,mu,S)
target_mu += (partial.T[:,:, None]*(Z*self.variances)).sum(1)
target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1)
def dpsi1_dZ(self,partial,Z,mu,S,target):
self.dK_dX(partial.T,Z,mu,target)
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
self.dK_dX(dL_dpsi1.T,Z,mu,target)
def psi2(self,Z,mu,S,target):
"""
@ -128,26 +135,27 @@ class linear(kernpart):
self._psi_computations(Z,mu,S)
psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
target += psi2.sum(-1)
#TODO: this could be faster using np.tensordot
def dpsi2_dtheta(self,partial,Z,mu,S,target):
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S)
tmp = (partial[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
if self.ARD:
target += tmp.sum(0).sum(0).sum(0)
else:
target += tmp.sum()
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """
self._psi_computations(Z,mu,S)
tmp = self.ZZ*np.square(self.variances) # M,M,Q
target_mu += (partial[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
target_S += (partial[:,:,:,None]*tmp).sum(1).sum(1)
target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1)
def dpsi2_dZ(self,partial,Z,mu,S,target):
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S)
mu2_S = np.sum(self.mu2_S,0)# Q,
target += (partial[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
#---------------------------------------#
# Precomputations #

View file

@ -1,6 +1,11 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.decorators import silence_errors
class periodic_Matern32(kernpart):
"""
@ -39,12 +44,16 @@ class periodic_Matern32(kernpart):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
@ -55,6 +64,7 @@ class periodic_Matern32(kernpart):
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
@ -101,7 +111,8 @@ class periodic_Matern32(kernpart):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target):
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -166,8 +177,72 @@ class periodic_Matern32(kernpart):
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial)
target[0] += np.sum(dK_dvar*dL_dK)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial)
target[1] += np.sum(dK_dlen*dL_dK)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial)
target[2] += np.sum(dK_dper*dL_dK)
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)
#dK_dlen
da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
db_dlen = [0.,2*self.lengthscale/3.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)

View file

@ -1,6 +1,11 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.decorators import silence_errors
class periodic_Matern52(kernpart):
"""
@ -40,13 +45,14 @@ class periodic_Matern52(kernpart):
return alpha*np.cos(omega*x+phase)
return f
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
@ -57,6 +63,7 @@ class periodic_Matern52(kernpart):
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
@ -105,7 +112,8 @@ class periodic_Matern52(kernpart):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target):
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -178,8 +186,80 @@ class periodic_Matern52(kernpart):
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial)
target[0] += np.sum(dK_dvar*dL_dK)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial)
target[1] += np.sum(dK_dlen*dL_dK)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial)
target[2] += np.sum(dK_dper*dL_dK)
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + .5*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + .5*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower.T,dFlower_dper))
dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)

View file

@ -1,6 +1,11 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.decorators import silence_errors
class periodic_exponential(kernpart):
"""
@ -40,6 +45,7 @@ class periodic_exponential(kernpart):
return alpha*np.cos(omega*x+phase)
return f
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
@ -57,6 +63,7 @@ class periodic_exponential(kernpart):
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
@ -101,7 +108,8 @@ class periodic_exponential(kernpart):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target):
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -162,9 +170,67 @@ class periodic_exponential(kernpart):
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial)
target[0] += np.sum(dK_dvar*dL_dK)
target[1] += np.sum(dK_dlen*dL_dK)
target[2] += np.sum(dK_dper*dL_dK)
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)
#dK_dlen
da_dlen = [-1./self.lengthscale**2,0.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)

View file

@ -6,7 +6,7 @@ import numpy as np
import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class product(kernpart):
class prod(kernpart):
"""
Computes the product of 2 kernels that are defined on the same space
@ -55,7 +55,7 @@ class product(kernpart):
self.k2.Kdiag(X,target2)
target += target1 * target2
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
@ -65,13 +65,13 @@ class product(kernpart):
k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam)
self.k1.dK_dtheta(partial*K2, X, X2, k1_target)
self.k2.dK_dtheta(partial*K1, X, X2, k2_target)
self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target)
self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target)
target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target
def dK_dX(self,partial,X,X2,target):
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
@ -79,19 +79,19 @@ class product(kernpart):
self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2)
self.k1.dK_dX(partial*K2, X, X2, target)
self.k2.dK_dX(partial*K1, X, X2, target)
self.k1.dK_dX(dL_dK*K2, X, X2, target)
self.k2.dK_dX(dL_dK*K1, X, X2, target)
def dKdiag_dX(self,partial,X,target):
def dKdiag_dX(self,dL_dKdiag,X,target):
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2)
self.k1.dKdiag_dX(partial*target2, X, target)
self.k2.dKdiag_dX(partial*target1, X, target)
self.k1.dKdiag_dX(dL_dKdiag*target2, X, target)
self.k2.dKdiag_dX(dL_dKdiag*target1, X, target)
def dKdiag_dtheta(self,partial,X,target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
@ -100,8 +100,8 @@ class product(kernpart):
k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam)
self.k1.dKdiag_dtheta(partial*target2, X, k1_target)
self.k2.dKdiag_dtheta(partial*target1, X, k2_target)
self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target)
self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target)
target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target

View file

@ -6,7 +6,7 @@ import numpy as np
import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class product_orthogonal(kernpart):
class prod_orthogonal(kernpart):
"""
Computes the product of 2 kernels
@ -36,41 +36,44 @@ class product_orthogonal(kernpart):
def _get_param_names(self):
"""return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],target1)
target1 = np.zeros_like(target)
target2 = np.zeros_like(target)
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
target += target1 * target2
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X[:,0:self.k1.D],target1)
self.k2.Kdiag(X[:,self.k1.D:],target2)
target += target1 * target2
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam)
self.k1.dK_dtheta(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], k1_target)
self.k2.dK_dtheta(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], k2_target)
self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,:self.k1.D],target1)
self.k2.Kdiag(X[:,self.k1.D:],target2)
target += target1 * target2
def dK_dX(self,partial,X,X2,target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,:self.k1.D],K1)
self.k2.Kdiag(X[:,self.k1.D:],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
@ -78,8 +81,15 @@ class product_orthogonal(kernpart):
self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
self.k1.dK_dX(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
self.k2.dK_dX(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,0:self.k1.D],K1)
self.k2.Kdiag(X[:,self.k1.D:],K2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target)
def dKdiag_dX(self,X,target):
pass

View file

@ -0,0 +1,80 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
class rational_quadratic(kernpart):
"""
rational quadratic kernel
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2 \ell^2} \\bigg)^{- \\alpha} \ \ \ \ \ \\text{ where } r^2 = (x-y)^2
:param D: the number of input dimensions
:type D: int (D=1 is the only value currently supported)
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscale :math:`\ell`
:type lengthscale: float
:param power: the power :math:`\\alpha`
:type power: float
:rtype: kernpart object
"""
def __init__(self,D,variance=1.,lengthscale=1.,power=1.):
assert D == 1, "For this kernel we assume D=1"
self.D = D
self.Nparam = 3
self.name = 'rat_quad'
self.variance = variance
self.lengthscale = lengthscale
self.power = power
def _get_params(self):
return np.hstack((self.variance,self.lengthscale,self.power))
def _set_params(self,x):
self.variance = x[0]
self.lengthscale = x[1]
self.power = x[2]
def _get_param_names(self):
return ['variance','lengthscale','power']
def K(self,X,X2,target):
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
target += self.variance*(1 + dist2/2.)**(-self.power)
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target):
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
dvar = (1 + dist2/2.)**(-self.power)
dl = self.power * self.variance * dist2 * self.lengthscale**(-3) * (1 + dist2/2./self.power)**(-self.power-1)
dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)
target[0] += np.sum(dvar*dL_dK)
target[1] += np.sum(dl*dL_dK)
target[2] += np.sum(dp*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag)
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
target += np.sum(dL_dK*dX,1)[:,np.newaxis]
def dKdiag_dX(self,dL_dKdiag,X,target):
pass

View file

@ -12,7 +12,7 @@ class rbf(kernpart):
.. math::
k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}}
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
@ -82,27 +82,27 @@ class rbf(kernpart):
def Kdiag(self,X,target):
np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
self._K_computations(X,X2)
target[0] += np.sum(self._K_dvar*partial)
target[0] += np.sum(self._K_dvar*dL_dK)
if self.ARD == True:
dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else:
target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*partial)
#np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*dL_dK)
#np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*dL_dK)
def dKdiag_dtheta(self,partial,X,target):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
#NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial)
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target):
def dK_dX(self,dL_dK,X,X2,target):
self._K_computations(X,X2)
_K_dist = X[:,None,:]-X2[None,:,:]
dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target):
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
@ -113,69 +113,69 @@ class rbf(kernpart):
def psi0(self,Z,mu,S,target):
target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += np.sum(partial)
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
target[0] += np.sum(dL_dpsi0)
def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
pass
def psi1(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
target += self._psi1
def dpsi1_dtheta(self,partial,Z,mu,S,target):
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
self._psi_computations(Z,mu,S)
denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:])
d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv)
target[0] += np.sum(partial*self._psi1/self.variance)
dpsi1_dlength = d_length*partial[:,:,None]
target[0] += np.sum(dL_dpsi1*self._psi1/self.variance)
dpsi1_dlength = d_length*dL_dpsi1[:,:,None]
if not self.ARD:
target[1] += dpsi1_dlength.sum()
else:
target[1:] += dpsi1_dlength.sum(0).sum(0)
def dpsi1_dZ(self,partial,Z,mu,S,target):
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
self._psi_computations(Z,mu,S)
denominator = (self.lengthscale2*(self._psi1_denom))
dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
target += np.sum(dL_dpsi1.T[:,:,None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
target_mu += np.sum(dL_dpsi1.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(dL_dpsi1.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
target += self._psi2
def dpsi2_dtheta(self,partial,Z,mu,S,target):
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
"""Shape N,M,M,Ntheta"""
self._psi_computations(Z,mu,S)
d_var = 2.*self._psi2/self.variance
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
target[0] += np.sum(partial*d_var)
dpsi2_dlength = d_length*partial[:,:,:,None]
target[0] += np.sum(dL_dpsi2*d_var)
dpsi2_dlength = d_length*dL_dpsi2[:,:,:,None]
if not self.ARD:
target[1] += dpsi2_dlength.sum()
else:
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
def dpsi2_dZ(self,partial,Z,mu,S,target):
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S)
term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
target += (partial[:,:,:,None]*dZ).sum(0).sum(0)
target += (dL_dpsi2[:,:,:,None]*dZ).sum(0).sum(0)
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """
self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
#---------------------------------------#

92
GPy/kern/symmetric.py Normal file
View file

@ -0,0 +1,92 @@
# Copyright (c) 2012 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
class symmetric(kernpart):
"""
Symmetrical kernels
:param k: the kernel to symmetrify
:type k: kernpart
:param transform: the transform to use in symmetrification (allows symmetry on specified axes)
:type transform: A numpy array (D x D) specifiying the transform
:rtype: kernpart
"""
def __init__(self,k,transform=None):
if transform is None:
transform = np.eye(k.D)*-1.
assert transform.shape == (k.D, k.D)
self.transform = transform
self.D = k.D
self.Nparam = k.Nparam
self.name = k.name + '_symm'
self.k = k
self._set_params(k._get_params())
def _get_params(self):
"""return the value of the parameters."""
return self.k._get_params()
def _set_params(self,x):
"""set the value of the parameters."""
self.k._set_params(x)
def _get_param_names(self):
"""return parameter names."""
return self.k._get_param_names()
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.K(X,X2,target)
self.k.K(AX,X2,target)
self.k.K(X,AX2,target)
self.k.K(AX,AX2,target)
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.dK_dtheta(dL_dK,X,X2,target)
self.k.dK_dtheta(dL_dK,AX,X2,target)
self.k.dK_dtheta(dL_dK,X,AX2,target)
self.k.dK_dtheta(dL_dK,AX,AX2,target)
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.dK_dX(dL_dK, X, X2, target)
self.k.dK_dX(dL_dK, AX, X2, target)
self.k.dK_dX(dL_dK, X, AX2, target)
self.k.dK_dX(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
foo = np.zeros((X.shape[0],X.shape[0]))
self.K(X,X,foo)
target += np.diag(foo)
def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
raise NotImplementedError

View file

@ -37,50 +37,50 @@ class white(kernpart):
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,partial,X,X2,target):
def dK_dtheta(self,dL_dK,X,X2,target):
if X.shape==X2.shape:
if np.all(X==X2):
target += np.trace(partial)
target += np.trace(dL_dK)
def dKdiag_dtheta(self,partial,X,target):
target += np.sum(partial)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target):
def dK_dX(self,dL_dK,X,X2,target):
pass
def dKdiag_dX(self,partial,X,target):
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def psi0(self,Z,mu,S,target):
target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target):
target += partial.sum()
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
target += dL_dpsi0.sum()
def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
pass
def psi1(self,Z,mu,S,target):
pass
def dpsi1_dtheta(self,partial,Z,mu,S,target):
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
pass
def dpsi1_dZ(self,partial,Z,mu,S,target):
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
pass
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
pass
def psi2(self,Z,mu,S,target):
pass
def dpsi2_dZ(self,partial,Z,mu,S,target):
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
pass
def dpsi2_dtheta(self,partial,Z,mu,S,target):
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
pass
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
pass

View file

@ -17,7 +17,7 @@ class EP(likelihood):
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data
self.N = self.data.size
self.N, self.D = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
@ -29,7 +29,7 @@ class EP(likelihood):
#initial values for the GP variables
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
@ -54,18 +54,14 @@ class EP(likelihood):
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.precision = self.tau_tilde
self.covariance_matrix = np.diag(1./self.precision)
self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None]
def fit_full(self,K):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
"""
#Prior distribution parameters: p(f|X) = N(f|0,K)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N)
Sigma = K.copy()
@ -114,7 +110,7 @@ class EP(likelihood):
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
V,info = linalg.lapack.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
@ -124,13 +120,14 @@ class EP(likelihood):
return self._compute_GP_variables()
def fit_DTC(self, Knn_diag, Kmn, Kmm):
#def fit_DTC(self, Knn_diag, Kmn, Kmm):
def fit_DTC(self, Kmm, Kmn):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
#TODO: this doesn;t work with uncertain inputs!
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
@ -158,12 +155,12 @@ class EP(likelihood):
sigma_ = 1./tau_
mu_ = v_/tau_
"""
tau_ = np.empty(self.N,dtype=float)
v_ = np.empty(self.N,dtype=float)
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
Z_hat = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
@ -172,49 +169,50 @@ class EP(likelihood):
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [tau_tilde.copy()]
np2 = [v_tilde.copy()]
np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] = tau_tilde[i] + Delta_tau
v_tilde[i] = v_tilde[i] + Delta_v
self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu = mu + (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
LLT0 = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0)
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.lapack.flapack.dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,v_tilde)
Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N
np1.append(tau_tilde.copy())
np2.append(v_tilde.copy())
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy())
self._compute_GP_variables()
def fit_FITC(self, Knn_diag, Kmn):
def fit_FITC(self, Kmm, Kmn, Knn_diag):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
M = Kmm.shape[0]
"""
Prior approximation parameters:
@ -235,7 +233,7 @@ class EP(likelihood):
mu = w + P*gamma
"""
self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M)
self.gamma = np.zeros(M)
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
@ -271,7 +269,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i])
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -281,10 +279,10 @@ class EP(likelihood):
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,self.M)
pi_ = P[i,:].reshape(1,M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
@ -296,8 +294,8 @@ class EP(likelihood):
Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.flapack.dtrtrs(L,R0,lower=1)
L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde

View file

@ -8,7 +8,7 @@ class Gaussian(likelihood):
self.Z = 0. # a correction factor which accounts for the approximation made
N, self.D = data.shape
#normalisation
#normaliztion
if normalize:
self._mean = data.mean(0)[None,:]
self._std = data.std(0)[None,:]
@ -45,7 +45,7 @@ class Gaussian(likelihood):
def predictive_values(self,mu,var):
"""
Un-normalise the prediction and add the likelihood variance, then return the 5%, 95% interval
Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
"""
mean = mu*self._std + self._mean
true_var = (var + self._variance)*self._std**2

View file

@ -37,8 +37,8 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
# TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1"
if data_i == 0: data_i = -1 #NOTE Binary classification works better classes {-1,1}, 1D-plotting works better with classes {0,1}.
if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
# TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)

View file

@ -41,7 +41,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def _get_param_names(self):
X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
S_names = sum([['S_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
return (X_names + S_names + sparse_GP._get_param_names(self))
def _get_params(self):
@ -83,3 +83,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs)
pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')

View file

@ -30,7 +30,6 @@ class GP(model):
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
#FIXME normalize vs normalise
def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
# parse arguments
@ -41,7 +40,7 @@ class GP(model):
assert isinstance(kernel, kern.kern)
self.kern = kernel
#here's some simple normalisation for the inputs
#here's some simple normalization for the inputs
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
@ -60,13 +59,19 @@ class GP(model):
model.__init__(self)
def dL_dZ(self):
"""
TODO: one day we might like to learn Z by gradient methods?
"""
return np.zeros_like(self.Z)
def _set_params(self,p):
self.kern._set_params_transformed(p[:self.kern.Nparam])
#self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.K = self.kern.K(self.X,slices1=self.Xslices)
self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
@ -123,12 +128,12 @@ class GP(model):
For the likelihood parameters, pass in alpha = K^-1 y
"""
return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self,_Xnew,slices=None, full_cov=False):
"""
Internal helper function for making predictions, does not account
for normalisation or likelihood
for normalization or likelihood
"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
@ -166,10 +171,10 @@ class GP(model):
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalisations of the output dimensions.
This is to allow for different normalizations of the output dimensions.
"""
#normalise X values
#normalize X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov)
@ -181,7 +186,7 @@ class GP(model):
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
"""
Plot the GP's view of the world, where the data is normalised and the likelihood is Gaussian
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
:param samples: the number of a posteriori samples to plot
:param which_data: which if the training data to plot (default all)
@ -197,7 +202,7 @@ class GP(model):
- In higher dimensions, we've no implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions using which_data and which_functions
Plot the data's view of the world, with non-normalised values and GP predictions passed through the likelihood
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
@ -215,7 +220,7 @@ class GP(model):
Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
for i in range(samples):
pb.plot(Xnew,Ysim[i,:],Tango.coloursHex['darkBlue'],linewidth=0.25)
pb.plot(Xnew,Ysim[i,:],Tango.colorsHex['darkBlue'],linewidth=0.25)
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
pb.xlim(xmin,xmax)
ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
@ -236,7 +241,12 @@ class GP(model):
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,full_cov=False):
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,levels=20):
"""
TODO: Docstrings!
:param levels: for 2D plotting, the number of contour levels to use
"""
# TODO include samples
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
@ -258,6 +268,8 @@ class GP(model):
if hasattr(self,'Z'):
Zu = self.Z*self._Xstd + self._Xmean
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
elif self.X.shape[1]==2: #FIXME
resolution = resolution or 50
@ -265,10 +277,13 @@ class GP(model):
x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T
pb.contour(x,y,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
pb.contour(x,y,m,levels,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
Yf = self.likelihood.Y.flatten()
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
if hasattr(self,'Z'):
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@ -10,6 +10,7 @@ from ..core import model
from ..util.linalg import pdinv, PCA
from GP import GP
from ..likelihoods import Gaussian
from .. import util
class GPLVM(GP):
"""
@ -59,5 +60,66 @@ class GPLVM(GP):
mu, var, upper, lower = self.predict(Xnew)
pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
def plot_latent(self):
raise NotImplementedError
def plot_latent(self,labels=None, which_indices=None, resolution=50):
"""
:param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance
"""
util.plot.Tango.reset()
if labels is None:
labels = np.ones(self.N)
if which_indices is None:
if self.Q==1:
input_1 = 0
input_2 = None
if self.Q==2:
input_1, input_2 = 0,1
else:
#try to find a linear of RBF kern in the kernel
k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
if (not len(k)==1) or (not k[0].ARD):
raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
k = k[0]
if k.name=='rbf':
input_1, input_2 = np.argsort(k.lengthscale)[:2]
elif k.name=='linear':
input_1, input_2 = np.argsort(k.variances)[::-1][:2]
#first, plot the output variance as a function of the latent space
Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
Xtest_full[:, :2] = Xtest
mu, var, low, up = self.predict(Xtest_full)
var = var[:, :2]
pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear')
for i,ul in enumerate(np.unique(labels)):
if type(ul) is np.string_:
this_label = ul
elif type(ul) is np.int64:
this_label = 'class %i'%ul
else:
this_label = 'class %i'%i
index = np.nonzero(labels==ul)[0]
if self.Q==1:
x = self.X[index,input_1]
y = np.zeros(index.size)
else:
x = self.X[index,input_1]
y = self.X[index,input_2]
pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
pb.xlabel('latent dimension %i'%input_1)
pb.ylabel('latent dimension %i'%input_2)
if not np.all(labels==1.):
pb.legend(loc=0,numpoints=1)
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
return input_1, input_2

View file

@ -10,4 +10,4 @@ from GPLVM import GPLVM
from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from BGPLVM import Bayesian_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM

View file

@ -3,7 +3,7 @@
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from GP import GP
@ -54,37 +54,44 @@ class sparse_GP(GP):
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
#normalise X uncertainty also
#normalize X uncertainty also
if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd)
def _computations(self):
# TODO find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
sf = self.scale_factor
sf2 = sf**2
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty)
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
if self.likelihood.is_heteroscedastic:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0)
#TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
else:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
self.psi1 = self.kern.K(self.Z,self.X)
if self.likelihood.is_heteroscedastic:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.reshape(self.N,1))/sf)
self.psi2 = None
def _computations(self):
#TODO: find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
sf = self.scale_factor
sf2 = sf**2
#The rather complex computations of psi2_beta_scaled
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
else:
if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] # TODO: remove me for efficiency and stability
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
@ -95,28 +102,44 @@ class sparse_GP(GP):
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
tmp = np.dot(self.Lmi.T, self.LBi.T)
self.C = np.dot(tmp,tmp.T)
self.Cpsi1V = np.dot(self.C,self.psi1V)
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
# Compute dL_dpsi # FIXME: this is untested for the het. case
self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N)
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
#self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
if self.likelihood.is_heteroscedastic:
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
if self.has_uncertain_inputs:
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
else:
self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi2 = None
else:
self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
#repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
if self.has_uncertain_inputs:
#repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
else:
self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
self.dL_dpsi2 = None
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
#self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
#self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
#the partial derivative vector for the likelihood
if self.likelihood.Nparams ==0:
@ -130,18 +153,31 @@ class sparse_GP(GP):
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else:
#likelihood is not heterscedatic
beta = self.likelihood.precision
dbeta = 0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = 0.5*np.trace(self.Cpsi1VVpsi1)
return A+B+C+D
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
@ -150,17 +186,20 @@ class sparse_GP(GP):
def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
self.likelihood.fit_DTC(self.Kmm,self.psi1)
#self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
@ -173,15 +212,9 @@ class sparse_GP(GP):
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_uncertainty)
else:
#re-cast computations in psi2 back to psi1:
#dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
if not self.likelihood.is_heteroscedastic:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1)
else:
raise NotImplementedError, "TODO"
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
@ -195,16 +228,11 @@ class sparse_GP(GP):
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else:
#re-cast computations in psi2 back to psi1:
if not self.likelihood.is_heteroscedastic:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1)
else:
raise NotImplementedError, "TODO"
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
"""Internal helper function for making predictions, does not account for normalization"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
@ -216,14 +244,3 @@ class sparse_GP(GP):
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
if self.Q==1:
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')

View file

@ -42,11 +42,8 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
return sparse_GP_regression.log_likelihood(self)
def dL_dX(self):
#dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2[0,:,:],self.psi1)
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
dL_dX += self.kern.dK_dX(dL_dpsi1.T,self.X,self.Z)
dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z)
return dL_dX
@ -58,3 +55,7 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
#passing Z without a small amout of jitter will induce the white kernel where we don;t want it!
mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')

View file

@ -93,7 +93,7 @@ class uncollapsed_sparse_GP(sparse_GP):
return A+B+C+D+E
def _raw_predict(self, Xnew, slices,full_cov=False):
"""Internal helper function for making predictions, does not account for normalisation"""
"""Internal helper function for making predictions, does not account for normalization"""
Kx = self.kern.K(Xnew,self.Z)
mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])

View file

@ -1,42 +1,61 @@
Fails in weird ways if you pass a integer as the input instead of a double to the kernel.
the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute, it's also confusing for first time users. Should only be returned if the user explicitly requests it.
FIXED
The Matern kernels (at least the 52) still is working in the ARD manner which means it wouldn't run for very large input dimension. Needs to be fixed to match the RBF.
Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
Missing kernels: polynomial, rational quadratic.
Kernel implementations are far to obscure. Need to be easily readable for a first time user.
Need an implementation of scaled conjugate gradients for the optimizers.
Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
Change get_param and set_param to get_params and set_params
Get constrain param by default inside model creation.
Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X) you have to do rbf.K(X, X)
FIXED
Change Youter to YYT (Youter doesn't mean anything for matrices).
FIXED
Change get_param and set_param to get_params and set_params
FIXED
Fails in weird ways if you pass a integer as the input instead of a double to the kernel.
FIXED
The Matern kernels (at least the 52) still is working in the ARD manner which means it wouldn't run for very large input dimension. Needs to be fixed to match the RBF.
FIXED
Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
TODO, a priority for this release
Missing kernels: polynomial, rational quadratic.
TODO, should be straightforward when the above is fixed.
Need an implementation of scaled conjugate gradients for the optimizers.
UPSTREAM: scipy are tidying up the optimize module. let's wait for their next release.
Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
As above.
Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
Any word from Carl yet?
Get constrain param by default inside model creation.
Well, we have ensure_default_constraints. There are some techinical difficulties in doing it inside model creation, so perhaps this is something for a later release.
Bug when running classification.crescent_data()
TODO.
Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
Upstream: Waiting for the new scipy, where the optimisers have been unified. Obviously it's be much better to be able to specify a unified set of args.
Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
Upstream, as above
A dictionary for parameter storage? So we can go through names easily?
Wontfix. Dictionaries bring up all kinds of problems since they're not ordered. it's easy enough to do:
for val, name in zip(m._get_params(), m._get_param_names()): foobar
A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
TODO, agree this would be helpful.
Diagonal noise covariance function
TODO this is now straightforward using the likelihood framework, or as a kern. NF also requires a similar kind of kern function (a fixed form kernel)
Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X) you have to do rbf.K(X, X)
Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
Excellent proposal, but lots of work: suggest leaving for the next release?
the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute, it's also confusing for first time users. Should only be returned if the user explicitly requests it.

0
GPy/testing/__init__.py Normal file
View file

View file

@ -12,9 +12,10 @@ class BGPLVMTests(unittest.TestCase):
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y -= Y.mean(axis=0)
k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)')
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -24,9 +25,10 @@ class BGPLVMTests(unittest.TestCase):
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y -= Y.mean(axis=0)
k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(linear|bias|noise|white|S)')
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@ -36,13 +38,41 @@ class BGPLVMTests(unittest.TestCase):
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y -= Y.mean(axis=0)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)')
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_bias_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y -= Y.mean(axis=0)
k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
#@unittest.skip('psi2 cross terms are NotImplemented for this combination')
def test_linear_bias_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
Y -= Y.mean(axis=0)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -0,0 +1,81 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
import inspect
import pkgutil
import os
import random
from nose.tools import nottest
class ExamplesTests(unittest.TestCase):
def _checkgrad(self, model):
self.assertTrue(model.checkgrad())
def _model_instance(self, model):
self.assertTrue(isinstance(model, GPy.models))
"""
def model_instance_generator(model):
def check_model_returned(self):
self._model_instance(model)
return check_model_returned
def checkgrads_generator(model):
def model_checkgrads(self):
self._checkgrad(model)
return model_checkgrads
"""
def model_checkgrads(model):
model.randomize()
assert model.checkgrad()
def model_instance(model):
assert isinstance(model, GPy.core.model)
@nottest
def test_models():
examples_path = os.path.dirname(GPy.examples.__file__)
#Load modules
for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]):
#Load examples
module_examples = loader.find_module(module_name).load_module(module_name)
print "MODULE", module_examples
print "Before"
print inspect.getmembers(module_examples, predicate=inspect.isfunction)
functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ][::-1]
print "After"
print functions
for example in functions:
if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100']:
print "SKIPPING"
continue
print "Testing example: ", example[0]
#Generate model
model = example[1]()
print model
#Create tests for instance check
"""
test = model_instance_generator(model)
test.__name__ = 'test_instance_%s' % example[0]
setattr(ExamplesTests, test.__name__, test)
#Create tests for checkgrads check
test = checkgrads_generator(model)
test.__name__ = 'test_checkgrads_%s' % example[0]
setattr(ExamplesTests, test.__name__, test)
"""
model_checkgrads.description = 'test_checkgrads_%s' % example[0]
yield model_checkgrads, model
model_instance.description = 'test_instance_%s' % example[0]
yield model_instance, model
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -0,0 +1,47 @@
# Copyright (c) 2012, Nicolo Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class GPLVMTests(unittest.TestCase):
def test_bias_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.GPLVM(Y, Q, kernel = k)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.GPLVM(Y, Q, kernel = k)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.GPLVM(Y, Q, kernel = k)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -8,14 +8,29 @@ import GPy
class KernelTests(unittest.TestCase):
def test_kerneltie(self):
K = GPy.kern.rbf(5, ARD=True)
K.tie_param('[01]')
K.tie_params('[01]')
K.constrain_fixed('2')
X = np.random.rand(5,5)
Y = np.ones((5,1))
m = GPy.models.GP_regression(X,Y,K)
print m
self.assertTrue(m.checkgrad())
def test_coregionalisation(self):
X1 = np.random.rand(50,1)*8
X2 = np.random.rand(30,1)*5
index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
X = np.hstack((np.vstack((X1,X2)),index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2,1)
k = k1.prod_orthogonal(k2)
m = GPy.models.GP_regression(X,Y,kernel=k)
self.assertTrue(m.checkgrad())
if __name__ == "__main__":

View file

@ -0,0 +1,48 @@
# Copyright (c) 2012, Nicolo Fusi, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class sparse_GPLVMTests(unittest.TestCase):
def test_bias_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@unittest.skip('linear kernels do not have dKdiag_dX')
def test_linear_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -157,13 +157,28 @@ class GradientTests(unittest.TestCase):
def test_GP_EP_probit(self):
N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.GP(X, likelihood, kernel)
m.ensure_default_constraints()
self.assertTrue(m.EPEM)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
#self.assertTrue(m.EPEM)
def test_sparse_EP_DTC_probit(self):
N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
Z = np.linspace(0,15,4)[:,None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.sparse_GP(X, likelihood, kernel,Z)
m.ensure_default_constraints()
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self):
@ -177,17 +192,6 @@ class GradientTests(unittest.TestCase):
m.approximate_likelihood()
self.assertTrue(m.checkgrad())
def test_warped_GP(self):
xmin, xmax = 1, 2.5*np.pi
b, C, SNR = 1, 0, 0.1
X = np.linspace(xmin, xmax, 500)
y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None]
m = GPy.models.warpedGP(X, y, warping_terms = 3)
m.constrain_positive('(tanh_a|tanh_b|rbf|white|bias)')
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."

View file

@ -25,7 +25,7 @@ def fewerXticks(ax=None,divideby=2):
ax.set_xticks(ax.get_xticks()[::divideby])
coloursHex = {\
colorsHex = {\
"Aluminium6":"#2e3436",\
"Aluminium5":"#555753",\
"Aluminium4":"#888a85",\
@ -54,9 +54,9 @@ coloursHex = {\
"mediumButter":"#edd400",\
"darkButter":"#c4a000"}
darkList = [coloursHex['darkBlue'],coloursHex['darkRed'],coloursHex['darkGreen'], coloursHex['darkOrange'], coloursHex['darkButter'], coloursHex['darkPurple'], coloursHex['darkChocolate'], coloursHex['Aluminium6']]
mediumList = [coloursHex['mediumBlue'], coloursHex['mediumRed'],coloursHex['mediumGreen'], coloursHex['mediumOrange'], coloursHex['mediumButter'], coloursHex['mediumPurple'], coloursHex['mediumChocolate'], coloursHex['Aluminium5']]
lightList = [coloursHex['lightBlue'], coloursHex['lightRed'],coloursHex['lightGreen'], coloursHex['lightOrange'], coloursHex['lightButter'], coloursHex['lightPurple'], coloursHex['lightChocolate'], coloursHex['Aluminium4']]
darkList = [colorsHex['darkBlue'],colorsHex['darkRed'],colorsHex['darkGreen'], colorsHex['darkOrange'], colorsHex['darkButter'], colorsHex['darkPurple'], colorsHex['darkChocolate'], colorsHex['Aluminium6']]
mediumList = [colorsHex['mediumBlue'], colorsHex['mediumRed'],colorsHex['mediumGreen'], colorsHex['mediumOrange'], colorsHex['mediumButter'], colorsHex['mediumPurple'], colorsHex['mediumChocolate'], colorsHex['Aluminium5']]
lightList = [colorsHex['lightBlue'], colorsHex['lightRed'],colorsHex['lightGreen'], colorsHex['lightOrange'], colorsHex['lightButter'], colorsHex['lightPurple'], colorsHex['lightChocolate'], colorsHex['Aluminium4']]
def currentDark():
return darkList[-1]
@ -76,85 +76,85 @@ def nextLight():
return lightList[-1]
def reset():
while not darkList[0]==coloursHex['darkBlue']:
while not darkList[0]==colorsHex['darkBlue']:
darkList.append(darkList.pop(0))
while not mediumList[0]==coloursHex['mediumBlue']:
while not mediumList[0]==colorsHex['mediumBlue']:
mediumList.append(mediumList.pop(0))
while not lightList[0]==coloursHex['lightBlue']:
while not lightList[0]==colorsHex['lightBlue']:
lightList.append(lightList.pop(0))
def setLightFigures():
mpl.rcParams['axes.edgecolor']=coloursHex['Aluminium6']
mpl.rcParams['axes.facecolor']=coloursHex['Aluminium2']
mpl.rcParams['axes.labelcolor']=coloursHex['Aluminium6']
mpl.rcParams['figure.edgecolor']=coloursHex['Aluminium6']
mpl.rcParams['figure.facecolor']=coloursHex['Aluminium2']
mpl.rcParams['grid.color']=coloursHex['Aluminium6']
mpl.rcParams['savefig.edgecolor']=coloursHex['Aluminium2']
mpl.rcParams['savefig.facecolor']=coloursHex['Aluminium2']
mpl.rcParams['text.color']=coloursHex['Aluminium6']
mpl.rcParams['xtick.color']=coloursHex['Aluminium6']
mpl.rcParams['ytick.color']=coloursHex['Aluminium6']
mpl.rcParams['axes.edgecolor']=colorsHex['Aluminium6']
mpl.rcParams['axes.facecolor']=colorsHex['Aluminium2']
mpl.rcParams['axes.labelcolor']=colorsHex['Aluminium6']
mpl.rcParams['figure.edgecolor']=colorsHex['Aluminium6']
mpl.rcParams['figure.facecolor']=colorsHex['Aluminium2']
mpl.rcParams['grid.color']=colorsHex['Aluminium6']
mpl.rcParams['savefig.edgecolor']=colorsHex['Aluminium2']
mpl.rcParams['savefig.facecolor']=colorsHex['Aluminium2']
mpl.rcParams['text.color']=colorsHex['Aluminium6']
mpl.rcParams['xtick.color']=colorsHex['Aluminium6']
mpl.rcParams['ytick.color']=colorsHex['Aluminium6']
def setDarkFigures():
mpl.rcParams['axes.edgecolor']=coloursHex['Aluminium2']
mpl.rcParams['axes.facecolor']=coloursHex['Aluminium6']
mpl.rcParams['axes.labelcolor']=coloursHex['Aluminium2']
mpl.rcParams['figure.edgecolor']=coloursHex['Aluminium2']
mpl.rcParams['figure.facecolor']=coloursHex['Aluminium6']
mpl.rcParams['grid.color']=coloursHex['Aluminium2']
mpl.rcParams['savefig.edgecolor']=coloursHex['Aluminium6']
mpl.rcParams['savefig.facecolor']=coloursHex['Aluminium6']
mpl.rcParams['text.color']=coloursHex['Aluminium2']
mpl.rcParams['xtick.color']=coloursHex['Aluminium2']
mpl.rcParams['ytick.color']=coloursHex['Aluminium2']
mpl.rcParams['axes.edgecolor']=colorsHex['Aluminium2']
mpl.rcParams['axes.facecolor']=colorsHex['Aluminium6']
mpl.rcParams['axes.labelcolor']=colorsHex['Aluminium2']
mpl.rcParams['figure.edgecolor']=colorsHex['Aluminium2']
mpl.rcParams['figure.facecolor']=colorsHex['Aluminium6']
mpl.rcParams['grid.color']=colorsHex['Aluminium2']
mpl.rcParams['savefig.edgecolor']=colorsHex['Aluminium6']
mpl.rcParams['savefig.facecolor']=colorsHex['Aluminium6']
mpl.rcParams['text.color']=colorsHex['Aluminium2']
mpl.rcParams['xtick.color']=colorsHex['Aluminium2']
mpl.rcParams['ytick.color']=colorsHex['Aluminium2']
def hex2rgb(hexcolor):
hexcolor = [hexcolor[1+2*i:1+2*(i+1)] for i in range(3)]
r,g,b = [int(n,16) for n in hexcolor]
return (r,g,b)
coloursRGB = dict([(k,hex2rgb(i)) for k,i in coloursHex.items()])
colorsRGB = dict([(k,hex2rgb(i)) for k,i in colorsHex.items()])
cdict_RB = {'red' :((0.,coloursRGB['mediumRed'][0]/256.,coloursRGB['mediumRed'][0]/256.),
(.5,coloursRGB['mediumPurple'][0]/256.,coloursRGB['mediumPurple'][0]/256.),
(1.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.)),
'green':((0.,coloursRGB['mediumRed'][1]/256.,coloursRGB['mediumRed'][1]/256.),
(.5,coloursRGB['mediumPurple'][1]/256.,coloursRGB['mediumPurple'][1]/256.),
(1.,coloursRGB['mediumBlue'][1]/256.,coloursRGB['mediumBlue'][1]/256.)),
'blue':((0.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.),
(.5,coloursRGB['mediumPurple'][2]/256.,coloursRGB['mediumPurple'][2]/256.),
(1.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.))}
cdict_RB = {'red' :((0.,colorsRGB['mediumRed'][0]/256.,colorsRGB['mediumRed'][0]/256.),
(.5,colorsRGB['mediumPurple'][0]/256.,colorsRGB['mediumPurple'][0]/256.),
(1.,colorsRGB['mediumBlue'][0]/256.,colorsRGB['mediumBlue'][0]/256.)),
'green':((0.,colorsRGB['mediumRed'][1]/256.,colorsRGB['mediumRed'][1]/256.),
(.5,colorsRGB['mediumPurple'][1]/256.,colorsRGB['mediumPurple'][1]/256.),
(1.,colorsRGB['mediumBlue'][1]/256.,colorsRGB['mediumBlue'][1]/256.)),
'blue':((0.,colorsRGB['mediumRed'][2]/256.,colorsRGB['mediumRed'][2]/256.),
(.5,colorsRGB['mediumPurple'][2]/256.,colorsRGB['mediumPurple'][2]/256.),
(1.,colorsRGB['mediumBlue'][2]/256.,colorsRGB['mediumBlue'][2]/256.))}
cdict_BGR = {'red' :((0.,coloursRGB['mediumBlue'][0]/256.,coloursRGB['mediumBlue'][0]/256.),
(.5,coloursRGB['mediumGreen'][0]/256.,coloursRGB['mediumGreen'][0]/256.),
(1.,coloursRGB['mediumRed'][0]/256.,coloursRGB['mediumRed'][0]/256.)),
'green':((0.,coloursRGB['mediumBlue'][1]/256.,coloursRGB['mediumBlue'][1]/256.),
(.5,coloursRGB['mediumGreen'][1]/256.,coloursRGB['mediumGreen'][1]/256.),
(1.,coloursRGB['mediumRed'][1]/256.,coloursRGB['mediumRed'][1]/256.)),
'blue':((0.,coloursRGB['mediumBlue'][2]/256.,coloursRGB['mediumBlue'][2]/256.),
(.5,coloursRGB['mediumGreen'][2]/256.,coloursRGB['mediumGreen'][2]/256.),
(1.,coloursRGB['mediumRed'][2]/256.,coloursRGB['mediumRed'][2]/256.))}
cdict_BGR = {'red' :((0.,colorsRGB['mediumBlue'][0]/256.,colorsRGB['mediumBlue'][0]/256.),
(.5,colorsRGB['mediumGreen'][0]/256.,colorsRGB['mediumGreen'][0]/256.),
(1.,colorsRGB['mediumRed'][0]/256.,colorsRGB['mediumRed'][0]/256.)),
'green':((0.,colorsRGB['mediumBlue'][1]/256.,colorsRGB['mediumBlue'][1]/256.),
(.5,colorsRGB['mediumGreen'][1]/256.,colorsRGB['mediumGreen'][1]/256.),
(1.,colorsRGB['mediumRed'][1]/256.,colorsRGB['mediumRed'][1]/256.)),
'blue':((0.,colorsRGB['mediumBlue'][2]/256.,colorsRGB['mediumBlue'][2]/256.),
(.5,colorsRGB['mediumGreen'][2]/256.,colorsRGB['mediumGreen'][2]/256.),
(1.,colorsRGB['mediumRed'][2]/256.,colorsRGB['mediumRed'][2]/256.))}
cdict_Alu = {'red' :((0./5,coloursRGB['Aluminium1'][0]/256.,coloursRGB['Aluminium1'][0]/256.),
(1./5,coloursRGB['Aluminium2'][0]/256.,coloursRGB['Aluminium2'][0]/256.),
(2./5,coloursRGB['Aluminium3'][0]/256.,coloursRGB['Aluminium3'][0]/256.),
(3./5,coloursRGB['Aluminium4'][0]/256.,coloursRGB['Aluminium4'][0]/256.),
(4./5,coloursRGB['Aluminium5'][0]/256.,coloursRGB['Aluminium5'][0]/256.),
(5./5,coloursRGB['Aluminium6'][0]/256.,coloursRGB['Aluminium6'][0]/256.)),
'green' :((0./5,coloursRGB['Aluminium1'][1]/256.,coloursRGB['Aluminium1'][1]/256.),
(1./5,coloursRGB['Aluminium2'][1]/256.,coloursRGB['Aluminium2'][1]/256.),
(2./5,coloursRGB['Aluminium3'][1]/256.,coloursRGB['Aluminium3'][1]/256.),
(3./5,coloursRGB['Aluminium4'][1]/256.,coloursRGB['Aluminium4'][1]/256.),
(4./5,coloursRGB['Aluminium5'][1]/256.,coloursRGB['Aluminium5'][1]/256.),
(5./5,coloursRGB['Aluminium6'][1]/256.,coloursRGB['Aluminium6'][1]/256.)),
'blue' :((0./5,coloursRGB['Aluminium1'][2]/256.,coloursRGB['Aluminium1'][2]/256.),
(1./5,coloursRGB['Aluminium2'][2]/256.,coloursRGB['Aluminium2'][2]/256.),
(2./5,coloursRGB['Aluminium3'][2]/256.,coloursRGB['Aluminium3'][2]/256.),
(3./5,coloursRGB['Aluminium4'][2]/256.,coloursRGB['Aluminium4'][2]/256.),
(4./5,coloursRGB['Aluminium5'][2]/256.,coloursRGB['Aluminium5'][2]/256.),
(5./5,coloursRGB['Aluminium6'][2]/256.,coloursRGB['Aluminium6'][2]/256.))}
cdict_Alu = {'red' :((0./5,colorsRGB['Aluminium1'][0]/256.,colorsRGB['Aluminium1'][0]/256.),
(1./5,colorsRGB['Aluminium2'][0]/256.,colorsRGB['Aluminium2'][0]/256.),
(2./5,colorsRGB['Aluminium3'][0]/256.,colorsRGB['Aluminium3'][0]/256.),
(3./5,colorsRGB['Aluminium4'][0]/256.,colorsRGB['Aluminium4'][0]/256.),
(4./5,colorsRGB['Aluminium5'][0]/256.,colorsRGB['Aluminium5'][0]/256.),
(5./5,colorsRGB['Aluminium6'][0]/256.,colorsRGB['Aluminium6'][0]/256.)),
'green' :((0./5,colorsRGB['Aluminium1'][1]/256.,colorsRGB['Aluminium1'][1]/256.),
(1./5,colorsRGB['Aluminium2'][1]/256.,colorsRGB['Aluminium2'][1]/256.),
(2./5,colorsRGB['Aluminium3'][1]/256.,colorsRGB['Aluminium3'][1]/256.),
(3./5,colorsRGB['Aluminium4'][1]/256.,colorsRGB['Aluminium4'][1]/256.),
(4./5,colorsRGB['Aluminium5'][1]/256.,colorsRGB['Aluminium5'][1]/256.),
(5./5,colorsRGB['Aluminium6'][1]/256.,colorsRGB['Aluminium6'][1]/256.)),
'blue' :((0./5,colorsRGB['Aluminium1'][2]/256.,colorsRGB['Aluminium1'][2]/256.),
(1./5,colorsRGB['Aluminium2'][2]/256.,colorsRGB['Aluminium2'][2]/256.),
(2./5,colorsRGB['Aluminium3'][2]/256.,colorsRGB['Aluminium3'][2]/256.),
(3./5,colorsRGB['Aluminium4'][2]/256.,colorsRGB['Aluminium4'][2]/256.),
(4./5,colorsRGB['Aluminium5'][2]/256.,colorsRGB['Aluminium5'][2]/256.),
(5./5,colorsRGB['Aluminium6'][2]/256.,colorsRGB['Aluminium6'][2]/256.))}
# cmap_Alu = mpl.colors.LinearSegmentedColormap('TangoAluminium',cdict_Alu,256)
# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)

View file

@ -10,3 +10,4 @@ import Tango
import misc
import warping_functions
import datasets
import decorators

View file

@ -46,7 +46,7 @@ def oil_100(seed=default_seed):
return {'X': X, 'Y': Y, 'info': "Subsample of the oil data extracting 100 values randomly without replacement."}
def pumadyn(seed=default_seed):
# Data is variance 1, no need to normalise.
# Data is variance 1, no need to normalize.
data = np.loadtxt(os.path.join(data_path, 'pumadyn-32nm/Dataset.data.gz'))
indices = np.random.permutation(data.shape[0])
indicesTrain = indices[0:7168]

16
GPy/util/decorators.py Normal file
View file

@ -0,0 +1,16 @@
import numpy as np
from functools import wraps
def silence_errors(f):
"""
This wraps a function and it silences numpy errors that
happen during the execution. After the function has exited, it restores
the previous state of the warnings.
"""
@wraps(f)
def wrapper(*args, **kwds):
status = np.seterr(all='ignore')
result = f(*args, **kwds)
np.seterr(**status)
return result
return wrapper

View file

@ -11,9 +11,15 @@ import re
import pdb
import cPickle
import types
import scipy.lib.lapack.flapack
#import scipy.lib.lapack.flapack
import scipy as sp
def trace_dot(a,b):
"""
efficiently compute the trace of the matrix product of a and b
"""
return np.sum(a*b)
def mdot(*args):
"""Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one
@ -101,7 +107,7 @@ def chol_inv(L):
"""
return linalg.flapack.dtrtri(L, lower = True)[0]
return linalg.lapack.flapack.dtrtri(L, lower = True)[0]
def multiple_pdinv(A):
@ -118,7 +124,7 @@ def multiple_pdinv(A):
N = A.shape[-1]
chols = [jitchol(A[:,:,i]) for i in range(N)]
halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols]
invs = [linalg.flapack.dpotri(L[0],True)[0] for L in chols]
invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols]
invs = [np.triu(I)+np.triu(I,1).T for I in invs]
return np.dstack(invs),np.array(halflogdets)

View file

@ -6,7 +6,7 @@ import Tango
import pylab as pb
import numpy as np
def gpplot(x,mu,lower,upper,edgecol=Tango.coloursHex['darkBlue'],fillcol=Tango.coloursHex['lightBlue'],axes=None,**kwargs):
def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],axes=None,**kwargs):
if axes is None:
axes = pb.gca()
mu = mu.flatten()

View file

@ -4,4 +4,7 @@ GPy
A Gaussian processes framework in python
* [Online documentation](https://gpy.readthedocs.org/en/latest/)
* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)
* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)
Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png)

BIN
doc/Figures/tick.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 175 B

View file

@ -9,14 +9,6 @@ examples Package
:undoc-members:
:show-inheritance:
:mod:`BGPLVM_demo` Module
-------------------------
.. automodule:: GPy.examples.BGPLVM_demo
:members:
:undoc-members:
:show-inheritance:
:mod:`classification` Module
----------------------------
@ -25,18 +17,18 @@ examples Package
:undoc-members:
:show-inheritance:
:mod:`oil_flow_demo` Module
---------------------------
:mod:`dimensionality_reduction` Module
--------------------------------------
.. automodule:: GPy.examples.oil_flow_demo
.. automodule:: GPy.examples.dimensionality_reduction
:members:
:undoc-members:
:show-inheritance:
:mod:`poisson` Module
---------------------
:mod:`non_gaussian` Module
--------------------------
.. automodule:: GPy.examples.poisson
.. automodule:: GPy.examples.non_gaussian
:members:
:undoc-members:
:show-inheritance:
@ -49,50 +41,10 @@ examples Package
:undoc-members:
:show-inheritance:
:mod:`sparse_GPLVM_demo` Module
-------------------------------
:mod:`tutorials` Module
-----------------------
.. automodule:: GPy.examples.sparse_GPLVM_demo
:members:
:undoc-members:
:show-inheritance:
:mod:`sparse_GP_regression_demo` Module
---------------------------------------
.. automodule:: GPy.examples.sparse_GP_regression_demo
:members:
:undoc-members:
:show-inheritance:
:mod:`sparse_ep_fix` Module
---------------------------
.. automodule:: GPy.examples.sparse_ep_fix
:members:
:undoc-members:
:show-inheritance:
:mod:`uncertain_input_GP_regression_demo` Module
------------------------------------------------
.. automodule:: GPy.examples.uncertain_input_GP_regression_demo
:members:
:undoc-members:
:show-inheritance:
:mod:`uncollapsed_GP_demo` Module
---------------------------------
.. automodule:: GPy.examples.uncollapsed_GP_demo
:members:
:undoc-members:
:show-inheritance:
:mod:`unsupervised` Module
--------------------------
.. automodule:: GPy.examples.unsupervised
.. automodule:: GPy.examples.tutorials
:members:
:undoc-members:
:show-inheritance:

View file

@ -49,6 +49,14 @@ kern Package
:undoc-members:
:show-inheritance:
:mod:`coregionalise` Module
---------------------------
.. automodule:: GPy.kern.coregionalise
:members:
:undoc-members:
:show-inheritance:
:mod:`exponential` Module
-------------------------
@ -113,18 +121,26 @@ kern Package
:undoc-members:
:show-inheritance:
:mod:`product` Module
---------------------
:mod:`prod` Module
------------------
.. automodule:: GPy.kern.product
.. automodule:: GPy.kern.prod
:members:
:undoc-members:
:show-inheritance:
:mod:`product_orthogonal` Module
:mod:`prod_orthogonal` Module
-----------------------------
.. automodule:: GPy.kern.prod_orthogonal
:members:
:undoc-members:
:show-inheritance:
:mod:`rational_quadratic` Module
--------------------------------
.. automodule:: GPy.kern.product_orthogonal
.. automodule:: GPy.kern.rational_quadratic
:members:
:undoc-members:
:show-inheritance:
@ -145,6 +161,14 @@ kern Package
:undoc-members:
:show-inheritance:
:mod:`symmetric` Module
-----------------------
.. automodule:: GPy.kern.symmetric
:members:
:undoc-members:
:show-inheritance:
:mod:`sympykern` Module
-----------------------

View file

@ -9,10 +9,10 @@ models Package
:undoc-members:
:show-inheritance:
:mod:`BGPLVM` Module
--------------------
:mod:`Bayesian_GPLVM` Module
----------------------------
.. automodule:: GPy.models.BGPLVM
.. automodule:: GPy.models.Bayesian_GPLVM
:members:
:undoc-members:
:show-inheritance:

View file

@ -20,5 +20,6 @@ Subpackages
GPy.kern
GPy.likelihoods
GPy.models
GPy.testing
GPy.util

59
doc/GPy.testing.rst Normal file
View file

@ -0,0 +1,59 @@
testing Package
===============
:mod:`bgplvm_tests` Module
--------------------------
.. automodule:: GPy.testing.bgplvm_tests
:members:
:undoc-members:
:show-inheritance:
:mod:`examples_tests` Module
----------------------------
.. automodule:: GPy.testing.examples_tests
:members:
:undoc-members:
:show-inheritance:
:mod:`gplvm_tests` Module
-------------------------
.. automodule:: GPy.testing.gplvm_tests
:members:
:undoc-members:
:show-inheritance:
:mod:`kernel_tests` Module
--------------------------
.. automodule:: GPy.testing.kernel_tests
:members:
:undoc-members:
:show-inheritance:
:mod:`prior_tests` Module
-------------------------
.. automodule:: GPy.testing.prior_tests
:members:
:undoc-members:
:show-inheritance:
:mod:`sparse_gplvm_tests` Module
--------------------------------
.. automodule:: GPy.testing.sparse_gplvm_tests
:members:
:undoc-members:
:show-inheritance:
:mod:`unit_tests` Module
------------------------
.. automodule:: GPy.testing.unit_tests
:members:
:undoc-members:
:show-inheritance:

View file

@ -8,9 +8,9 @@ Welcome to GPy's documentation!
For a quick start, you can have a look at one of the tutorials:
* `Basic Gaussian process regression <tuto_GP_regression.html>`_
* `Interacting with models <tuto_interacting_with_models.html>`_
* `A kernel overview <tuto_kernel_overview.html>`_
* Advanced GP regression (Forthcoming)
* Writting kernels (Forthcoming)
* `Writing new kernels <tuto_creating_new_kernels.html>`_
You may also be interested by some examples in the GPy/examples folder.

View file

@ -0,0 +1,49 @@
***************************
List of implemented kernels
***************************
The following table shows the implemented kernels in GPy and gives the details of the implemented function for each kernel.
==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== =======
NAME Dimension ARD get/set K Kdiag dK_dtheta dKdiag_dtheta dK_dX dKdiag_dX psi0 psi1 psi2
==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== =======
bias n |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
Brownian 1 |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
exponential n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
finite_dimensional n |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
linear n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
Matern32 n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
Matern52 n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
periodic_exponential 1 |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
periodic_Matern32 1 |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
periodic_Matern52 1 |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
rational quadratic 1 |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
rbf n yes |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
spline 1 |tick| |tick| |tick| |tick| |tick| |tick|
-------------------- ----------- ----- ----------- ------ ------- ----------- --------------- ------- ----------- ------ ------ -------
white n |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick| |tick|
==================== =========== ===== =========== ====== ======= =========== =============== ======= =========== ====== ====== =======
Depending on the use, all functions may not be required
* ``get/set, K, Kdiag``: compulsory
* ``dK_dtheta``: necessary to optimize the model
* ``dKdiag_dtheta``: sparse models, BGPLVM, GPs with uncertain inputs
* ``dK_dX``: sparse models, GPLVM, BGPLVM, GPs with uncertain inputs
* ``dKdiag_dX``: sparse models, BGPLVM, GPs with uncertain inputs
* ``psi0, psi1, psi2``: BGPLVM, GPs with uncertain inputs
.. |tick| image:: Figures/tick.png

View file

@ -2,7 +2,7 @@
Gaussian process regression tutorial
*************************************
We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model.
We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. The code shown in this tutorial can be obtained at GPy/examples/tutorials.py, or by running ``GPy.examples.tutorials.tuto_GP_regression()``.
We first import the libraries we will need: ::

View file

@ -0,0 +1,183 @@
********************
Creating new kernels
********************
We will see in this tutorial how to create new kernels in GPy. We will also give details on how to implement each function of the kernel and illustrate with a running example: the rational quadratic kernel.
Structure of a kernel in GPy
============================
In GPy a kernel object is made of a list of kernpart objects, which correspond to symetric positive definite functions. More precisely, the kernel should be understood as the sum of the kernparts. In order to implement a new covariance, the following steps must be followed
1. implement the new covariance as a kernpart object
2. update the constructors that allow to use the kernpart as a kern object
3. update the __init__.py file
Theses three steps are detailed below.
Implementing a kernpart object
==============================
We advise the reader to start with copy-pasting an existing kernel and to modify the new file. We will now give a description of the various functions that can be found in a kernpart object.
**Header**
The header is similar to all kernels: ::
from kernpart import kernpart
import numpy as np
class rational_quadratic(kernpart):
**__init__(self,D, param1, param2, ...)**
The implementation of this function in mandatory.
For all kernparts the first parameter ``D`` corresponds to the dimension of the input space, and the following parameters stand for the parameterization of the kernel.
The following attributes are compulsory: ``self.D`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.Nparam`` (number of parameters, integer). ::
def __init__(self,D,variance=1.,lengthscale=1.,power=1.):
assert D == 1, "For this kernel we assume D=1"
self.D = D
self.Nparam = 3
self.name = 'rat_quad'
self.variance = variance
self.lengthscale = lengthscale
self.power = power
**_get_params(self)**
The implementation of this function in mandatory.
This function returns a one dimensional array of length ``self.Nparam`` containing the value of the parameters. ::
def _get_params(self):
return np.hstack((self.variance,self.lengthscale,self.power))
**_set_params(self,x)**
The implementation of this function in mandatory.
The input is a one dimensional array of length ``self.Nparam`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...). ::
def _set_params(self,x):
self.variance = x[0]
self.lengthscale = x[1]
self.power = x[2]
**_get_param_names(self)**
The implementation of this function in mandatory.
It returns a list of strings of length ``self.Nparam`` corresponding to the parameter names. ::
def _get_param_names(self):
return ['variance','lengthscale','power']
**K(self,X,X2,target)**
The implementation of this function in mandatory.
This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of line (say :math:`n_1`, :math:`n_2`) and ``self.D`` columns). This function does not returns anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use. ::
def K(self,X,X2,target):
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
target += self.variance*(1 + dist2/2.)**(-self.power)
**Kdiag(self,X,target)**
The implementation of this function in mandatory.
This function is similar to ``K`` but it computes only the values of the kernel on the diagonal. Thus, ``target`` is a 1-dimensional np.array of length :math:`n_1`. ::
def Kdiag(self,X,target):
target += self.variance
**dK_dtheta(self,dL_dK,X,X2,target)**
This function is required for the optimization of the parameters.
Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.Nparam``. For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second. ::
def dK_dtheta(self,dL_dK,X,X2,target):
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
dvar = (1 + dist2/2.)**(-self.power)
dl = self.power * self.variance * dist2 * self.lengthscale**(-3) * (1 + dist2/2./self.power)**(-self.power-1)
dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)
target[0] += np.sum(dvar*dL_dK)
target[1] += np.sum(dl*dL_dK)
target[2] += np.sum(dp*dL_dK)
**dKdiag_dtheta(self,dL_dKdiag,X,target)**
This function is required for BGPLVM, sparse models and uncertain inputs.
As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element. ::
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag)
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
**dK_dX(self,dL_dK,X,X2,target)**
This function is required for GPLVM, BGPLVM, sparse models and uncertain inputs.
Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times D` np.array). The result is added to target which is a :math:`n \times D` np.array. ::
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
target += np.sum(dL_dK*dX,1)[:,np.newaxis]
**dKdiag_dX(self,dL_dKdiag,X,target)**
This function is required for BGPLVM, sparse models and uncertain inputs. As for ``dKdiag_dtheta``, :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dX}` is added to each element of target. ::
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
**Psi statistics**
The psi statistics and their derivatives are required for BGPLVM and GPS with uncertain inputs.
The expressions of the psi statistics are:
TODO
For the rational quadratic we have:
TODO
Update the constructor
======================
Once the required functions have been implemented as a kernpart object, the file GPy/kern/constructors.py has to be updated to allow to build a kernel based on the kernpart object.
The following line should be added in the preamble of the file::
from rational_quadratic import rational_quadratic as rational_quadratic_part
as well as the following block ::
def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
part = rational_quadraticpart(D,variance, lengthscale, power)
return kern(D, [part])
Update initialization
=====================
The last step is to update the list of kernels imported from constructor in GPy/kern/__init__.py.

View file

@ -0,0 +1,217 @@
*************************************
Interacting with models
*************************************
The GPy model class has a set of features which are
designed to make it simple to explore the parameter
space of the model. By default, the scipy optimisers
are used to fit GPy models (via model.optimize()),
for which we provide mechanisms for 'free' optimisation:
GPy can ensure that naturally positive parameters
(such as variances) remain positive. But these mechanisms
are much more powerful than simple reparameterisation,
as we shall see.
Along this tutorial we'll use a sparse GP regression model
as example. This example can be in ``GPy.examples.regression``.
All of the examples included in GPy return an instance
of a model class, and therefore they can be called in
the following way: ::
import pylab as pb
pb.ion()
import GPy
m = GPy.examples.regression.sparse_GP_regression_1D()
Examining the model using print
===============================
To see the current state of the model parameters,
and the model's (marginal) likelihood just print the model ::
print m
The first thing displayed on the screen is the log-likelihood
value of the model with its current parameters. Below the
log-likelihood, a table with all the model's parameters
is shown. For each parameter, the table contains the name
of the parameter, the current value, and in case there are
defined: constraints, ties and prior distrbutions associated. ::
Log-likelihood: 6.309e+02
Name | Value | Constraints | Ties | Prior
------------------------------------------------------------------
iip_0_0 | -1.4671 | | |
iip_1_0 | 2.6378 | | |
iip_2_0 | -0.0396 | | |
iip_3_0 | -2.6372 | | |
iip_4_0 | 1.4704 | | |
rbf_variance | 1.5672 | (+ve) | |
rbf_lengthscale | 2.5625 | (+ve) | |
white_variance | 0.0000 | (+ve) | |
noise_variance | 0.0022 | (+ve) | |
In this case the kernel parameters (``rbf_variance``,
``rbf_lengthscale`` and ``white_variance``) as well as
the noise parameter (``noise_variance``), are constrained
to be positive, while the inducing inputs have not
constraints associated. Also there are no ties or prior defined.
Setting and fetching parameters by name
=======================================
Another way to interact with the model's parameters is through
the functions ``_get_param_names()``, ``_get_params()`` and
``_set_params()``.
``_get_param_names()`` returns a list of the parameters names ::
['iip_0_0',
'iip_1_0',
'iip_2_0',
'iip_3_0',
'iip_4_0',
'rbf_variance',
'rbf_lengthscale',
'white_variance',
'noise_variance']
``_get_params()`` returns an array of the parameters values ::
array([ -1.46705227e+00, 2.63782176e+00, -3.96422982e-02,
-2.63715255e+00, 1.47038653e+00, 1.56724596e+00,
2.56248679e+00, 2.20963633e-10, 2.18379922e-03])
``_set_params()`` takes an array as input and substitutes
the current values of the parameters for those of the array. For example,
we can define a new array of values and change the parameters as follows: ::
new_params = np.array([1.,2.,3.,4.,1.,1.,1.,1.,1.])
m._set_params(new_params)
If we call the function ``_get_params()`` again, we will obtain the new
parameters we have just set.
Parameters can be also set by name using the function ``_set()``. For example,
lets change the lengthscale to .5: ::
m.set('rbf_lengthscale',.5)
``_set()`` function accepts regular expression as it first
input, and therefore all parameters matching that regular
expression are set to the given value. In this case rather
than passing as second output a single value, we can also
use a list of arrays. For example, lets change the inducing
inputs: ::
m.set('iip',np.arange(-4,0))
Getting the model's likelihood and gradients
===========================================
Appart form the printing the model, the marginal
log-likelihood can be obtained by using the function
``log_likelihood()``. Also, the log-likelihood gradients
wrt. each parameter can be obtained with the funcion
``_log_likelihood_gradients()``. ::
m.log_likelihood()
-791.15371409346153
m._log_likelihood_gradients()
array([ 7.08278455e-03, 1.37118783e+01, 2.66948031e+00,
3.50184014e+00, 7.08278455e-03, -1.43501702e+02,
6.10662266e+01, -2.18472649e+02, 2.14663691e+02])
Removing the model's constraints
================================
When we initially call the example, it was optimized and hence the
log-likelihood gradients were close to zero. However, since
we have been changing the parameters, the gradients are far from zero now.
Next we are going to show how to optimize the model setting different
restrictions on the parameters.
Once a constrain has been set on a parameter, it is not possible to
define a new constraint for it unless we explicitly remove the previous
one. The command to remove the constraints is ``unconstrain()``, and
just as the ``set()`` command, it also accepts regular expression.
In this case we will remove all the constraints: ::
m.unconstrain('')
Constraining and optimising the model
=====================================
A requisite needed for some parameters, such as variances,
is to be positive. This is constraint is easily set
with the function ``constrain_positive()``. Regular expressions
are also accepted. ::
m.constrain_positive('var')
For convenience, GPy also provides a catch all function
which ensures that anything which appears to require
positivity is constrianed appropriately::
m.ensure_default_constraints()
Fixing parameters
=================
Parameters values can be fixed using ``constrain_fixed()``.
For example we can define the first inducing input to be
fixed on zero: ::
m.constrain_fixed('iip_0',0)
Bounding parameters
===================
Defining bounding constraints is an easily task in GPy too,
it only requires to use the function ``constrain_bounded()``.
For example, lets bound inducing inputs 2 and 3 to have
values between -4 and -1: ::
m.constrain_bounded('iip_(1|2)',-4,-1)
Tying Parameters
================
The values of two or more parameters can be tied together,
so that they share the same value during optimization.
The function to do so is ``tie_params()``. For the example
we are using, it doesn't make sense to tie parameters together,
however for the sake of the example we will tie the white noise
and the variance together. See `A kernel overview <tuto_kernel_overview.html>`_.
for a proper use of the tying capabilities.::
m.tie_params('e_var')
Optimizing the model
====================
Once we have finished defining the constraints,
we can now optimize the model with the function
``optimize``.::
m.optimize()
We can print again the model and check the new results.
The table now shows that ``iip_0_0`` is fixed, ``iip_1_0``
and ``iip_2_0`` are bounded and the kernel parameters are constrained to
be positive. In addition the table now indicates that
white_variance and noise_variance are tied together.::
Log-likelihood: 9.967e+01
Name | Value | Constraints | Ties | Prior
------------------------------------------------------------------
iip_0_0 | 0.0000 | Fixed | |
iip_1_0 | -2.8834 | (-4, -1) | |
iip_2_0 | -1.9152 | (-4, -1) | |
iip_3_0 | 1.5034 | | |
iip_4_0 | -1.0162 | | |
rbf_variance | 0.0158 | (+ve) | |
rbf_lengthscale | 0.9760 | (+ve) | |
white_variance | 0.0049 | (+ve) | (0) |
noise_variance | 0.0049 | (+ve) | (0) |
Further Reading
===============
All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read ??link?? creating_new_models.
By deafult, GPy uses the tnc optimizer (from scipy.optimize.tnc). To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.

View file

@ -2,6 +2,7 @@
****************************
tutorial : A kernel overview
****************************
The aim of this tutorial is to give a better understanding of the kernel objects in GPy and to list the ones that are already implemented. The code shown in this tutorial can be obtained at GPy/examples/tutorials.py or by running ``GPy.examples.tutorials.tuto_kernel_overview()``.
First we import the libraries we will need ::
@ -38,7 +39,7 @@ return::
Implemented kernels
===================
Many kernels are already implemented in GPy. Here is a summary of most of them:
Many kernels are already implemented in GPy. The following figure gives a summary of most of them (a comprehensive list can be list can be found `here <kernel_implementation.html>`_):
.. figure:: Figures/tuto_kern_overview_allkern.png
:align: center
@ -132,7 +133,7 @@ Various constrains can be applied to the parameters of a kernel
* ``constrain_fixed`` to fix the value of a parameter (the value will not be modified during optimisation)
* ``constrain_positive`` to make sure the parameter is greater than 0.
* ``constrain_bounded`` to impose the parameter to be in a given range.
* ``tie_param`` to impose the value of two (or more) parameters to be equal.
* ``tie_params`` to impose the value of two (or more) parameters to be equal.
When calling one of these functions, the parameters to constrain can either by specified by a regular expression that matches its name or by a number that corresponds to the rank of the parameter. Here is an example ::
@ -145,7 +146,7 @@ When calling one of these functions, the parameters to constrain can either by s
k.constrain_positive('var')
k.constrain_fixed(np.array([1]),1.75)
k.tie_param('len')
k.tie_params('len')
k.unconstrain('white')
k.constrain_bounded('white',lower=1e-5,upper=.5)
print k

View file

@ -2,8 +2,7 @@
# -*- coding: utf-8 -*-
import os
from numpy.distutils.core import Extension, setup
#from sphinx.setup_command import BuildDoc
from setuptools import setup
# Version number
version = '0.2'
@ -13,12 +12,12 @@ def read(fname):
setup(name = 'GPy',
version = version,
author = 'James Hensman, Nicolo Fusi, Ricardo Andrade, Nicolas Durrande, Alan Saul, Neil D. Lawrence',
author = read('AUTHORS.txt'),
author_email = "james.hensman@gmail.com",
description = ("The Gaussian Process Toolbox"),
license = "BSD 3-clause",
keywords = "machine-learning gaussian-processes kernels",
url = "http://ml.sheffield.ac.uk/GPy/",
url = "http://sheffieldml.github.com/GPy/",
packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods'],
package_dir={'GPy': 'GPy'},
package_data = {'GPy': ['GPy/examples']},
@ -26,14 +25,12 @@ setup(name = 'GPy',
long_description=read('README.md'),
#ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
# sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
extras_require = {
'docs':['Sphinx', 'ipython'],
},
#setup_requires=['sphinx'],
#cmdclass = {'build_sphinx': BuildDoc},
classifiers=[
"Development Status :: 1 - Alpha",
"Topic :: Machine Learning",
"License :: OSI Approved :: BSD License"],
)