merged in kern changes

James Hensman 2014-02-20 14:07:54 +00:00
commit 847fdef9c8
54 changed files with 914 additions and 1280 deletions
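
For orientation: the kernel changes in this commit replace the old aggregate kern.kern container with a Kern base class and per-kernel modules under GPy/kern/_src/ (RBF, White, Linear, plus the Add and Prod combiners), rename the lowercase constructors (GPy.kern.rbf, GPy.kern.white, ...) to the capitalised classes, and drop the which_parts keyword from the prediction methods. A rough sketch of user code under the new names (the data and the GPRegression call are illustrative, not part of the diff):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (50, 1))
Y = np.sin(X) + np.random.randn(50, 1) * 0.05

# before this commit: k = GPy.kern.rbf(1) + GPy.kern.white(1)
k = GPy.kern.RBF(1) + GPy.kern.White(1)   # capitalised classes; '+' builds an Add kernel

m = GPy.models.GPRegression(X, Y, kernel=k)
# predict() now takes only full_cov; the which_parts argument is gone
mean, var, lower, upper = m.predict(X, full_cov=False)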


@ -43,7 +43,8 @@ class GP(Model):
else:
self.Y_metadata = None
assert isinstance(kernel, kern.kern)
assert isinstance(kernel, kern.Kern)
assert self.input_dim == kernel.input_dim
self.kern = kernel
assert isinstance(likelihood, likelihoods.Likelihood)
@ -70,7 +71,7 @@ class GP(Model):
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
def _raw_predict(self, _Xnew, full_cov=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
@ -80,29 +81,27 @@ class GP(Model):
diagonal of the covariance is returned.
"""
Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
Kx = self.kern.K(_Xnew, self.X).T
#LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
Kxx = self.kern.K(_Xnew)
#var = Kxx - tdot(LiKx.T)
var = Kxx - np.dot(Kx.T, WiKx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
Kxx = self.kern.Kdiag(_Xnew)
#var = Kxx - np.sum(LiKx*LiKx, 0)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
return mu, var
def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
def predict(self, Xnew, full_cov=False, **likelihood_args):
"""
Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just
the diagonal
:type full_cov: bool
@ -118,13 +117,13 @@ class GP(Model):
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
mu, var = self._raw_predict(Xnew, full_cov=full_cov)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
return mean, var, _025pm, _975pm
def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
def posterior_samples_f(self,X,size=10, full_cov=True):
"""
Samples the posterior GP at the points X.
@ -132,13 +131,11 @@ class GP(Model):
:type X: np.ndarray, Nnew x self.input_dim.
:param size: the number of a posteriori samples.
:type size: int.
:param which_parts: which of the kernel functions to use (additively).
:type which_parts: 'all', or list of bools.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
m, v = self._raw_predict(X, full_cov=full_cov)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
if not full_cov:
Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
@ -147,7 +144,7 @@ class GP(Model):
return Ysim
def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None):
def posterior_samples(self,X,size=10, full_cov=True,noise_model=None):
"""
Samples the posterior GP at the points X.
@ -155,15 +152,13 @@ class GP(Model):
:type X: np.ndarray, Nnew x self.input_dim.
:param size: the number of a posteriori samples.
:type size: int.
:param which_parts: which of the kernel functions to use (additively).
:type which_parts: 'all', or list of bools.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov)
Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
if isinstance(self.likelihood, Gaussian):
noise_std = np.sqrt(self.likelihood._get_params())
Ysim += np.random.normal(0,noise_std,Ysim.shape)
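
The rewritten _raw_predict above is the standard GP posterior expressed through the Woodbury quantities cached on self.posterior. A plain-numpy restatement of the same algebra, kept as a sketch (attribute and shape conventions taken from the diff, nothing else):

import numpy as np

def raw_predict_sketch(kern, X, Xnew, woodbury_vector, woodbury_inv, full_cov=False):
    # mu  = K(Xnew, X) . v                               where v  = woodbury_vector
    # var = K(Xnew, Xnew) - K(Xnew, X) . Wi . K(X, Xnew)  where Wi = woodbury_inv
    Kx = kern.K(Xnew, X).T                      # (N, Nnew), as in the diff
    mu = np.dot(Kx.T, woodbury_vector)
    WiKx = np.dot(woodbury_inv, Kx)
    if full_cov:
        var = kern.K(Xnew) - np.dot(Kx.T, WiKx)
    else:
        var = kern.Kdiag(Xnew) - np.sum(WiKx * Kx, 0)
        var = var.reshape(-1, 1)
    return mu, var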


@ -33,12 +33,12 @@ class SparseGP(GP):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'):
# pick a sensible inference method
#pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC()
else:
# inference_method = ??
#inference_method = ??
raise NotImplementedError, "what to do what to do?"
print "defaulting to ", inference_method, "for latent function inference"
@ -53,38 +53,35 @@ class SparseGP(GP):
self.add_parameter(self.Z, index=0)
self.parameters_changed()
def _update_gradients_Z(self, add=False):
# The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
def _gradients_Z(self):
#The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
if not self.Z.is_fixed:
if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
if self.X_variance is None:
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
else:
self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)
print self.Z.gradient
print id(self.Z)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
self._update_gradients_Z(add=False)
self.Z.gradient = self._gradients_Z()
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
"""
Make a prediction for the latent function values
"""
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
Kx = self.kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx), Kx, [1,0]).T
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)[:, None]
#import ipdb;ipdb.set_trace()
var = Kxx - (np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T * Kx.T[:,:,None]).sum(1)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
else:
# assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
@ -92,7 +89,7 @@ class SparseGP(GP):
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var
return mu, var[:,None]
def _getstate(self):
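
The diagonal branch of _raw_predict above uses the fact that np.sum(Kx * Wi.dot(Kx), 0) is exactly the diagonal of the full correction term mdot(Kx.T, Wi, Kx) computed in the full_cov branch. A quick numerical check of that identity with random stand-in arrays:

import numpy as np

rng = np.random.RandomState(0)
M, Nnew = 6, 4
Kx = rng.randn(M, Nnew)               # plays the role of kern.K(Z, Xnew)
A = rng.randn(M, M)
Wi = A.dot(A.T)                       # symmetric stand-in for posterior.woodbury_inv

full = Kx.T.dot(Wi).dot(Kx)           # full covariance correction, as in mdot(Kx.T, Wi, Kx)
diag = np.sum(Kx * Wi.dot(Kx), 0)     # diagonal-only shortcut

assert np.allclose(np.diag(full), diag)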


@ -22,18 +22,18 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
# generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim)
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
#+ GPy.kern.white(input_dim, 0.01)
)
K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
# k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
#k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
# k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
p = .3
@ -73,7 +73,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
data = GPy.util.datasets.oil_100()
Y = data['X']
# create simple GP model
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.bias(6)
m = GPy.models.GPLVM(Y, 6, kernel=kernel)
m.data_labels = data['Y'].argmax(axis=1)
if optimize: m.optimize('scg', messages=verbose)
@ -88,7 +88,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
Y = Y - Y.mean(0)
Y /= Y.std(0)
# Create the model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q)
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q)
m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
m.data_labels = data['Y'][:N].argmax(axis=1)
@ -138,7 +138,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
(1 - var))) + .001
Z = _np.random.permutation(X)[:num_inducing]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
m.data_colors = c
@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
_np.random.seed(0)
data = GPy.util.datasets.oil()
kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
Y = data['X'][:N]
Yn = Gaussian(Y, normalize=True)
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
@ -439,7 +439,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1()
# optimize
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
@ -474,7 +474,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1()
Q = 6
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
# optimize
m.ensure_default_constraints()


@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True):
Y = np.vstack((Y1, Y2))
#build the kernel
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k1 = GPy.kern.RBF(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalize(2,1)
k = k1**k2
m = GPy.models.GPRegression(X, Y, kernel=k)
@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True):
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
# Y = np.vstack((Y1, Y2))
#
# k1 = GPy.kern.rbf(1)
# k1 = GPy.kern.RBF(1)
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
# m.constrain_fixed('.*rbf_var', 1.)
# m.optimize(max_iters=100)
@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
np.random.randint(0, 4, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1)
k1 = GPy.kern.RBF(1)
k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
k = k1**k2
@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
data['Y'] = data['Y'] - np.mean(data['Y'])
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF)
if plot:
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
ax = pb.gca()
@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
optim_point_y = np.empty(2)
np.random.seed(seed=seed)
for i in range(0, model_restarts):
# kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
# kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
m['noise_variance'] = np.random.uniform(1e-3, 1)
@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
ax.set_ylim(ylim)
return m # (models, lls)
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
"""
Evaluate the GP objective function for a given data set for a range of
signal to noise ratios and a range of lengthscales.
@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
optimizer='scg'
x_len = 30
X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
kern = GPy.kern.rbf(1)
kern = GPy.kern.RBF(1)
poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
kernel = GPy.kern.RBF(X.shape[1], ARD=1)
kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
m = GPy.models.GPRegression(X, Y, kernel)
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
# m.set_prior('.*lengthscale',len_prior)
@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
kernel = GPy.kern.RBF(X.shape[1], ARD=1)
#kernel += GPy.kern.bias(X.shape[1])
X_variance = np.ones(X.shape) * 0.5
m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
X = np.random.uniform(-3., 3., (num_samples, 1))
Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
# construct kernel
rbf = GPy.kern.rbf(1)
rbf = GPy.kern.RBF(1)
# create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
m.checkgrad(verbose=1)
@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
Y[inan] = np.nan
# construct kernel
rbf = GPy.kern.rbf(2)
rbf = GPy.kern.RBF(2)
# create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
# likelihood = GPy.likelihoods.Gaussian(Y)
Z = np.random.uniform(-3., 3., (7, 1))
k = GPy.kern.rbf(1)
k = GPy.kern.RBF(1)
# create simple GP Model - no input uncertainty on this one
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z)
if optimize:
m.optimize('scg', messages=1, max_iters=max_iters)
@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
print m
# the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
if optimize:
m.optimize('scg', messages=1, max_iters=max_iters)
if plot:


@ -1,9 +1,33 @@
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import *
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except:
pass
from kern import *
from _src.rbf import RBF
from _src.white import White
from _src.kern import Kern
from _src.linear import Linear
#import bias
#import Brownian
#import coregionalize
#import exponential
#import eq_ode1
#import finite_dimensional
#import fixed
#import gibbs
#import hetero
#import hierarchical
#import independent_outputs
#import linear
#import Matern32
#import Matern52
#import mlp
#import ODE_1
#import periodic_exponential
#import periodic_Matern32
#import periodic_Matern52
#import poly
#import prod_orthogonal
#import prod
#import rational_quadratic
#import rbfcos
#import rbf
#import rbf_inv
#import spline
#import symmetric
#import white

GPy/kern/_src/add.py (new file, 215 lines)

@ -0,0 +1,215 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from linear import Linear
from ...core.parameterization import Parameterized
from ...core.parameterization.param import Param
from kern import Kern
class Add(Kern):
def __init__(self, subkerns, tensor):
assert all([isinstance(k, Kern) for k in subkerns])
if tensor:
input_dim = sum([k.input_dim for k in subkerns])
self.input_slices = []
n = 0
for k in subkerns:
self.input_slices.append(slice(n, n+k.input_dim))
n += k.input_dim
else:
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
input_dim = subkerns[0].input_dim
self.input_slices = [slice(None) for k in subkerns]
super(Add, self).__init__(input_dim, 'add')
self.add_parameters(*subkerns)
def K(self, X, X2=None):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
"""
assert X.shape[1] == self.input_dim
if X2 is None:
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
else:
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def update_gradients_full(self, dL_dK, X):
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def Kdiag(self, X):
assert X.shape[1] == self.input_dim
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def psi0(self, Z, mu, S):
return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
def psi1(self, Z, mu, S):
return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
def psi2(self, Z, mu, S):
psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
# compute the "cross" terms
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#from fixed import Fixed
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
# white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#from fixed import Fixed
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#from fixed import Fixed
target = np.zeros(Z.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
target += p1.gradients_Z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
return target
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
from white import White
from rbf import RBF
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#from fixed import Fixed
target_mu = np.zeros(mu.shape)
target_S = np.zeros(S.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
target_mu += a
target_S += b
return target_mu, target_S
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized._getstate(self) + [#self._parameters_,
self.input_dim,
self.input_slices,
self._param_slices_
]
def _setstate(self, state):
self._param_slices_ = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
Parameterized._setstate(self, state)
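
With tensor=True the Add kernel stacks its parts side by side and records which columns each part sees in input_slices; with tensor=False every part sees all columns. The slice bookkeeping in the constructor reduces to the following standalone restatement (not GPy code):

# how Add.__init__ assigns columns when tensor=True
input_dims = [2, 3]                   # e.g. an RBF over 2 columns plus a Linear over 3
input_slices, n = [], 0
for d in input_dims:
    input_slices.append(slice(n, n + d))
    n += d

print(input_slices)                   # [slice(0, 2, None), slice(2, 5, None)]
# Add.K(X) then sums part.K(X[:, sl]) over the (part, slice) pairs, as in K() above.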


@ -1,12 +1,12 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from kern import Kern
import numpy as np
from scipy import weave
from ...core.parameterization import Param
class Coregionalize(Kernpart):
class Coregionalize(Kern):
"""
Covariance function for intrinsic/linear coregionalization models
@ -133,6 +133,8 @@ class Coregionalize(Kernpart):
#dkappa = dL_dKdiag_small
#target += np.hstack([dW.flatten(),dkappa])
def gradients_X(self,dL_dK,X,X2,target):
#NOTE In this case, pass is equivalent to returning zero.
pass
def gradients_X(self,dL_dK,X,X2):
if X2 is None:
return np.zeros((X.shape[0], X.shape[0]))
else:
return np.zeros((X.shape[0], X2.shape[0]))

GPy/kern/_src/kern.py (new file, 330 lines)

@ -0,0 +1,330 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from ...core.parameterization import Parameterized
from ...core.parameterization.param import Param
class Kern(Parameterized):
def __init__(self, input_dim, name):
"""
The base class for a kernel: a positive definite function
which forms the covariance function (kernel).
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
super(Kern, self).__init__(name)
self.input_dim = input_dim
def K(self, X, X2, target):
raise NotImplementedError
def Kdiag(self, X, target):
raise NotImplementedError
def _param_grad_helper(self, dL_dK,X, X2, target):
raise NotImplementedError
def psi0(self,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0, Z,mu,S,target):
raise NotImplementedError
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi1(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dtheta(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi2(self,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2, target):
raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
"""Set the gradients of all parameters when doing sparse (M) inference."""
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
def plot_ARD(self, *args):
"""If an ARD kernel is present, plot a bar representation using matplotlib
See GPy.plotting.matplot_dep.plot_ARD
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args)
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one.
If Tensor is False, both kernels are defined on the same _space_, and
the created kernel will have the same number of inputs as self and
other (which must be the same).
If Tensor is True, then the dimensions are stacked 'horizontally', so
that the resulting kernel has self.input_dim + other.input_dim
:param other: the other kernel to be added
:type other: GPy.kern
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add
return Add([self, other], tensor)
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other, tensor=False):
"""
Shortcut for tensor `prod`.
"""
return self.prod(other, tensor=True)
def prod(self, other, tensor=False):
"""
Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from prod import Prod
return Prod(self, other, tensor)
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = GPy.kern.rbf(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK==None:
if X2==None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks
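
The Kern base class above fixes the interface a ported kernel has to provide: K, Kdiag, the update_gradients_* hooks and gradients_X, with parameters handled through Param/add_parameters. Several kernels the examples still call (e.g. GPy.kern.bias) are not ported in this commit; a sketch of what a minimal port might look like, copying the parameter handling from the Linear and White diffs elsewhere in this commit (illustrative only, not part of the commit):

import numpy as np
from GPy.kern import Kern
from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logexp

class Bias(Kern):
    """Constant (bias) kernel: K(x, x') = variance everywhere."""
    def __init__(self, input_dim, variance=1., name='bias'):
        super(Bias, self).__init__(input_dim, name)
        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)

    def K(self, X, X2=None):
        cols = X.shape[0] if X2 is None else X2.shape[0]
        return np.ones((X.shape[0], cols)) * self.variance

    def Kdiag(self, X):
        ret = np.ones(X.shape[0])
        ret[:] = self.variance
        return ret

    def update_gradients_full(self, dL_dK, X):
        self.variance.gradient = dL_dK.sum()   # dK/dvariance is all ones

    def gradients_X(self, dL_dK, X, X2=None):
        return np.zeros_like(X)                # K does not depend on X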


@ -4,13 +4,14 @@
import numpy as np
from scipy import weave
from kernpart import Kernpart
from kern import Kern
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import Cacher, cache_this
class Linear(Kernpart):
class Linear(Kern):
"""
Linear kernel
@ -45,22 +46,35 @@ class Linear(Kernpart):
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.variances.gradient = np.zeros(self.variances.shape)
self.add_parameter(self.variances)
self.variances.add_observer(self, self.update_variance)
self.variances.add_observer(self, self._on_changed)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2 = np.empty(shape=(2, 1))
def _on_changed(self, obj):
self._notify_observers()
def update_variance(self, v):
self.variances2 = np.square(self.variances)
@cache_this(limit=3, reset_on_self=True)
def K(self, X, X2=None):
if self.ARD:
if X2 is None:
return tdot(X*np.sqrt(self.variances))
else:
rv = np.sqrt(self.variances)
return np.dot(X*rv, (X2*rv).T)
else:
return self._dot_product(X, X2) * self.variances
def on_input_change(self, X):
self._K_computations(X, None)
@cache_this(limit=3, reset_on_self=False)
def _dot_product(self, X, X2=None):
if X2 is None:
return tdot(X)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return np.sum(self.variances * np.square(X), -1)
def update_gradients_full(self, dL_dK, X):
self.variances.gradient[:] = 0
self.variances.gradient = np.zeros(self.variances.size)
self._param_grad_helper(dL_dK, X, None, self.variances.gradient)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
@ -68,7 +82,7 @@ class Linear(Kernpart):
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = tmp.sum()
self.variances.gradient = np.atleast_1d(tmp.sum())
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
@ -85,25 +99,8 @@ class Linear(Kernpart):
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: self.variances.gradient += tmp.sum()
#from Kmm
self._K_computations(Z, None)
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
def K(self, X, X2, target):
if self.ARD:
XX = X * np.sqrt(self.variances)
if X2 is None:
target += tdot(XX)
else:
XX2 = X2 * np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
if X is not self._X or X2 is not None:
self._K_computations(X, X2)
target += self.variances * self._dot_product
def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target)
def _param_grad_helper(self, dL_dK, X, X2, target):
if self.ARD:
if X2 is None:
@ -112,18 +109,16 @@ class Linear(Kernpart):
product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
else:
if X is not self._X or X2 is not None:
self._K_computations(X, X2)
target += np.sum(self._dot_product * dL_dK)
target += np.sum(self._dot_product(X, X2) * dL_dK)
def gradients_X(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2=None):
if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self,dL_dKdiag,X,target):
target += 2.*self.variances*dL_dKdiag[:,None]*X
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
#---------------------------------------#
# PSI statistics #
@ -273,15 +268,15 @@ class Linear(Kernpart):
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
self._X = X.copy()
if X2 is None:
self._dot_product = tdot(param_to_array(X))
self._X2 = None
else:
self._X2 = X2.copy()
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
#def _K_computations(self, X, X2):
#if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
#self._X = X.copy()
#if X2 is None:
##self._dot_product = tdot(param_to_array(X))
#self._X2 = None
#else:
#self._X2 = X2.copy()
#self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2

GPy/kern/_src/prod.py (new file, 65 lines)

@ -0,0 +1,65 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
class Prod(Kern):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kern
:param tensor: The kernels are either multiplied as functions defined on the same input space (default) or on the tensor product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self, k1, k2, tensor=False):
if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
self.slice1 = slice(0, self.input_dim)
self.slice2 = slice(0, self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
def K(self, X, X2=None):
if X2 is None:
return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
else:
return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])
def Kdiag(self, X):
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
def update_gradients_full(self, dL_dK, X):
self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
if X2 is None:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None)
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None)
else:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2])
return target
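
Prod backs the overloaded '*' and '**' operators defined on Kern above: k1 * k2 multiplies two kernels on a shared input space, while k1 ** k2 multiplies them on the tensor product of their input spaces, with the column slicing handled exactly as in the constructor. An illustrative use under the new class names (assuming both kernels are importable at this commit):

import numpy as np
from GPy.kern import RBF, Linear

X1 = np.random.randn(10, 1)
X3 = np.random.randn(10, 3)

k_shared = RBF(1) * Linear(1)       # shared 1-d input space -> input_dim 1
k_tensor = RBF(1) ** Linear(2)      # tensor product -> input_dim 1 + 2 = 3

assert k_shared.K(X1).shape == (10, 10)
assert k_tensor.input_dim == 3
assert k_tensor.K(X3).shape == (10, 10)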


@ -4,13 +4,13 @@
import numpy as np
from scipy import weave
from kernpart import Kernpart
from kern import Kern
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class RBF(Kernpart):
class RBF(Kern):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
@ -60,22 +60,8 @@ class RBF(Kernpart):
self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache
#self.update_inv_lengthscale(self.lengthscale)
#self.parameters_changed()
# initialize cache
#self._Z, self._mu, self._S = np.empty(shape=(3, 1))
#self._X, self._X2, self._params_save = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
# self.weave_options = {'headers' : ['<omp.h>'],
# 'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
# 'extra_link_args' : ['-lgomp']}
self.weave_options = {}
def on_input_change(self, X):
#self._K_computations(X, None)
pass
def update_lengthscale(self, l):
self.lengthscale2 = np.square(self.lengthscale)
@ -84,23 +70,27 @@ class RBF(Kernpart):
self._X, self._X2 = np.empty(shape=(2, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def K(self, X, X2, target):
def K(self, X, X2=None):
self._K_computations(X, X2)
target += self.variance * self._K_dvar
return self.variance * self._K_dvar
def Kdiag(self, X, target):
np.add(target, self.variance, target)
def Kdiag(self, X):
ret = np.ones(X.shape[0])
ret[:] = self.variance
return ret
def psi0(self, Z, mu, S, target):
target += self.variance
def psi0(self, Z, mu, S):
ret = np.empty(mu.shape[0], dtype=np.float64)
ret[:] = self.variance
return ret
def psi1(self, Z, mu, S, target):
def psi1(self, Z, mu, S):
self._psi_computations(Z, mu, S)
target += self._psi1
return self._psi1
def psi2(self, Z, mu, S, target):
def psi2(self, Z, mu, S):
self._psi_computations(Z, mu, S)
target += self._psi2
return self._psi2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
@ -165,7 +155,38 @@ class RBF(Kernpart):
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def gradients_X(self, dL_dK, X, X2, target):
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
#psi1
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
#psi2
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
grad += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
return grad
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
#psi1
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
return grad_mu, grad_S
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
if X2 is None:
@ -173,45 +194,16 @@ class RBF(Kernpart):
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
return np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def dKdiag_dX(self, dL_dKdiag, X):
return np.zeros(X.shape[0])
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
#---------------------------------------#
# Precomputations #
#---------------------------------------#
@ -373,6 +365,7 @@ class RBF(Kernpart):
#include <omp.h>
#include <math.h>
"""
mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)


@ -1,12 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from kern import Kern
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class White(Kernpart):
class White(Kern):
"""
White noise kernel.
@ -20,14 +20,17 @@ class White(Kernpart):
self.input_dim = input_dim
self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance)
self._psi1 = 0 # TODO: more elegance here
def K(self,X,X2,target):
def K(self, X, X2=None):
if X2 is None:
target += np.eye(X.shape[0])*self.variance
return np.eye(X.shape[0])*self.variance
else:
return np.zeros((X.shape[0], X2.shape[0]))
def Kdiag(self,X,target):
target += self.variance
def Kdiag(self,X):
ret = np.ones(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X):
self.variance.gradient = np.trace(dL_dK)
@ -38,14 +41,8 @@ class White(Kernpart):
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(dL_dKdiag)
def gradients_X(self,dL_dK,X,X2,target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def gradients_X(self,dL_dK,X,X2):
return np.zeros_like(X)
def psi0(self,Z,mu,S,target):
pass # target += self.variance
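
The ported White kernel now returns its covariances instead of adding into a target array: variance times the identity for K(X), an explicit zero matrix for cross-covariances, and a constant vector for Kdiag. A quick check of that behaviour (the 'variance' constructor argument is assumed from the Param name in the diff):

import numpy as np
from GPy.kern import White

k = White(1, variance=0.5)
X, X2 = np.random.randn(5, 1), np.random.randn(3, 1)

assert np.allclose(k.K(X), 0.5 * np.eye(5))        # noise only between identical points
assert np.allclose(k.K(X, X2), np.zeros((5, 3)))   # no cross-covariance
assert np.allclose(k.Kdiag(X), 0.5)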


@ -1,680 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
import itertools
from parts.prod import Prod as prod
from parts.linear import Linear
from parts.kernpart import Kernpart
from ..core.parameterization import Parameterized
from GPy.core.parameterization.param import Param
class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None):
"""
This is the main kernel class for GPy. It handles multiple
(additive) kernel functions, and keeps track of various things
like which parameters live where.
The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are
computed additively. For multiplication, special _prod_ parts
are used.
:param input_dim: The dimensionality of the kernel's input space
:type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel
:type parts: list of Kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel
:type input_slices: list of slice objects, or list of bools
"""
super(kern, self).__init__('kern')
self.add_parameters(*parts)
self.input_dim = input_dim
if input_slices is None:
self.input_slices = [slice(None) for p in self._parameters_]
else:
assert len(input_slices) == len(self._parameters_)
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self._parameters_:
assert isinstance(p, Kernpart), "bad kernel part"
def parameters_changed(self):
[p.parameters_changed() for p in self._parameters_]
def connect_input(self, Xparam):
[p.connect_input(Xparam) for p in self._parameters_]
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized._getstate(self) + [#self._parameters_,
#self.num_params,
self.input_dim,
self.input_slices,
self._param_slices_
]
def _setstate(self, state):
self._param_slices_ = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
#self.num_params = state.pop()
#self._parameters_ = state.pop()
Parameterized._setstate(self, state)
def plot_ARD(self, *args):
"""If an ARD kernel is present, plot a bar representation using matplotlib
See GPy.plotting.matplot_dep.plot_ARD
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args)
# def _transform_gradients(self, g):
# """
# Apply the transformations of the kernel so that the returned vector
# represents the gradient in the transformed space (i.e. that given by
# get_params_transformed())
#
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
# """
# x = self._get_params()
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__]
# # for constraint, index in self.constraints.iteritems():
# # if constraint != __fixed__:
# # g[index] = g[index] * constraint.gradfactor(x[index])
# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]]
# # if len(self.tied_indices) or len(self.fixed_indices):
# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
# # return np.delete(g, to_remove)
# # else:
# if self._fixes_ is not None: return g[self._fixes_]
# return g
# x = self._get_params()
# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
# if len(self.tied_indices) or len(self.fixed_indices):
# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
# return np.delete(g, to_remove)
# else:
# return g
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one.
If tensor is False, both kernels are defined on the same _space_, and
the created kernel has the same number of inputs as self and other
(which must match).
If tensor is True, the input dimensions are stacked 'horizontally', so
that the resulting kernel acts on self.input_dim + other.input_dim inputs.
:param other: the other kernel to be added
:type other: GPy.kern
"""
if tensor:
D = self.input_dim + other.input_dim
self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices)
# transfer constraints:
# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
# newkern.constraints = self.constraints + other.constraints
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
# newkern.fixed_values = self.fixed_values + other.fixed_values
# newkern.constraints = self.constraints + other.constraints
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
else:
assert self.input_dim == other.input_dim
newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices)
# transfer constraints:
# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
# newkern.constraints = self.constraints + other.constraints
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
# newkern.fixed_values = self.fixed_values + other.fixed_values
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
[newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
[newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None
return newkern
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other):
"""
Shortcut for the tensor product: k1 ** k2 is equivalent to k1.prod(k2, tensor=True).
"""
return self.prod(other, tensor=True)
def prod(self, other, tensor=False):
"""
Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be multiplied with this one
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
K1 = self
K2 = other
#K1 = self.copy()
#K2 = other.copy()
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)]
if tensor:
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
else:
newkern = kern(K1.input_dim, newkernparts, slices)
#newkern._follow_constrains(K1, K2)
return newkern
# def _follow_constrains(self, K1, K2):
#
# # Build the array that allows to go from the initial indices of the param to the new ones
# K1_param = []
# n = 0
# for k1 in K1.parts:
# K1_param += [range(n, n + k1.num_params)]
# n += k1.num_params
# n = 0
# K2_param = []
# for k2 in K2.parts:
# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
# n += k2.num_params
# index_param = []
# for p1 in K1_param:
# for p2 in K2_param:
# index_param += p1 + p2
# index_param = np.array(index_param)
#
# # Get the ties and constrains of the kernels before the multiplication
# prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices]
#
# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices]
# prev_constr = K1.constraints + K2.constraints
#
# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices]
# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values
#
# # follow the previous ties
# for arr in prev_ties:
# for j in arr:
# index_param[np.where(index_param == j)[0]] = arr[0]
#
# # ties and constrains
# for i in range(K1.num_params + K2.num_params):
# index = np.where(index_param == i)[0]
# if index.size > 1:
# self.tie_params(index)
# for i, t in zip(prev_constr_ind, prev_constr):
# self.constrain(np.where(index_param == i)[0], t)
#
# def _get_params(self):
# return np.hstack(self._parameters_)
# return np.hstack([p._get_params() for p in self._parameters_])
# def _set_params(self, x):
# import ipdb;ipdb.set_trace()
# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)]
# def _get_param_names(self):
# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count
# part_names = np.array([k.name for k in self._parameters_], dtype=np.str)
# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
#
# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], [])
def K(self, X, X2=None, which_parts='all'):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
:param which_parts: a list of booleans detailing whether to include
each of the part functions. By default, 'all'
indicates all parts
"""
if which_parts == 'all':
which_parts = [True] * self.size
assert X.shape[1] == self.input_dim
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
return target
def update_gradients_full(self, dL_dK, X):
[p.update_gradients_full(dL_dK, X) for p in self._parameters_]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
def _param_grad_helper(self, dL_dK, X, X2=None):
"""
Compute the gradient of the covariance function with respect to the parameters.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: Np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)
:returns: dL_dtheta
"""
assert X.shape[1] == self.input_dim
target = np.zeros(self.size)
if X2 is None:
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
else:
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target)
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def Kdiag(self, X, which_parts='all'):
"""Compute the diagonal of the covariance function for inputs X."""
if which_parts == 'all':
which_parts = [True] * self.size
assert X.shape[1] == self.input_dim
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):
"""Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.size)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.input_dim
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.size)
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.size))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are num_samples,num_inducing,input_dim"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target_mu, target_S
def psi2(self, Z, mu, S):
"""
Compute the psi2 statistics for the covariance function, i.e. psi2[n, m, m'] = E_{q(x_n)}[ k(Z_m, x_n) k(x_n, Z_m') ] with q(x_n) = N(mu_n, S_n).
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: input_slices needed
crossterms = 0
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
if i_s1 == i_s2:
# TODO psi1 this must be faster/better/precached/more nice
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
prod = np.multiply(tmp1, tmp2)
crossterms += prod[:, :, None] + prod[:, None, :]
target += crossterms
return target
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
"""Gradient of the psi2 statistics with respect to the parameters."""
target = np.zeros(self.size)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
# compute the "cross" terms
# TODO: better looping, input_slices
for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
p1, p2 = self._parameters_[i1], self._parameters_[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
return self._transform_gradients(target)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# target *= 2
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
return target * 2
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self._parameters_, 2):
# if p1.name == 'linear' and p2.name == 'linear':
# raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
return target_mu, target_S
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel is None:
kernel = GPy.kern.rbf(1)
if X is None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK is None:
if X2 is None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError("This needs to be implemented to use the kern_check_model class.")
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK is None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK is None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs a series of checks on a kernel to verify its
implementation: that the covariance matrix is positive definite for a
randomly generated data set, and that the gradients with respect to the
parameters and the inputs pass checkgrad.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X is None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
# one random output index per row
X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
if X2 is None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks
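
As a quick illustration of the additive/multiplicative API and the checks above, here is a minimal sketch. It assumes the lower-case constructors (GPy.kern.rbf, GPy.kern.white) referenced elsewhere in this file are available (the constructor names are being renamed in this commit), and uses the kern_test helper defined directly above.

    import numpy as np
    import GPy

    k1 = GPy.kern.rbf(2)                 # assumed part-style constructors
    k2 = GPy.kern.white(2)

    k_add = k1 + k2       # __add__  -> add(other): both on the same 2-d space
    k_prod = k1 * k2      # __mul__  -> prod(other): element-wise product
    k_tensor = k1 ** k2   # __pow__  -> prod(other, tensor=True): acts on 4-d inputs

    X = np.random.randn(10, 2)
    K = k_add(X)          # __call__ delegates to K(X, X2=None); shape (10, 10)

    # exercise the positive-definiteness and gradient checks defined above
    kern_test(k_add, verbose=True)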

View file

@@ -1,29 +0,0 @@
import bias
import Brownian
import coregionalize
import exponential
import eq_ode1
import finite_dimensional
import fixed
import gibbs
import hetero
import hierarchical
import independent_outputs
import linear
import Matern32
import Matern52
import mlp
import ODE_1
import periodic_exponential
import periodic_Matern32
import periodic_Matern52
import poly
import prod_orthogonal
import prod
import rational_quadratic
import rbfcos
import rbf
import rbf_inv
import spline
import symmetric
import white

View file

@@ -1,176 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#from ...core.parameterized.Parameterized import set_as_parameter
from ...core.parameterization import Parameterized
import numpy as np
class Kernpart(Parameterized):
def __init__(self,input_dim,name):
"""
The base class for a kernpart: a positive definite function
which forms part of a covariance function (kernel).
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
super(Kernpart, self).__init__(name)
# the input dimensionality for the covariance
self.input_dim = input_dim
# the number of optimisable parameters
# the name of the covariance function.
# link to parameterized objects
#self._X = None
def connect_input(self, X):
X.add_observer(self, self.on_input_change)
#self._X = X
def on_input_change(self, X):
"""
During optimization this function will be called when
the inputs X changed. Use this to update caches dependent
on the inputs X.
"""
# overwrite this to update kernel when inputs X change
pass
# def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs):
# """
# :param names: name of parameter to set as parameter
# :param gradient: gradient method to get the gradient of this parameter
# :param index: index of where to place parameter in printing
# :param args, kwargs: additional arguments to gradient
#
# Convenience method to connect Kernpart parameters:
# parameter with name (attribute of this Kernpart) will be set as parameter with following name:
#
# kernel_name + _ + parameter_name
#
# To add the kernels name to the parameter name use this method to
# add parameters.
# """
# self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs)
# def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs):
# """
# See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter`
#
# Note: this method adds the kernels name in front of the parameter.
# """
# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs)
# if index is None:
# self._parameters_.append(p)
# else:
# self._parameters_.insert(index, p)
# self.__dict__[name] = p
#set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable
# def _get_params(self):
# raise NotImplementedError
# def _set_params(self,x):
# raise NotImplementedError
# def _get_param_names(self):
# raise NotImplementedError
def K(self,X,X2,target):
raise NotImplementedError
def Kdiag(self,X,target):
raise NotImplementedError
def _param_grad_helper(self,dL_dK,X,X2,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
# In the base case compute this by calling _param_grad_helper. Need to
# override for stationary covariances (for example) to save
# time.
for i in range(X.shape[0]):
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
def psi0(self,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi1(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dtheta(self,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi2(self,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2, target):
raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
"""Set the gradients of all parameters when doing sparse (M) inference."""
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
class Kernpart_stationary(Kernpart):
def __init__(self, input_dim, lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params = 2
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
self.lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if lengthscale is not None:
self.lengthscale = np.asarray(lengthscale)
assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
self.lengthscale = np.ones(self.input_dim)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
def _set_params(self, x):
self.lengthscale = x
self.lengthscale2 = np.square(self.lengthscale)
# reset cached results
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def dKdiag_dtheta(self, dL_dKdiag, X, target):
# For stationary covariances, derivative of diagonal elements
# wrt lengthscale is 0.
target[0] += np.sum(dL_dKdiag)
def dKdiag_dX(self, dL_dK, X, target):
pass # true for all stationary kernels
class Kernpart_inner(Kernpart):
def __init__(self,input_dim):
"""
The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs.
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
Kernpart.__init__(self, input_dim, 'inner')  # Kernpart.__init__ also expects a name; 'inner' is a placeholder
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
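
To make the in-place `target` convention of Kernpart concrete, here is a minimal sketch of a toy part with a constant covariance; it is illustrative only (a real part would register its variance as a Param and implement the gradient methods above).

    import numpy as np

    class ConstantPart(Kernpart):
        """Toy part with k(x, x') = variance, illustrating the target convention."""
        def __init__(self, input_dim, variance=1.):
            super(ConstantPart, self).__init__(input_dim, 'constant')
            self.variance = variance

        def K(self, X, X2, target):
            # parts accumulate into target instead of returning a new array
            ncols = X.shape[0] if X2 is None else X2.shape[0]
            target += self.variance * np.ones((X.shape[0], ncols))

        def Kdiag(self, X, target):
            target += self.variance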

View file

@@ -1,125 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
from coregionalize import Coregionalize
import numpy as np
import hashlib
class Prod(Kernpart):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kernpart
:param tensor: whether the kernels are multiplied as functions defined on the same input space (default) or on the tensor product of their input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self,k1,k2,tensor=False):
if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
self.slice1 = slice(0,self.input_dim)
self.slice2 = slice(0,self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
#initialize cache
self._X, self._X2 = np.empty(shape=(2,1))
self._params = None
def K(self,X,X2,target):
self._K_computations(X,X2)
target += self._K1 * self._K2
def K1(self,X, X2):
"""Compute the part of the kernel associated with k1."""
self._K_computations(X, X2)
return self._K1
def K2(self, X, X2):
"""Compute the part of the kernel associated with k2."""
self._K_computations(X, X2)
return self._K2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
def _param_grad_helper(self,dL_dK,X,X2,target):
"""Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else:
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
X2 = X
self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else:
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params = self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
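
A short sketch of how Prod is used, assuming k1 and k2 stand in for any two Kernpart instances on the same 1-d input space (placeholders, not constructors from this commit): on the shared space the product accumulates K1*K2 element-wise into the target, while tensor=True routes the first k1.input_dim columns to k1 and the remaining columns to k2.

    import numpy as np

    p = Prod(k1, k2, tensor=False)   # shared input space: slice1 == slice2
    pt = Prod(k1, k2, tensor=True)   # stacked inputs: columns split between k1 and k2

    X = np.random.randn(5, p.input_dim)
    K = np.zeros((5, 5))
    p.K(X, None, K)                  # accumulates k1.K(X) * k2.K(X) into K element-wise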

View file

@@ -57,26 +57,16 @@ class BayesianGPLVM(SparseGP, GPLVM):
self.init = state.pop()
SparseGP._setstate(self, state)
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
return dL_dmu, dL_dS
def KL_divergence(self):
var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
self._update_gradients_Z(add=False)
super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.KL_divergence()
dL_dmu, dL_dS = self.dL_dmuS()
dL_dmu, dL_dS = self.kern.gradients_muS_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)
# dL:
self.q.mean.gradient = dL_dmu
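
For reference, the KL_divergence term above is the closed-form KL between the factorised variational posterior q(X) = N(mu, S) and the unit-Gaussian prior, which is what the two sums compute:

    KL(q || p) = 0.5 * \sum_{n,q} ( \mu_{nq}^2 + S_{nq} - \log S_{nq} - 1 )

with var_mean = sum(mu^2), var_S = sum(S - log S), and the trailing 0.5 * input_dim * num_data supplying the constant.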

View file

@@ -23,7 +23,7 @@ class GPRegression(GP):
def __init__(self, X, Y, kernel=None):
if kernel is None:
kernel = kern.rbf(X.shape[1])
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian()

View file

@@ -7,7 +7,7 @@ from GPy.util.linalg import PCA
import numpy
import itertools
import pylab
from GPy.kern.kern import kern
from GPy.kern import Kern
from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD2(Model):
@@ -64,11 +64,11 @@ class MRD(Model):
# sort out the kernels
if kernels is None:
kernels = [None] * len(likelihood_or_Y_list)
elif isinstance(kernels, kern):
elif isinstance(kernels, Kern):
kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
else:
assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!"
assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!"
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim

View file

@@ -7,7 +7,7 @@ import pylab as pb
import Tango
from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy
from ...kern.parts.linear import Linear
from ...kern import Linear
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):

View file

@@ -9,7 +9,7 @@ from ...util.misc import param_to_array
def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', fixed_inputs=[],
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
@@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols and which_parts
using which_data_rows, which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
@@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
@@ -76,12 +74,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#make a prediction on the frame and plot it
if plot_raw:
m, v = model._raw_predict(Xgrid, which_parts=which_parts)
m, v = model._raw_predict(Xgrid)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = model.Y
else:
m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts)
m, v, lower, upper = model.predict(Xgrid)
Y = model.Y
for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
@@ -89,7 +87,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts)
Ysim = model.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
@@ -131,10 +129,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#predict on the frame and plot
if plot_raw:
m, _ = model._raw_predict(Xgrid, which_parts=which_parts)
m, _ = model._raw_predict(Xgrid)
Y = model.Y
else:
m, _, _, _ = model.predict(Xgrid, which_parts=which_parts)
m, _, _, _ = model.predict(Xgrid)
Y = model.data
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T

View file

@@ -1,44 +1,87 @@
from ..core.parameterization.array_core import ObservableArray, ParamList
from ..core.parameterization.parameter_core import Observable
from ..core.parameterization.array_core import ParamList
class Cacher(object):
def __init__(self, operation, limit=5):
def __init__(self, operation, limit=5, reset_on_first=False):
self.limit = int(limit)
self._reset_on_first = reset_on_first
self.operation=operation
self.cached_inputs = ParamList([])
self.cached_inputs = []
self.cached_outputs = []
self.inputs_changed = []
def __call__(self, X):
assert isinstance(X, ObservableArray)
if X in self.cached_inputs:
i = self.cached_inputs.index(X)
def __call__(self, *args):
if self._reset_on_first:
assert isinstance(args[0], Observable)
args[0].add_observer(args[0], self.reset)
cached_args = args
else:
cached_args = args[1:]
if not all([isinstance(arg, Observable) for arg in cached_args]):
return self.operation(*args)
if cached_args in self.cached_inputs:
i = self.cached_inputs.index(cached_args)
if self.inputs_changed[i]:
self.cached_outputs[i] = self.operation(X)
self.cached_outputs[i] = self.operation(*args)
self.inputs_changed[i] = False
return self.cached_outputs[i]
else:
if len(self.cached_inputs) == self.limit:
X_ = self.cached_inputs.pop(0)
X_.remove_observer(self)
args_ = self.cached_inputs.pop(0)
[a.remove_observer(self) for a in args_]
self.inputs_changed.pop(0)
self.cached_outputs.pop(0)
self.cached_inputs.append(X)
self.cached_outputs.append(self.operation(X))
self.cached_inputs.append(cached_args)
self.cached_outputs.append(self.operation(*args))
self.inputs_changed.append(False)
X.add_observer(self, self.on_cache_changed)
[a.add_observer(self, self.on_cache_changed) for a in args]
return self.cached_outputs[-1]
def on_cache_changed(self, X):
#print id(X)
Xbase = X
while Xbase is not None:
try:
i = self.cached_inputs.index(X)
break
except ValueError:
Xbase = X.base
continue
self.inputs_changed[i] = True
def on_cache_changed(self, arg):
self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
def reset(self, obj):
[[a.remove_observer(self) for a in args] for args in self.cached_inputs]
self.cached_inputs = []
self.cached_outputs = []
self.inputs_changed = []
def cache_this(limit=5, reset_on_self=False):
def limited_cache(f):
c = Cacher(f, limit, reset_on_first=reset_on_self)
def f_wrap(*args):
return c(*args)
f_wrap._cacher = c
return f_wrap
return limited_cache
#Xbase = X
#while Xbase is not None:
#try:
#i = self.cached_inputs.index(X)
#break
#except ValueError:
#Xbase = X.base
#continue
#self.inputs_changed[i] = True
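
A minimal sketch of how the new cache_this decorator might be used (the class and method names here are illustrative assumptions, not part of this commit). Caching only engages when all cached arguments are Observable objects; plain ndarrays fall straight through to the wrapped function.

    import numpy as np

    class CachedOps(object):
        @cache_this(limit=3)
        def outer(self, X):
            # recomputed only when a cached Observable input signals a change;
            # with plain ndarrays the Cacher simply calls through
            return np.dot(X, X.T)

    ops = CachedOps()
    X = np.random.randn(100, 5)
    K = ops.outer(X)   # X is not Observable, so this call bypasses the cache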

View file

@@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500):
num_in = 1
X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in))
X.sort(axis=0)
rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.white(num_in, variance=1e-2)
rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.White(num_in, variance=1e-2)
kernel = rbf + white
K = kernel.K(X)
y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1))
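
Putting the renames together, user code moves from the lower-case constructors to the new kernel classes. A minimal sketch of the updated usage, assuming the rest of the GPRegression interface is unchanged:

    import numpy as np
    import GPy

    X = np.random.uniform(-1., 1., (50, 1))
    Y = np.sin(3. * X) + 0.05 * np.random.randn(50, 1)

    # new-style kernel classes (RBF / White replace rbf / white)
    kernel = GPy.kern.RBF(1, variance=1., lengthscale=np.array((0.25,))) \
             + GPy.kern.White(1, variance=1e-2)
    model = GPy.models.GPRegression(X, Y, kernel)

    mean, var, lower, upper = model.predict(X)   # note: no which_parts argument any more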