merge SSGPLVM into params branch

Zhenwen Dai 2014-02-25 16:12:04 +00:00
commit 9dcf539d23
34 changed files with 595 additions and 1589 deletions

View file

@@ -52,10 +52,7 @@ class SparseGP(GP):
         self.parameters_changed()

     def has_uncertain_inputs(self):
-        if isinstance(self.X, VariationalPosterior):
-            return True
-        else:
-            return False
+        return isinstance(self.X, VariationalPosterior)

     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
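The rewritten has_uncertain_inputs leans on isinstance already returning a bool. A self-contained sketch of the idiom with stand-in classes (not GPy's real VariationalPosterior or SparseGP):

import numpy as np

class VariationalPosterior(object):  # stand-in for GPy's class of the same name
    pass

class ToySparseGP(object):           # hypothetical holder, just for illustration
    def __init__(self, X):
        self.X = X
    def has_uncertain_inputs(self):
        # isinstance already yields a bool, so no if/else is needed
        return isinstance(self.X, VariationalPosterior)

print(ToySparseGP(np.zeros((5, 2))).has_uncertain_inputs())        # False
print(ToySparseGP(VariationalPosterior()).has_uncertain_inputs())  # True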

View file

@@ -16,7 +16,7 @@ def olympic_marathon_men(optimize=True, plot=True):
     m = GPy.models.GPRegression(data['X'], data['Y'])

     # set the lengthscale to be something sensible (defaults to 1)
-    m['rbf_lengthscale'] = 10
+    m.kern.lengthscale = 10.

     if optimize:
         m.optimize('bfgs', max_iters=200)
@@ -41,11 +41,10 @@ def coregionalization_toy2(optimize=True, plot=True):
     Y = np.vstack((Y1, Y2))

     #build the kernel
-    k1 = GPy.kern.RBF(1) + GPy.kern.bias(1)
-    k2 = GPy.kern.coregionalize(2,1)
+    k1 = GPy.kern.RBF(1) + GPy.kern.Bias(1)
+    k2 = GPy.kern.Coregionalize(2,1)
     k = k1**k2

     m = GPy.models.GPRegression(X, Y, kernel=k)
-    m.constrain_fixed('.*rbf_var', 1.)
     if optimize:
         m.optimize('bfgs', max_iters=100)
@@ -86,11 +85,13 @@ def coregionalization_sparse(optimize=True, plot=True):
     """

     #fetch the data from the non sparse examples
     m = coregionalization_toy2(optimize=False, plot=False)
-    X, Y = m.X, m.likelihood.Y
+    X, Y = m.X, m.Y
+    k = GPy.kern.RBF(1)**GPy.kern.Coregionalize(2)

     #construct a model
-    m = GPy.models.SparseGPRegression(X,Y)
-    m.constrain_fixed('iip_\d+_1') # don't optimize the inducing input indexes
+    m = GPy.models.SparseGPRegression(X,Y, num_inducing=25, kernel=k)
+    m.Z[:,1].fix() # don't optimize the inducing input indexes
     if optimize:
         m.optimize('bfgs', max_iters=100, messages=1)
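Read together, the hunks in this file move the examples to the renamed kernel classes (Bias, Coregionalize) and the new parameter interface (m.Z[:,1].fix()). A hypothetical end-to-end run of the updated coregionalized sparse regression, with invented toy data shapes; the constructor arguments mirror the hunk above and are as they appear in this commit, not necessarily later GPy releases:

import numpy as np
import GPy

# two outputs observed at different inputs, stacked with an output-index column
X1, X2 = np.random.rand(40, 1), np.random.rand(30, 1)
X = np.vstack([np.hstack([X1, np.zeros_like(X1)]),
               np.hstack([X2, np.ones_like(X2)])])
Y = np.vstack([np.sin(X1), np.cos(X2)]) + 0.05 * np.random.randn(70, 1)

# intrinsic coregionalization: data kernel tensor-multiplied over the index column
k = GPy.kern.RBF(1) ** GPy.kern.Coregionalize(2)
m = GPy.models.SparseGPRegression(X, Y, kernel=k, num_inducing=25)
m.Z[:, 1].fix()   # don't optimize the inducing points' output indexes
m.optimize('bfgs', max_iters=100)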
@@ -128,7 +129,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
                         np.random.randint(0, 4, num_inducing)[:, None]))

     k1 = GPy.kern.RBF(1)
-    k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
+    k2 = GPy.kern.Coregionalize(output_dim=5, rank=5)
     k = k1**k2

     m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
@@ -322,7 +323,7 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
         kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
         kernel = GPy.kern.RBF(X.shape[1], ARD=1)
-    kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
+    kernel += GPy.kern.White(X.shape[1]) + GPy.kern.Bias(X.shape[1])
     m = GPy.models.GPRegression(X, Y, kernel)
     # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
     # m.set_prior('.*lengthscale',len_prior)
@@ -361,7 +362,7 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
         kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
         kernel = GPy.kern.RBF(X.shape[1], ARD=1)
-    #kernel += GPy.kern.bias(X.shape[1])
+    #kernel += GPy.kern.Bias(X.shape[1])
     X_variance = np.ones(X.shape) * 0.5
     m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
     # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25

View file

@@ -3,7 +3,7 @@ Created on 24 Apr 2013

 @author: maxz
 '''
-from GPy.inference.gradient_descent_update_rules import FletcherReeves, \
+from gradient_descent_update_rules import FletcherReeves, \
     PolakRibiere
 from Queue import Empty
 from multiprocessing import Value

View file

@@ -3,25 +3,10 @@ from _src.rbf import RBF
 from _src.linear import Linear
 from _src.static import Bias, White
 from _src.brownian import Brownian
-from _src.sympykern import Sympykern
 from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
 from _src.mlp import MLP
 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
-from _src.independent_outputs import IndependentOutputs
+from _src.independent_outputs import IndependentOutputs, Hierarchical
+from _src.coregionalize import Coregionalize
 from _src.ssrbf import SSRBF
-#import coregionalize
-#import eq_ode1
-#import finite_dimensional
-#import fixed
-#import gibbs
-#import hetero
-#import hierarchical
-#import ODE_1
-#import periodic_exponential
-#import periodic_Matern32
-#import periodic_Matern52
-#import poly
-#import rbfcos
-#import rbf
-#import rbf_inv
-#import spline
-#import symmetric

View file

@@ -45,9 +45,6 @@ class Add(Kern):
     def update_gradients_full(self, dL_dK, X):
         [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -70,13 +67,13 @@ class Add(Kern):
         return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])

-    def psi0(self, Z, mu, S):
+    def psi0(self, Z, variational_posterior):
         return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0)

-    def psi1(self, Z, mu, S):
+    def psi1(self, Z, variational_posterior):
         return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

-    def psi2(self, Z, mu, S):
+    def psi2(self, Z, variational_posterior):
         psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

         # compute the "cross" terms
@@ -104,7 +101,7 @@ class Add(Kern):
             raise NotImplementedError, "psi2 cannot be computed for this kernel"
         return psi2

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
         from white import White
         from rbf import RBF
         #from rbf_inv import RBFInv
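The recurring change across the kernel files in this commit is that the psi statistics take one variational posterior object instead of separate mu and S arrays. A stand-in sketch of the new calling convention (a toy container, not GPy's real class; the .mean/.variance attributes are the ones the Linear and RBF hunks below read):

import numpy as np

class ToyPosterior(object):
    """Stand-in bundling the moments of q(X)."""
    def __init__(self, mean, variance):
        self.mean, self.variance = mean, variance

def psi1_linear(variances, Z, variational_posterior):
    # for a linear kernel psi1 is K(mu, Z), as in the Linear.psi1 hunk below
    return np.dot(variational_posterior.mean * variances, Z.T)

q = ToyPosterior(np.random.randn(10, 2), 0.1 * np.ones((10, 2)))
Z = np.random.randn(3, 2)
print(psi1_linear(np.ones(2), Z, q).shape)   # (10, 3)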

View file

@@ -1,568 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import kern
import parts
def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False,name='inverse rbf'):
"""
Construct an RBF kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param inv_lengthscale: the inverse lengthscale of the kernel
:type inv_lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD,name=name)
return kern(input_dim, [part])
def rbf(input_dim,variance=1., lengthscale=None,ARD=False, name='rbf'):
"""
Construct an RBF kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD, name=name)
return kern(input_dim, [part])
def linear(input_dim,variances=None,ARD=False,name='linear'):
"""
Construct a linear kernel.
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variances:
:type variances: np.ndarray
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.linear.Linear(input_dim,variances,ARD,name=name)
return kern(input_dim, [part])
def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False):
"""
Construct an MLP kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param weight_variance: the vector of weight variances for the input weights of the neural network (length 1 if the kernel is isotropic)
:type weight_variance: np.ndarray
:param bias_variance: the variance of the biases in the neural network.
:type bias_variance: float
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean
"""
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
return kern(input_dim, [part])
def gibbs(input_dim,variance=1., mapping=None):
"""
Gibbs and MacKay non-stationary covariance function.
.. math::
r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}
k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x'))}
Where :math:`l(x)` is a function giving the length scale as a function of space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space.
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
part = parts.gibbs.Gibbs(input_dim,variance,mapping)
return kern(input_dim, [part])
def hetero(input_dim, mapping=None, transform=None):
"""
"""
part = parts.hetero.Hetero(input_dim,mapping,transform)
return kern(input_dim, [part])
def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
"""
Construct a polynomial kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param weight_variance: the vector of weight variances for the input weights
:type weight_variance: np.ndarray
:param bias_variance: the variance of the biases.
:type bias_variance: float
:param degree: the degree of the polynomial
:type degree: int
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean
"""
part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
return kern(input_dim, [part])
def white(input_dim,variance=1.,name='white'):
"""
Construct a white kernel.
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.white.White(input_dim,variance,name=name)
return kern(input_dim, [part])
def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None):
"""Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay)
return kern(2, [part])
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
"""
Construct an exponential kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part])
def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 3/2 kernel.
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part])
def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 5/2 kernel.
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
return kern(input_dim, [part])
def bias(input_dim, variance=1., name='bias'):
"""
Construct a bias kernel.
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.bias.Bias(input_dim, variance, name=name)
return kern(input_dim, [part])
def finite_dimensional(input_dim, F, G, variances=1., weights=None):
"""
Construct a finite dimensional kernel.
:param input_dim: the number of input dimensions
:type input_dim: int
:param F: np.array of functions with shape (n,) - the n basis functions
:type F: np.array
:param G: np.array with shape (n,n) - the Gram matrix associated to F
:type G: np.array
:param variances: np.ndarray with shape (n,)
:type variances: np.ndarray
"""
part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
return kern(input_dim, [part])
def spline(input_dim, variance=1.):
"""
Construct a spline kernel.
:param input_dim: Dimensionality of the kernel
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.spline.Spline(input_dim, variance)
return kern(input_dim, [part])
def Brownian(input_dim, variance=1.):
"""
Construct a Brownian motion kernel.
:param input_dim: Dimensionality of the kernel
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.Brownian.Brownian(input_dim, variance)
return kern(input_dim, [part])
try:
import sympy as sp
sympy_available = True
except ImportError:
sympy_available = False
if sympy_available:
from parts.sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
"""
Radial Basis Function covariance.
"""
X = sp.symbols('x_:' + str(input_dim))
Z = sp.symbols('z_:' + str(input_dim))
variance = sp.var('variance',positive=True)
if ARD:
lengthscales = sp.symbols('lengthscale_:' + str(input_dim))
dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale%i**2' % (i, i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/2.)
else:
lengthscale = sp.var('lengthscale',positive=True)
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/(2*lengthscale**2))
return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.):
"""
Exponentiated quadratic with multiple outputs.
"""
real_input_dim = input_dim
if output_dim>1:
real_input_dim -= 1
X = sp.symbols('x_:' + str(real_input_dim))
Z = sp.symbols('z_:' + str(real_input_dim))
scale = sp.var('scale_i scale_j',positive=True)
if ARD:
lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)]
shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)]
dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)])
dist = parse_expr(dist_string)
f = variance*sp.exp(-dist/2.)
else:
lengthscale = sp.var('lengthscale_i lengthscale_j',positive=True)
shared_lengthscale = sp.var('shared_lengthscale',positive=True)
dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)])
dist = parse_expr(dist_string)
f = scale_i*scale_j*sp.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j)))
return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')])
def sympykern(input_dim, k=None, output_dim=1, name=None, param=None):
"""
A base kernel object, where all the hard work is done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x1, z1, x2, z2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x1, z1, etc
- to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
"""
return kern(input_dim, [spkern(input_dim, k=k, output_dim=output_dim, name=name, param=param)])
del sympy_available
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
"""
Construct a periodic exponential kernel
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
"""
Construct a periodic Matern 3/2 kernel.
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
"""
Construct a periodic Matern 5/2 kernel.
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
def prod(k1,k2,tensor=False):
"""
Construct a product kernel over input_dim from two kernels over input_dim
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:param tensor: The kernels are either multiplied as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
part = parts.prod.Prod(k1, k2, tensor)
return kern(part.input_dim, [part])
def symmetric(k):
"""
Construct a symmetric kernel from an existing kernel
"""
k_ = k.copy()
k_.parts = [symmetric.Symmetric(p) for p in k.parts]
return k_
def coregionalize(output_dim,rank=1, W=None, kappa=None):
"""
Coregionalization matrix B, of the form:
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \kappa \mathbf{I}
An intrinsic/linear coregionalization kernel of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a kernel k(x,y) and B.
:param output_dim: the number of outputs to coregionalize
:type output_dim: int
:param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: numpy array of dimensionality (num_outputs, rank)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (output_dim,)
:rtype: kernel object
"""
p = parts.coregionalize.Coregionalize(output_dim,rank,W,kappa)
return kern(1,[p])
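# The docstring above defines B = W W^T + kappa I and the intrinsic
# coregionalization kernel k_2(x, y) = B k(x, y). A small numpy illustration
# of both formulas (toy shapes and values, not part of the original file):
import numpy as np
W = np.random.randn(3, 1)                       # output_dim=3, rank=1
kappa = 0.5 * np.ones(3)
B = W.dot(W.T) + np.diag(kappa)                 # B = W W^T + kappa I
k = lambda x, y: np.exp(-0.5 * (x - y) ** 2)    # toy data kernel k(x, y)
print(B[0, 2] * k(0.3, 0.7))                    # covariance of (0.3, output 0) and (0.7, output 2)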
def rational_quadratic(input_dim, variance=1., lengthscale=1., power=1.):
"""
Construct rational quadratic kernel.
:param input_dim: the number of input dimensions
:type input_dim: int (input_dim=1 is the only value currently supported)
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscale :math:`\ell`
:type lengthscale: float
:rtype: kern object
"""
part = parts.rational_quadratic.RationalQuadratic(input_dim, variance, lengthscale, power)
return kern(input_dim, [part])
def fixed(input_dim, K, variance=1.):
"""
Construct a Fixed effect kernel.
:param input_dim: the number of input dimensions
:type input_dim: int (input_dim=1 is the only value currently supported)
:param K: the fixed covariance matrix
:type K: np.array
:param variance: kernel variance
:type variance: float
:rtype: kern object
"""
part = parts.fixed.Fixed(input_dim, K, variance)
return kern(input_dim, [part])
def rbfcos(input_dim, variance=1., frequencies=None, bandwidths=None, ARD=False):
"""
construct a rbfcos kernel
"""
part = parts.rbfcos.RBFCos(input_dim, variance, frequencies, bandwidths, ARD)
return kern(input_dim, [part])
def independent_outputs(k):
"""
Construct a kernel with independent outputs from an existing kernel
"""
for sl in k.input_slices:
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
_parts = [parts.independent_outputs.IndependentOutputs(p) for p in k.parts]
return kern(k.input_dim+1,_parts)
def hierarchical(k):
"""
TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
"""
# for sl in k.input_slices:
# assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
_parts = [parts.hierarchical.Hierarchical(k.parts)]
return kern(k.input_dim+len(k.parts),_parts)
def build_lcm(input_dim, output_dim, kernel_list = [], rank=1,W=None,kappa=None):
"""
Builds a kernel of a linear coregionalization model
:input_dim: Input dimensionality
:output_dim: Number of outputs
:kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different coregionalization matrix
:type kernel_list: list of GPy kernels
:param rank: number of columns of the coregionalization parameters 'coregion_W'
:type rank: integer
..note: the kernels' dimensionality is overwritten to fit input_dim
"""
for k in kernel_list:
if k.input_dim <> input_dim:
k.input_dim = input_dim
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
k_coreg = coregionalize(output_dim,rank,W,kappa)
kernel = kernel_list[0]**k_coreg.copy()
for k in kernel_list[1:]:
k_coreg = coregionalize(output_dim,rank,W,kappa)
kernel += k**k_coreg.copy()
return kernel
def ODE_1(input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
"""
kernel resulting from a first order ODE with an OU driving GP
:param input_dim: the number of input dimensions, has to be equal to one
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function
:type lengthscaleY: float
:rtype: kernel object
"""
part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
return kern(input_dim, [part])

View file

@@ -129,7 +129,7 @@ class Coregionalize(Kern):
     def update_gradients_diag(self, dL_dKdiag, X):
         index = np.asarray(X, dtype=np.int).flatten()
-        dL_dKdiag_small = np.array([dL_dKdiag[index==i] for i in xrange(output_dim)])
+        dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)])
         self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
         self.kappa.gradient = dL_dKdiag_small
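The fix above sums each output's slice of dL_dKdiag (and qualifies output_dim with self). A toy numpy sketch of the aggregation the corrected line performs:

import numpy as np

dL_dKdiag = np.array([0.1, 0.2, 0.3, 0.4])   # one entry per data point
index = np.array([0, 1, 0, 1])               # output index of each point
output_dim = 2

# each output's gradient is the sum of its own points' entries
dL_dKdiag_small = np.array([dL_dKdiag[index == i].sum() for i in range(output_dim)])
print(dL_dKdiag_small)   # [0.4 0.6]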

View file

@@ -102,7 +102,7 @@ class IndependentOutputs(Kern):
         [[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices]
         self.kern._set_gradient(target)

-def Hierarchical(kern_f, kern_g, name='hierarchy'):
+class Hierarchical(Kern):
     """
     A kernel which can represent a simple hierarchical model.
@@ -110,10 +110,51 @@ def Hierarchical(kern_f, kern_g, name='hierarchy'):
     series across irregularly sampled replicates and clusters"

     http://www.biomedcentral.com/1471-2105/14/252

-    The index of the functions is given by the last column in the input X
+    The index of the functions is given by additional columns in the input X;
+    the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
     """
-    assert kern_f.input_dim == kern_g.input_dim
-    return kern_f + IndependentOutputs(kern_g)
+    def __init__(self, kerns, name='hierarchy'):
+        assert all([k.input_dim==kerns[0].input_dim for k in kerns])
+        super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
+        self.kerns = kerns
+        self.levels = len(kerns) - 1 # number of index columns appended to X
+        self.add_parameters(self.kerns)
+
+    def K(self, X, X2=None):
+        X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
+        K = self.kerns[0].K(X, X2)
+        if X2 is None:
+            [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
+        else:
+            X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
+            [[[[np.copyto(K[s, s2], k.K(X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i, slices_j in zip(slices_k, slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)]
+        return K
+
+    def Kdiag(self, X):
+        X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
+        Kdiag = self.kerns[0].Kdiag(X)
+        [[[np.copyto(Kdiag[s], k.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
+        return Kdiag
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        X, slices = X[:,:-1], index_to_slices(X[:,-1])
+        if X2 is None:
+            self.kerns[0].update_gradients_full(dL_dK, X, None)
+            for k, slices_k in zip(self.kerns[1:], slices):
+                target = np.zeros(k.size)
+                def collate_grads(dL, X, X2):
+                    k.update_gradients_full(dL, X, X2)
+                    k._collect_gradient(target)
+                [[collate_grads(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices_k]
+                k._set_gradient(target)
+        else:
+            X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
+            self.kerns[0].update_gradients_full(dL_dK, X, None)
+            for k, slices_k in zip(self.kerns[1:], slices):
+                target = np.zeros(k.size)
+                def collate_grads(dL, X, X2):
+                    k.update_gradients_full(dL, X, X2)
+                    k._collect_gradient(target)
+                [[[collate_grads(dL_dK[s,s2], X[s], X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i, slices_j in zip(slices, slices2)]
+                k._set_gradient(target)
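Hypothetical usage of the new class (toy data; this assumes, as the slicing code above does, that the appended index column comes in contiguous blocks that index_to_slices can cut):

import numpy as np
import GPy

# 20 points in 1d with a replicate index (0/1) appended as an extra column
x = np.linspace(0, 1, 20)[:, None]
rep = np.repeat([0., 1.], 10)[:, None]        # contiguous blocks per replicate
X = np.hstack([x, rep])
Y = np.sin(X[:, :1]) + 0.1 * np.random.randn(20, 1)

# one shared kernel plus one per-replicate deviation kernel, both over the 1d input
k = GPy.kern.Hierarchical([GPy.kern.RBF(1), GPy.kern.RBF(1)])
m = GPy.models.GPRegression(X, Y, kernel=k)
m.optimize()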

View file

@@ -26,17 +26,17 @@ class Kern(Parameterized):
         raise NotImplementedError
     def Kdiag(self, Xa):
         raise NotImplementedError
-    def psi0(self,Z,posterior_variational):
+    def psi0(self,Z,variational_posterior):
         raise NotImplementedError
-    def psi1(self,Z,posterior_variational):
+    def psi1(self,Z,variational_posterior):
         raise NotImplementedError
-    def psi2(self,Z,posterior_variational):
+    def psi2(self,Z,variational_posterior):
         raise NotImplementedError
     def gradients_X(self, dL_dK, X, X2):
         raise NotImplementedError
     def gradients_X_diag(self, dL_dK, X):
         raise NotImplementedError
-    def update_gradients_full(self, dL_dK, X):
+    def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
@@ -49,16 +49,16 @@ class Kern(Parameterized):
         self._collect_gradient(target)
         self._set_gradient(target)

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
         raise NotImplementedError

     def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
         grad = self.gradients_X(dL_dKmm, Z)
         grad += self.gradients_X(dL_dKnm.T, Z, X)
         return grad

-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         raise NotImplementedError
-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         raise NotImplementedError

     def plot_ARD(self, *args, **kw):
@@ -124,209 +124,3 @@ class Kern(Parameterized):
         assert isinstance(other, Kern), "only kernels can be added to kernels..."
         from prod import Prod
         return Prod(self, other, tensor)
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
from GPy.kern import RBF
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = RBF(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK==None:
if X2==None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X==None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks
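The abstract methods at the top of this file (K, Kdiag and the update_gradients_* family, now taking X2) are the contract the concrete kernels in this commit implement. A minimal free-standing sketch of a kernel written to that shape of interface, using a toy class rather than GPy's Kern base:

import numpy as np

class ToyLinearKern(object):
    """Toy kernel k(x, x') = variance * x.x' following the interface above."""
    def __init__(self, input_dim, variance=1.):
        self.input_dim = input_dim
        self.variance = variance
    def K(self, X, X2=None):
        if X2 is None:
            X2 = X
        return self.variance * X.dot(X2.T)
    def Kdiag(self, X):
        return self.variance * np.sum(np.square(X), 1)
    def update_gradients_full(self, dL_dK, X, X2=None):
        # d/d(variance) of sum(dL_dK * K) for this kernel
        if X2 is None:
            X2 = X
        self.variance_gradient = np.sum(dL_dK * X.dot(X2.T))

k = ToyLinearKern(2)
X = np.random.randn(5, 2)
print(np.allclose(np.diag(k.K(X)), k.Kdiag(X)))   # True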

View file

@@ -106,52 +106,52 @@ class Linear(Kern):
     #            variational               #
     #---------------------------------------#

-    def psi0(self, Z, posterior_variational):
-        return np.sum(self.variances * self._mu2S(posterior_variational), 1)
+    def psi0(self, Z, variational_posterior):
+        return np.sum(self.variances * self._mu2S(variational_posterior), 1)

-    def psi1(self, Z, posterior_variational):
-        return self.K(posterior_variational.mean, Z) #the variance, it does nothing
+    def psi1(self, Z, variational_posterior):
+        return self.K(variational_posterior.mean, Z) #the variance, it does nothing

-    def psi2(self, Z, posterior_variational):
+    def psi2(self, Z, variational_posterior):
         ZA = Z * self.variances
-        ZAinner = self._ZAinner(posterior_variational, Z)
+        ZAinner = self._ZAinner(variational_posterior, Z)
         return np.dot(ZAinner, ZA.T)

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
-        mu, S = posterior_variational.mean, posterior_variational.variance
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+        mu, S = variational_posterior.mean, variational_posterior.variance
         # psi0:
-        tmp = dL_dpsi0[:, None] * self._mu2S(posterior_variational)
+        tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
         if self.ARD: grad = tmp.sum(0)
         else: grad = np.atleast_1d(tmp.sum())
         #psi1
         self.update_gradients_full(dL_dpsi1, mu, Z)
         grad += self.variances.gradient
         #psi2
-        tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(posterior_variational, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
+        tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
         if self.ARD: grad += tmp.sum(0).sum(0).sum(0)
         else: grad += tmp.sum()
         #from Kmm
         self.update_gradients_full(dL_dKmm, Z, None)
         self.variances.gradient += grad

-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
         # Kmm
         grad = self.gradients_X(dL_dKmm, Z, None)
         #psi1
-        grad += self.gradients_X(dL_dpsi1.T, Z, posterior_variational.mean)
+        grad += self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
         #psi2
-        self._weave_dpsi2_dZ(dL_dpsi2, Z, posterior_variational, grad)
+        self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
         return grad

-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, posterior_variational, Z):
-        grad_mu, grad_S = np.zeros(posterior_variational.mean.shape), np.zeros(posterior_variational.mean.shape)
+    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+        grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
         # psi0
-        grad_mu += dL_dpsi0[:, None] * (2.0 * posterior_variational.mean * self.variances)
+        grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
         grad_S += dL_dpsi0[:, None] * self.variances
         # psi1
         grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
         # psi2
-        self._weave_dpsi2_dmuS(dL_dpsi2, Z, posterior_variational, grad_mu, grad_S)
+        self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)
         return grad_mu, grad_S

View file

@@ -42,10 +42,6 @@ class Prod(Kern):
         self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
         self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])

-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
-        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )
-
     def gradients_X(self, dL_dK, X, X2=None):
         target = np.zeros(X.shape)
         if X2 is None:

View file

@@ -40,27 +40,27 @@ class RBF(Stationary):
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

-    def psi0(self, Z, posterior_variational):
-        return self.Kdiag(posterior_variational.mean)
+    def psi0(self, Z, variational_posterior):
+        return self.Kdiag(variational_posterior.mean)

-    def psi1(self, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def psi1(self, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
         return self._psi1

-    def psi2(self, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def psi2(self, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
         return self._psi2

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         #contributions from Kmm
         self.update_gradients_full(dL_dKmm, Z)

-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
         l2 = self.lengthscale **2
@@ -87,9 +87,9 @@ class RBF(Stationary):
         else:
             self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)

-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
         l2 = self.lengthscale **2
@@ -108,9 +108,9 @@ class RBF(Stationary):
         return grad

-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
-        mu = posterior_variational.mean
-        S = posterior_variational.variance
+    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
         self._psi_computations(Z, mu, S)
         l2 = self.lengthscale **2
         #psi1

View file

@@ -1,115 +0,0 @@
# Copyright (c) 2012, James Hensman and Andrew Gordon Wilson
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...core.parameterization import Param
class RBFCos(Kernpart):
def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False):
self.input_dim = input_dim
self.name = 'rbfcos'
if self.input_dim>10:
print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs"
self.ARD = ARD
#set the default frequencies and bandwidths, appropriate num_params
if ARD:
self.num_params = 2*self.input_dim + 1
if frequencies is not None:
frequencies = np.asarray(frequencies)
assert frequencies.size == self.input_dim, "bad number of frequencies"
else:
frequencies = np.ones(self.input_dim)
if bandwidths is not None:
bandwidths = np.asarray(bandwidths)
assert bandwidths.size == self.input_dim, "bad number of bandwidths"
else:
bandwidths = np.ones(self.input_dim)
else:
self.num_params = 3
if frequencies is not None:
frequencies = np.asarray(frequencies)
assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel"
else:
frequencies = np.ones(1)
if bandwidths is not None:
bandwidths = np.asarray(bandwidths)
assert bandwidths.size == 1, "Exactly one bandwidth needed for non-ARD kernel"
else:
bandwidths = np.ones(1)
self.variance = Param('variance', variance)
self.frequencies = Param('frequencies', frequencies)
self.bandwidths = Param('bandwidths', bandwidths)
#initialise cache
self._X, self._X2 = np.empty(shape=(3,1))
# def _get_params(self):
# return np.hstack((self.variance,self.frequencies, self.bandwidths))
# def _set_params(self,x):
# assert x.size==(self.num_params)
# if self.ARD:
# self.variance = x[0]
# self.frequencies = x[1:1+self.input_dim]
# self.bandwidths = x[1+self.input_dim:]
# else:
# self.variance, self.frequencies, self.bandwidths = x
# def _get_param_names(self):
# if self.num_params == 3:
# return ['variance','frequency','bandwidth']
# else:
# return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)]
def K(self,X,X2,target):
self._K_computations(X,X2)
target += self.variance*self._dvar
def Kdiag(self,X,target):
np.add(target,self.variance,target)
def _param_grad_helper(self,dL_dK,X,X2,target):
self._K_computations(X,X2)
target[0] += np.sum(dL_dK*self._dvar)
if self.ARD:
for q in xrange(self.input_dim):
target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self._dist[:,:,q]*self.frequencies[q])*self._dist[:,:,q])
target[q+1+self.input_dim] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2[:,:,q])
else:
target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self._dist*self.frequencies)*self._dist,-1))
target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2.sum(-1))
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag)
def gradients_X(self,dL_dK,X,X2,target):
#TODO!!!
raise NotImplementedError
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def parameters_changed(self):
self._rbf_part = np.exp(-2.*np.pi**2*np.sum(self._dist2*self.bandwidths,-1))
self._cos_part = np.prod(np.cos(2.*np.pi*self._dist*self.frequencies),-1)
self._dvar = self._rbf_part*self._cos_part
def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2)):
if X2 is None: X2 = X
self._X = X.copy()
self._X2 = X2.copy()
#do the distances: this will be high memory for large input_dim
#NB: we don't take the abs of the dist because cos is symmetric
self._dist = X[:,None,:] - X2[None,:,:]
self._dist2 = np.square(self._dist)
#ensure the next section is computed:
self._params = np.empty(self.num_params)

View file

@@ -3,6 +3,7 @@
 from kern import Kern
+import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
 import numpy as np
@@ -18,32 +19,32 @@ class Static(Kern):
         ret[:] = self.variance
         return ret

-    def gradients_X(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2=None):
         return np.zeros(X.shape)

-    def gradients_X_diag(self, dL_dKdiag, X, target):
+    def gradients_X_diag(self, dL_dKdiag, X):
         return np.zeros(X.shape)

-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return np.zeros(Z.shape)

-    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        return np.zeros(mu.shape), np.zeros(S.shape)
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)

-    def psi0(self, Z, mu, S):
-        return self.Kdiag(mu)
+    def psi0(self, Z, variational_posterior):
+        return self.Kdiag(variational_posterior.mean)

-    def psi1(self, Z, mu, S, target):
-        return self.K(mu, Z)
+    def psi1(self, Z, variational_posterior):
+        return self.K(variational_posterior.mean, Z)

-    def psi2(Z, mu, S):
-        K = self.K(mu, Z)
+    def psi2(self, Z, variational_posterior):
+        K = self.K(variational_posterior.mean, Z)
         return K[:,:,None]*K[:,None,:] # NB. more efficient implementations on inheriting classes

 class White(Static):
     def __init__(self, input_dim, variance=1., name='white'):
-        super(White, self).__init__(input_dim, name)
+        super(White, self).__init__(input_dim, variance, name)

     def K(self, X, X2=None):
         if X2 is None:
@@ -51,8 +52,8 @@ class White(Static):
         else:
             return np.zeros((X.shape[0], X2.shape[0]))

-    def psi2(self, Z, mu, S, target):
-        return np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
+    def psi2(self, Z, variational_posterior):
+        return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)

     def update_gradients_full(self, dL_dK, X):
         self.variance.gradient = np.trace(dL_dK)
@ -60,13 +61,13 @@ class White(Static):
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum() self.variance.gradient = dL_dKdiag.sum()
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum() self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum()
class Bias(Static): class Bias(Static):
def __init__(self, input_dim, variance=1., name='bias'): def __init__(self, input_dim, variance=1., name='bias'):
super(Bias, self).__init__(input_dim, name) super(Bias, self).__init__(input_dim, variance, name)
def K(self, X, X2=None): def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0]) shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
@ -80,11 +81,11 @@ class Bias(Static):
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum() self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, mu, S, target): def psi2(self, Z, variational_posterior):
ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) ret = np.empty((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance**2 ret[:] = self.variance**2
return ret return ret
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum() self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
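For the bias kernel the psi statistics are constants: Kmm, psi0 and psi1 all equal the variance and psi2 equals the variance squared, which is where the 2.*self.variance factor in the gradient above comes from. A one-line finite-difference check of that last term:

import numpy as np
var, eps = 0.8, 1e-6
psi2 = lambda v: v**2
assert np.isclose((psi2(var + eps) - psi2(var - eps)) / (2*eps), 2*var)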

View file

@ -5,6 +5,7 @@
from kern import Kern from kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.linalg import tdot
from ... import util from ... import util
import numpy as np import numpy as np
from scipy import integrate from scipy import integrate
@ -33,10 +34,10 @@ class Stationary(Kern):
self.add_parameters(self.variance, self.lengthscale) self.add_parameters(self.variance, self.lengthscale)
def K_of_r(self, r): def K_of_r(self, r):
raise NotImplementedError, "implement the covaraiance functino and a fn of r to use this class" raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class"
def dK_dr(self, r): def dK_dr(self, r):
raise NotImplementedError, "implement the covaraiance functino and a fn of r to use this class" raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class"
def K(self, X, X2=None): def K(self, X, X2=None):
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
@ -47,8 +48,44 @@ class Stationary(Kern):
X2 = X X2 = X
return X[:, None, :] - X2[None, :, :] return X[:, None, :] - X2[None, :, :]
def _unscaled_dist(self, X, X2=None):
"""
Compute the Euclidean distance between each row of X and X2, or between
each pair of rows of X if X2 is None.
"""
if X2 is None:
Xsq = np.sum(np.square(X),1)
return np.sqrt(-2.*tdot(X) + (Xsq[:,None] + Xsq[None,:]))
else:
X1sq = np.sum(np.square(X),1)
X2sq = np.sum(np.square(X2),1)
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
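A standalone check of the identity behind _unscaled_dist, ||x - x'||^2 = ||x||^2 + ||x'||^2 - 2 x.x', against the naive broadcast; the np.maximum guard (added here, not in the code above) protects the sqrt from tiny negative values caused by rounding.

import numpy as np
X = np.random.randn(5, 3)
Xsq = np.sum(np.square(X), 1)
r2 = -2.*np.dot(X, X.T) + (Xsq[:, None] + Xsq[None, :])
r = np.sqrt(np.maximum(r2, 0.))
brute = np.sqrt(np.sum(np.square(X[:, None, :] - X[None, :, :]), -1))
assert np.allclose(r, brute)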
def _scaled_dist(self, X, X2=None): def _scaled_dist(self, X, X2=None):
return np.sqrt(np.sum(np.square(self._dist(X, X2) / self.lengthscale), -1)) """
Efficiently compute the scaled distance, r.
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 / l_q^2 }
Note that if there is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate
function for caching) and divide by the lengthscale afterwards.
"""
if self.ARD:
if X2 is None:
Xl = X/self.lengthscale
Xsq = np.sum(np.square(Xl),1)
return np.sqrt(-2.*tdot(Xl) + (Xsq[:,None] + Xsq[None,:]))
else:
X1l = X/self.lengthscale
X2l = X2/self.lengthscale
X1sq = np.sum(np.square(X1l),1)
X2sq = np.sum(np.square(X2l),1)
return np.sqrt(-2.*np.dot(X1l, X2l.T) + (X1sq[:,None] + X2sq[None,:]))
else:
return self._unscaled_dist(X, X2)/self.lengthscale
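A sketch of why the ARD branch and the cached branch agree when all lengthscales are equal: scaling the inputs by 1/l before computing distances is the same as dividing the unscaled distances by l (np.maximum again guards the sqrt against rounding).

import numpy as np
l = 2.5
X = np.random.randn(6, 4)
Xl = X / l
Xsq = np.sum(np.square(Xl), 1)
r_ard = np.sqrt(np.maximum(-2.*np.dot(Xl, Xl.T) + (Xsq[:, None] + Xsq[None, :]), 0.))
X0sq = np.sum(np.square(X), 1)
r_iso = np.sqrt(np.maximum(-2.*np.dot(X, X.T) + (X0sq[:, None] + X0sq[None, :]), 0.)) / l
assert np.allclose(r_ard, r_iso)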
def Kdiag(self, X): def Kdiag(self, X):
ret = np.empty(X.shape[0]) ret = np.empty(X.shape[0])
@ -65,16 +102,22 @@ class Stationary(Kern):
rinv = self._inv_dist(X, X2) rinv = self._inv_dist(X, X2)
dL_dr = self.dK_dr(r) * dL_dK dL_dr = self.dK_dr(r) * dL_dK
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
if self.ARD: if self.ARD:
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0) self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)
else: else:
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum() self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum()
self.variance.gradient = np.sum(K * dL_dK)/self.variance self.variance.gradient = np.sum(K * dL_dK)/self.variance
def _inv_dist(self, X, X2=None): def _inv_dist(self, X, X2=None):
"""
Compute the elementwise inverse of the distance matrix, except on the
diagonal, where we return zero (the distance on the diagonal is zero).
This term appears in derivatives.
"""
dist = self._scaled_dist(X, X2) dist = self._scaled_dist(X, X2)
if X2 is None: if X2 is None:
nondiag = util.diag.offdiag_view(dist) nondiag = util.diag.offdiag_view(dist)
@ -84,12 +127,25 @@ class Stationary(Kern):
return 1./np.where(dist != 0., dist, np.inf) return 1./np.where(dist != 0., dist, np.inf)
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
"""
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
dL_dr = self.dK_dr(r) * dL_dK
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2 dL_dr = self.dK_dr(r) * dL_dK
#The high-memory numpy way: ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2
#if X2 is None:
#ret *= 2.
#the lower memory way with a loop
tmp = invdist*dL_dr
if X2 is None: if X2 is None:
ret *= 2. tmp *= 2.
X2 = X
ret = np.empty(X.shape, dtype=np.float64)
[np.copyto(ret[:,q], np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), 1)) for q in xrange(self.input_dim)]
ret /= self.lengthscale**2
return ret return ret
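A standalone sketch of the trade-off in gradients_X above: the commented-out broadcast materialises an N x M x Q array, while the loop only ever holds N x M, and both give the same answer (here tmp stands in for invdist*dL_dr).

import numpy as np
N, M, Q = 5, 4, 3
X, X2 = np.random.randn(N, Q), np.random.randn(M, Q)
tmp = np.random.randn(N, M)
high_mem = np.sum(tmp[:, :, None] * (X[:, None, :] - X2[None, :, :]), 1)
low_mem = np.empty((N, Q))
for q in range(Q):
    low_mem[:, q] = np.sum(tmp * (X[:, q][:, None] - X2[:, q][None, :]), 1)
assert np.allclose(high_mem, low_mem)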
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
@ -162,6 +218,8 @@ class Matern52(Stationary):
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac{5}{3} r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{input\_dim} \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac{5}{3} r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{input\_dim} \\frac{(x_i-y_i)^2}{\ell_i^2} }
""" """
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name)
def K_of_r(self, r): def K_of_r(self, r):
return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r) return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
@ -239,17 +297,19 @@ class RatQuad(Stationary):
self.add_parameters(self.power) self.add_parameters(self.power)
def K_of_r(self, r): def K_of_r(self, r):
return self.variance*(1. + r**2/2.)**(-self.power) r2 = np.power(r, 2.)
return self.variance*np.power(1. + r2/2., -self.power)
def dK_dr(self, r): def dK_dr(self, r):
return -self.variance*self.power*r*(1. + r**2/2)**(-self.power - 1.) r2 = np.power(r, 2.)
return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.)
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
super(RatQuad, self).update_gradients_full(dL_dK, X, X2) super(RatQuad, self).update_gradients_full(dL_dK, X, X2)
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
r2 = r**2 r2 = np.power(r, 2.)
dpow = -2.**self.power*(r2 + 2.)**(-self.power)*np.log(0.5*(r2+2.)) dK_dpow = -self.variance * np.power(2., self.power) * np.power(r2 + 2., -self.power) * np.log(0.5*(r2+2.))
self.power.gradient = np.sum(dL_dK*dpow) grad = np.sum(dL_dK*dK_dpow)
self.power.gradient = grad
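A standalone finite-difference check of the power gradient above, using k(r) = sigma^2 (1 + r^2/2)^(-p) together with the identity (1 + r^2/2)^(-p) = 2^p (r^2 + 2)^(-p):

import numpy as np
var, p = 1.3, 0.7
r = np.random.rand(4, 4)
K = lambda power: var * np.power(1. + r**2/2., -power)
analytic = -var * np.power(2., p) * np.power(r**2 + 2., -p) * np.log(0.5*(r**2 + 2.))
numeric = (K(p + 1e-6) - K(p - 1e-6)) / 2e-6
assert np.allclose(analytic, numeric, atol=1e-5)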

View file

@ -2,35 +2,17 @@
try: try:
import sympy as sp import sympy as sp
sympy_available=True sympy_available=True
from sympy.utilities.lambdify import lambdify
except ImportError: except ImportError:
sympy_available=False sympy_available=False
exit() exit()
from sympy.core.cache import clear_cache
from sympy.utilities.codegen import codegen
try:
from scipy import weave
weave_available = True
except ImportError:
weave_available = False
import os
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import sys
import numpy as np import numpy as np
import re from kern import Kern
import tempfile
import pdb
import ast
from kernpart import Kernpart
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
# TODO have this set up in a set up file!
user_code_storage = tempfile.gettempdir()
class spkern(Kernpart): class Sympykern(Kern):
""" """
A kernel object, where all the hard work is done by sympy. A kernel object, where all the hard work is done by sympy.
@ -51,7 +33,7 @@ class spkern(Kernpart):
name='sympykern' name='sympykern'
if k is None: if k is None:
raise ValueError, "You must provide an argument for the covariance function." raise ValueError, "You must provide an argument for the covariance function."
super(spkern, self).__init__(input_dim, name) super(Sympykern, self).__init__(input_dim, name)
self._sp_k = k self._sp_k = k
@ -66,6 +48,10 @@ class spkern(Kernpart):
assert len(self._sp_x)==len(self._sp_z) assert len(self._sp_x)==len(self._sp_z)
x_dim=len(self._sp_x) x_dim=len(self._sp_x)
self._sp_kdiag = k
for x, z in zip(self._sp_x, self._sp_z):
self._sp_kdiag = self._sp_kdiag.subs(z, x)
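A small sympy sketch (toy expression, not this class) of what the substitution above achieves: replacing each z_i by x_i turns k(x, z) into the diagonal expression k(x, x).

import sympy as sp
x0, z0, t = sp.symbols('x_0 z_0 theta')
k = t * sp.exp(-(x0 - z0)**2)
kdiag = k.subs(z0, x0)   # the exponent vanishes on the diagonal, leaving theta
assert sp.simplify(kdiag - t) == 0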
# If it is a multi-output covariance, add an input for indexing the outputs. # If it is a multi-output covariance, add an input for indexing the outputs.
self._real_input_dim = x_dim self._real_input_dim = x_dim
# Check input dim is number of xs + 1 if output_dim is >1 # Check input dim is number of xs + 1 if output_dim is >1
@ -97,6 +83,8 @@ class spkern(Kernpart):
#setattr(self, theta, np.ones(self.output_dim)) #setattr(self, theta, np.ones(self.output_dim))
self.num_shared_params = len(self._sp_theta) self.num_shared_params = len(self._sp_theta)
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
else: else:
@ -112,43 +100,33 @@ class spkern(Kernpart):
if param is not None: if param is not None:
if param.has_key(theta): if param.has_key(theta):
val = param[theta] val = param[theta]
#setattr(self, theta.name, val)
setattr(self, theta.name, Param(theta.name, val, None)) setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name)) self.add_parameters(getattr(self, theta.name))
#deal with param #deal with param
#self._set_params(self._get_params()) #self._set_params(self._get_params())
# Differentiate with respect to parameters. # Differentiate with respect to parameters.
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] derivative_arguments = self._sp_x + self._sp_theta
if self.output_dim > 1: if self.output_dim > 1:
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i] derivative_arguments += self._sp_theta_i
# differentiate with respect to input variables. self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x] self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
# This gives the parameters for the arg list.
self.arg_list = self._sp_x + self._sp_z + self._sp_theta
self.diag_arg_list = self._sp_x + self._sp_theta
if self.output_dim > 1:
self.arg_list += self._sp_theta_i + self._sp_theta_j
self.diag_arg_list += self._sp_theta_i
# psi_stats aren't yet implemented. # psi_stats aren't yet implemented.
if False: if False:
self.compute_psi_stats() self.compute_psi_stats()
self._code = {}
# generate the code for the covariance functions # generate the code for the covariance functions
self._gen_code() self._gen_code()
if weave_available:
if False:
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
else:
extra_compile_args = []
self.weave_kwargs = {
'support_code': None, #self._function_code,
'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
'extra_compile_args':extra_compile_args,
'extra_link_args':['-lgomp'],
'verbose':True}
self.parameters_changed() # initializes caches self.parameters_changed() # initializes caches
@ -157,407 +135,128 @@ class spkern(Kernpart):
def _gen_code(self): def _gen_code(self):
argument_sequence = self._sp_x+self._sp_z+self._sp_theta self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy')
code_list = [('k',self._sp_k)] for key in self.derivatives.keys():
# gradients with respect to covariance input setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy'))
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
# gradient with respect to parameters self._Kdiag_function = lambdify(self.diag_arg_list, self._sp_kdiag, 'numpy')
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)] for key in self.derivatives.keys():
# gradient with respect to multiple output parameters setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
if self.output_dim > 1:
argument_sequence += self._sp_theta_i + self._sp_theta_j def K(self,X,X2=None):
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)] self._K_computations(X, X2)
# generate c functions from sympy objects return self._K_function(**self._arguments)
if weave_available:
code_type = "C"
else:
code_type = "PYTHON"
# Need to add the sympy_helpers header in here.
(foo_c,self._function_code), (foo_h,self._function_header) = \
codegen(code_list,
code_type,
self.name,
argument_sequence=argument_sequence)
# Use weave to compute the underlying functions. def Kdiag(self,X):
if weave_available: self._K_computations(X)
# put the header file where we can find it return self._Kdiag_function(**self._diag_arguments)
f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
f.write(self._function_header)
f.close()
if weave_available:
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
# put the cpp file in user code storage (defaults to temp file location)
f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
else:
# put the python file in user code storage
f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
f.write(self._function_code)
f.close()
if weave_available:
# arg_list will store the arguments required for the C code.
input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
# for multiple outputs reverse argument list is also required
if self.output_dim>1:
reverse_input_arg_list = list(input_arg_list)
reverse_input_arg_list.reverse()
# This gives the parameters for the arg list.
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
arg_list = input_arg_list + param_arg_list
precompute_list=[]
if self.output_dim > 1:
reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
# For multiple outputs, also need the split parameters.
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
arg_list += split_param_arg_list
reverse_arg_list += split_param_reverse_arg_list
# Extract the right output indices from the inputs.
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
precompute_list += c_define_output_indices
reverse_arg_string = ", ".join(reverse_arg_list)
arg_string = ", ".join(arg_list)
precompute_string = "\n".join(precompute_list)
# Now we use the arguments in code that computes the separate parts.
# Any precomputations will be done here eventually.
self._precompute = \
"""
// Precompute code would go here. It will be called when parameters are updated.
"""
# Here's the code to do the looping for K
self._code['K'] =\
"""
// _K_code
// Code for computing the covariance function.
int i;
int j;
int n = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
//target[i*num_inducing+j] =
TARGET2(i, j) += k(%s);
}
}
%s
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
# adding a string representation of the function in the
# comment forces recompile when needed
self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')
# Code to compute diagonal of covariance.
diag_arg_string = re.sub('Z','X',arg_string)
diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
diag_arg_string = re.sub('j','i',diag_arg_string)
diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
diag_precompute_string = re.sub('Z','X',diag_precompute_string)
diag_precompute_string = re.sub('j','i',diag_precompute_string)
# Code to do the looping for Kdiag
self._code['Kdiag'] =\
"""
// _code['Kdiag']
// Code for computing diagonal of covariance function.
int i;
int n = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<n;i++){
%s
//target[i] =
TARGET1(i)=k(%s);
}
%s
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients
if self.output_dim>1:
for i, theta in enumerate(self._sp_theta_i):
grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
grad_func_list = c_define_output_indices+grad_func_list
grad_func_string = '\n'.join(grad_func_list)
self._code['dK_d' + theta.name] =\
"""
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
self._code['dK_d' +theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
self._code['dKdiag_d' + theta.name] =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<n;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
for i, theta in enumerate(self._sp_theta):
grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
grad_func_string = '\n'.join(grad_func_list)
self._code['dK_d' + theta.name] =\
"""
// _dK_dtheta_code
// Code for computing gradient of covariance with respect to parameters.
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
self._code['dK_d' + theta.name +'_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
self._code['dKdiag_d' + theta.name] =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<n;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
gradX_func_list = []
if self.output_dim>1:
gradX_func_list += c_define_output_indices
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
gradX_func_string = "\n".join(gradX_func_list)
self._code['dK_dX'] = \
"""
// _dK_dX_code
// Code for computing gradient of covariance with respect to inputs.
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n; i++){
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)
# Code for gradients of Kdiag wrt X
self._code['dKdiag_dX'] = \
"""
// _dKdiag_dX_code
// Code for computing gradient of diagonal with respect to inputs.
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (int i=0;i<n; i++){
%s
}
%s
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed.
# Get rid of Zs in argument for diagonal. TODO: Why wasn't
# diag_func_string called here? Need to check that.
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
#TODO: similar functions when cython available.
#TODO: similar functions when only python available.
def _get_arg_names(self, target=None, Z=None, partial=None):
arg_names = ['X']
if target is not None:
arg_names += ['target']
for shared_params in self._sp_theta:
arg_names += [shared_params.name]
if Z is not None:
arg_names += ['Z']
if partial is not None:
arg_names += ['partial']
if self.output_dim>1:
arg_names += self._split_theta_names
arg_names += ['output_dim']
return arg_names
def _generate_inline(self, code, X, target=None, Z=None, partial=None):
output_dim = self.output_dim
# Need to extract parameters to local variables first
for shared_params in self._sp_theta:
locals()[shared_params.name] = getattr(self, shared_params.name)
for split_params in self._split_theta_names:
locals()[split_params] = np.asarray(getattr(self, split_params))
arg_names = self._get_arg_names(target, Z, partial)
if weave_available:
return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
else:
raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
def K(self,X,Z,target):
if Z is None:
self._generate_inline(self._code['K_X'], X, target)
else:
self._generate_inline(self._code['K'], X, target, Z)
def Kdiag(self,X,target):
self._generate_inline(self._code['Kdiag'], X, target)
def _param_grad_helper(self,partial,X,Z,target): def _param_grad_helper(self,partial,X,Z,target):
if Z is None: pass
self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
else:
self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
def dKdiag_dtheta(self,partial,X,target):
self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial)
def gradients_X(self,partial,X,Z,target): def gradients_X(self, dL_dK, X, X2=None):
if Z is None: #if self._X is None or X.base is not self._X.base or X2 is not None:
self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial) self._K_computations(X, X2)
else: gradients_X = np.zeros((X.shape[0], X.shape[1]))
self._generate_inline(self._code['dK_dX'], X, target, Z, partial) for i, x in enumerate(self._sp_x):
gf = getattr(self, '_K_diff_' + x.name)
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
if X2 is None:
gradients_X *= 2
return gradients_X
def dKdiag_dX(self,partial,X,target): def gradients_X_diag(self, dL_dK, X):
self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial) self._K_computations(X)
dX = np.zeros_like(X)
for i, x in enumerate(self._sp_x):
gf = getattr(self, '_Kdiag_diff_' + x.name)
dX[:, i] = gf(**self._diag_arguments)*dL_dK
return dX
def compute_psi_stats(self): def update_gradients_full(self, dL_dK, X, X2=None):
#define some normal distributions
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
#do some integration!
#self._sp_psi0 = ??
self._sp_psi1 = self._sp_k
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi1 *= normals[i]
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi1 = self._sp_psi1.simplify()
#and here's psi2 (eek!)
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi2 *= normals[i]
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi2 = self._sp_psi2.simplify()
def parameters_changed(self):
# Reset the caches
self._cache, self._cache2 = np.empty(shape=(2, 1))
self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
def update_gradients_full(self, dL_dK, X):
# Need to extract parameters to local variables first # Need to extract parameters to local variables first
self._K_computations(X, None) self._K_computations(X, X2)
for shared_params in self._sp_theta: for theta in self._sp_theta:
parameter = getattr(self, shared_params.name) parameter = getattr(self, theta.name)
code = self._code['dK_d' + shared_params.name] gf = getattr(self, '_K_diff_' + theta.name)
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK)) gradient = (gf(**self._arguments)*dL_dK).sum()
if X2 is not None:
for split_params in self._split_theta_names: gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
parameter = getattr(self, split_params.name) setattr(parameter, 'gradient', gradient)
code = self._code['dK_d' + split_params.name] if self.output_dim>1:
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK)) for theta in self._sp_theta_i:
parameter = getattr(self, theta.name[:-2])
gf = getattr(self, '_K_diff_' + theta.name)
# def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): A = gf(**self._arguments)*dL_dK
# #contributions from Kdiag gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
# self.variance.gradient = np.sum(dL_dKdiag) for i in np.arange(self.output_dim)])
if X2 is None:
# #from Knm gradient *= 2
# self._K_computations(X, Z)
# self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
# if self.ARD:
# self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
# else:
# self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
# #from Kmm
# self._K_computations(Z, None)
# self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
# if self.ARD:
# self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
# else:
# self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, Z):
if Z is None:
self._generate_inline(self._precompute, X)
else: else:
self._generate_inline(self._precompute, X, Z=Z) A = gf(**self._reverse_arguments)*dL_dK.T
gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
for i in np.arange(self.output_dim)])
setattr(parameter, 'gradient', gradient)
def update_gradients_diag(self, dL_dKdiag, X):
self._K_computations(X)
for theta in self._sp_theta:
parameter = getattr(self, theta.name)
gf = getattr(self, '_Kdiag_diff_' + theta.name)
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
if self.output_dim>1:
for theta in self._sp_theta_i:
parameter = getattr(self, theta.name[:-2])
gf = getattr(self, '_Kdiag_diff_' + theta.name)
a = gf(**self._diag_arguments)*dL_dKdiag
setattr(parameter, 'gradient',
np.asarray([a[np.where(self._output_ind==i)].sum()
for i in np.arange(self.output_dim)]))
def _K_computations(self, X, X2=None):
"""Set up argument lists for the derivatives."""
# Could check if this needs doing or not, there could
# definitely be some computational savings by checking for
# parameter updates here.
self._arguments = {}
self._diag_arguments = {}
for i, x in enumerate(self._sp_x):
self._arguments[x.name] = X[:, i][:, None]
self._diag_arguments[x.name] = X[:, i][:, None]
if self.output_dim > 1:
self._output_ind = np.asarray(X[:, -1], dtype='int')
for i, theta in enumerate(self._sp_theta_i):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
self._diag_arguments[theta.name] = self._arguments[theta.name]
for theta in self._sp_theta:
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
self._diag_arguments[theta.name] = self._arguments[theta.name]
if X2 is not None:
for i, z in enumerate(self._sp_z):
self._arguments[z.name] = X2[:, i][None, :]
if self.output_dim > 1:
self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
for i, theta in enumerate(self._sp_theta_j):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
else:
for z in self._sp_z:
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
if self.output_dim > 1:
self._output_ind2 = self._output_ind
for theta in self._sp_theta_j:
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
if X2 is not None:
# These arguments are needed in gradients when X2 is not equal to X.
self._reverse_arguments = dict(self._arguments) # take a copy, so the swaps below do not clobber the forward arguments
for x, z in zip(self._sp_x, self._sp_z):
self._reverse_arguments[x.name] = self._arguments[z.name].T
self._reverse_arguments[z.name] = self._arguments[x.name].T
if self.output_dim > 1:
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
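The pattern that replaces the old weave machinery, in a standalone sketch: lambdify compiles a sympy expression once, and the numpy broadcasting set up by _K_computations (x values as a column, z values as a row) then yields the full covariance matrix in one call.

import numpy as np
import sympy as sp
from sympy.utilities.lambdify import lambdify
x, z, t = sp.symbols('x z theta')
k = t * sp.exp(-(x - z)**2)
K_fn = lambdify((x, z, t), k, 'numpy')
X = np.random.randn(5)[:, None]   # column of x values...
Z = np.random.randn(3)[None, :]   # ...broadcast against a row of z values
assert K_fn(X, Z, 2.0).shape == (5, 3)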

View file

@ -7,6 +7,7 @@ from ..core import SparseGP
from .. import likelihoods from .. import likelihoods
from .. import kern from .. import kern
from ..inference.latent_function_inference import VarDTC from ..inference.latent_function_inference import VarDTC
from ..util.misc import param_to_array
class SparseGPRegression(SparseGP): class SparseGPRegression(SparseGP):
""" """
@ -33,18 +34,18 @@ class SparseGPRegression(SparseGP):
# kern defaults to rbf (plus white for stability) # kern defaults to rbf (plus white for stability)
if kernel is None: if kernel is None:
kernel = kern.rbf(input_dim)# + kern.white(input_dim, variance=1e-3) kernel = kern.RBF(input_dim)# + kern.White(input_dim, variance=1e-3)
# Z defaults to a subset of the data # Z defaults to a subset of the data
if Z is None: if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)] i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = X[i].copy() Z = param_to_array(X)[i].copy()
else: else:
assert Z.shape[1] == input_dim assert Z.shape[1] == input_dim
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC()) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
def _getstate(self): def _getstate(self):
return SparseGP._getstate(self) return SparseGP._getstate(self)
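A usage sketch of the updated constructor (assuming the CamelCase kernel names introduced in this branch); with Z omitted, the inducing inputs default to a random subset of X as above.

import numpy as np
import GPy
X = np.random.rand(50, 1)
Y = np.sin(X*6.) + np.random.randn(50, 1)*0.05
m = GPy.models.SparseGPRegression(X, Y, num_inducing=10)
m.optimize('bfgs', max_iters=50)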

View file

@ -10,11 +10,11 @@ class BGPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -22,11 +22,11 @@ class BGPLVMTests(unittest.TestCase):
def test_linear_kern(self): def test_linear_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -34,11 +34,11 @@ class BGPLVMTests(unittest.TestCase):
def test_rbf_kern(self): def test_rbf_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -46,11 +46,11 @@ class BGPLVMTests(unittest.TestCase):
def test_rbf_bias_kern(self): def test_rbf_bias_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -58,11 +58,11 @@ class BGPLVMTests(unittest.TestCase):
def test_rbf_line_kern(self): def test_rbf_line_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -70,11 +70,11 @@ class BGPLVMTests(unittest.TestCase):
def test_linear_bias_kern(self): def test_linear_bias_kern(self):
N, num_inducing, input_dim, D = 30, 5, 4, 30 N, num_inducing, input_dim, D = 30, 5, 4, 30
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -9,10 +9,10 @@ class GPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4 num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
X = np.random.rand(num_data, input_dim) X = np.random.rand(num_data, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k) m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -20,10 +20,10 @@ class GPLVMTests(unittest.TestCase):
def test_linear_kern(self): def test_linear_kern(self):
num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4 num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
X = np.random.rand(num_data, input_dim) X = np.random.rand(num_data, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k) m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -31,10 +31,10 @@ class GPLVMTests(unittest.TestCase):
def test_rbf_kern(self): def test_rbf_kern(self):
num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4 num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
X = np.random.rand(num_data, input_dim) X = np.random.rand(num_data, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T Y = np.random.multivariate_normal(np.zeros(num_data),K,output_dim).T
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel = k) m = GPy.models.GPLVM(Y, input_dim, kernel = k)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -4,6 +4,7 @@
import unittest import unittest
import numpy as np import numpy as np
import GPy import GPy
import sys
verbose = True verbose = True
@ -14,106 +15,223 @@ except ImportError:
SYMPY_AVAILABLE=False SYMPY_AVAILABLE=False
class KernelTests(unittest.TestCase): class Kern_check_model(GPy.core.Model):
def test_kerneltie(self):
K = GPy.kern.rbf(5, ARD=True)
K.tie_params('.*[01]')
K.constrain_fixed('2')
X = np.random.rand(5,5)
Y = np.ones((5,1))
m = GPy.models.GPRegression(X,Y,K)
self.assertTrue(m.checkgrad())
def test_rbfkernel(self):
kern = GPy.kern.rbf(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rbf_sympykernel(self):
if SYMPY_AVAILABLE:
kern = GPy.kern.rbf_sympy(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_eq_sympykernel(self):
if SYMPY_AVAILABLE:
kern = GPy.kern.eq_sympy(5, 3)
self.assertTrue(GPy.kern.kern_test(kern, output_ind=4, verbose=verbose))
def test_ode1_eqkernel(self):
if SYMPY_AVAILABLE:
kern = GPy.kern.ode1_eq(3)
self.assertTrue(GPy.kern.kern_test(kern, output_ind=1, verbose=verbose, X_positive=True))
def test_rbf_invkernel(self):
kern = GPy.kern.rbf_inv(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_Matern32kernel(self):
kern = GPy.kern.Matern32(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_Matern52kernel(self):
kern = GPy.kern.Matern52(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_linearkernel(self):
kern = GPy.kern.linear(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_exponentialkernel(self):
kern = GPy.kern.periodic_exponential(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_Matern32kernel(self):
kern = GPy.kern.periodic_Matern32(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_Matern52kernel(self):
kern = GPy.kern.periodic_Matern52(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rational_quadratickernel(self):
kern = GPy.kern.rational_quadratic(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_gibbskernel(self):
kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_heterokernel(self):
kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.logexp())
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_mlpkernel(self):
kern = GPy.kern.mlp(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_polykernel(self):
kern = GPy.kern.poly(5, degree=4)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_fixedkernel(self):
""" """
Fixed effect kernel test This is a dummy model class used as a base class for checking that the
gradients of a given kernel are implemented correctly. It enables
checkgrad() to be called independently on a kernel.
""" """
X = np.random.rand(30, 4) def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
K = np.dot(X, X.T) GPy.core.Model.__init__(self, 'kernel_test_model')
kernel = GPy.kern.fixed(4, K) if kernel is None:
kern = GPy.kern.poly(5, degree=4) kernel = GPy.kern.RBF(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) if X is None:
X = np.random.randn(20, kernel.input_dim)
if dL_dK is None:
if X2 is None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
# def test_coregionalization(self): self.kernel = kernel
# X1 = np.random.rand(50,1)*8 self.X = GPy.core.parameterization.Param('X',X)
# X2 = np.random.rand(30,1)*5 self.X2 = X2
# index = np.vstack((np.zeros_like(X1),np.ones_like(X2))) self.dL_dK = dL_dK
# X = np.hstack((np.vstack((X1,X2)),index))
# Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05 def is_positive_definite(self):
# Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2. v = np.linalg.eig(self.kernel.K(self.X))[0]
# Y = np.vstack((Y1,Y2)) if any(v<-10*sys.float_info.epsilon):
return False
else:
return True
def log_likelihood(self):
return np.sum(self.dL_dK*self.kernel.K(self.X, self.X2))
class Kern_check_dK_dtheta(Kern_check_model):
"""
This class allows gradient checks for the gradient of a kernel with
respect to parameters.
"""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.add_parameter(self.kernel)
def parameters_changed(self):
return self.kernel.update_gradients_full(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""
This class allows gradient checks of the gradient of the diagonal of a
kernel with respect to the parameters.
"""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
self.add_parameter(self.kernel)
def log_likelihood(self):
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
def parameters_changed(self):
return self.kernel.update_gradients_diag(np.diag(self.dL_dK), self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.add_parameter(self.X)
def parameters_changed(self):
self.X.gradient = self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
def log_likelihood(self):
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
def parameters_changed(self):
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK, self.X)
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs a series of checks on a kernel implementation: it
checks that the covariance matrix is positive (semi-)definite on a
randomly generated data set, and that the gradients with respect to the
parameters and the inputs agree with finite differences.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kern
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
pass_checks = True
if X is None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
if X2 is None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
if result and verbose:
print("Check passed.")
if not result:
print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks
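A usage sketch: kern_test as defined above works as a one-call sanity check when writing a new kernel (the kernel and input shapes here are arbitrary).

import numpy as np
import GPy
k = GPy.kern.RBF(2, ARD=True)
X = np.random.randn(30, 2)
X2 = np.random.randn(15, 2)
assert kern_test(k, X=X, X2=X2, verbose=True)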
class KernelTestsContinuous(unittest.TestCase):
def setUp(self):
self.X = np.random.randn(100,2)
self.X2 = np.random.randn(110,2)
def test_Matern32(self):
k = GPy.kern.Matern32(2)
self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Matern52(self):
k = GPy.kern.Matern52(2)
self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose))
#TODO: turn off grad checking wrt X for indexed kernels like coregionalize
# k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
# k2 = GPy.kern.coregionalize(2,1)
# kern = k1**k2
# self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -5,10 +5,10 @@ Created on 26 Apr 2013
''' '''
import unittest import unittest
import numpy import numpy
from GPy.inference.conjugate_gradient_descent import CGD, RUNNING from GPy.inference.optimization.conjugate_gradient_descent import CGD, RUNNING
import pylab import pylab
from scipy.optimize.optimize import rosen, rosen_der from scipy.optimize.optimize import rosen, rosen_der
from GPy.inference.gradient_descent_update_rules import PolakRibiere from GPy.inference.optimization.gradient_descent_update_rules import PolakRibiere
class Test(unittest.TestCase): class Test(unittest.TestCase):
@ -30,7 +30,7 @@ class Test(unittest.TestCase):
assert numpy.allclose(res[0], 0, atol=1e-5) assert numpy.allclose(res[0], 0, atol=1e-5)
break break
except AssertionError: except AssertionError:
import ipdb;ipdb.set_trace() import pdb;pdb.set_trace()
# RESTART # RESTART
pass pass
else: else:
@ -108,4 +108,3 @@ if __name__ == "__main__":
res[0] = opt.opt(f, df, x0.copy(), callback, messages=True, maxiter=1000, res[0] = opt.opt(f, df, x0.copy(), callback, messages=True, maxiter=1000,
report_every=7, gtol=1e-12, update_rule=PolakRibiere) report_every=7, gtol=1e-12, update_rule=PolakRibiere)
pass

View file

@ -62,41 +62,41 @@ def force_F_ordered(A):
print "why are your arrays not F order?" print "why are your arrays not F order?"
return np.asfortranarray(A) return np.asfortranarray(A)
def jitchol(A, maxtries=5):
A = force_F_ordered_symmetric(A)
L, info = lapack.dpotrf(A, lower=1)
if info == 0:
return L
else:
if maxtries==0:
raise linalg.LinAlgError, "not positive definite, even with jitter."
diagA = np.diag(A)
if np.any(diagA <= 0.):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6
return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
# def jitchol(A, maxtries=5): # def jitchol(A, maxtries=5):
# A = np.ascontiguousarray(A) # A = force_F_ordered_symmetric(A)
# L, info = lapack.dpotrf(A, lower=1) # L, info = lapack.dpotrf(A, lower=1)
# if info == 0: # if info == 0:
# return L # return L
# else: # else:
# if maxtries==0:
# raise linalg.LinAlgError, "not positive definite, even with jitter."
# diagA = np.diag(A) # diagA = np.diag(A)
# if np.any(diagA <= 0.): # if np.any(diagA <= 0.):
# raise linalg.LinAlgError, "not pd: non-positive diagonal elements" # raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
# jitter = diagA.mean() * 1e-6 # jitter = diagA.mean() * 1e-6
# while maxtries > 0 and np.isfinite(jitter):
# print 'Warning: adding jitter of {:.10e}'.format(jitter) # return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
# try:
# return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) def jitchol(A, maxtries=5):
# except: A = np.ascontiguousarray(A)
# jitter *= 10 L, info = lapack.dpotrf(A, lower=1)
# finally: if info == 0:
# maxtries -= 1 return L
# raise linalg.LinAlgError, "not positive definite, even with jitter." else:
# diagA = np.diag(A)
if np.any(diagA <= 0.):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6
while maxtries > 0 and np.isfinite(jitter):
print 'Warning: adding jitter of {:.10e}'.format(jitter)
try:
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
except:
jitter *= 10
finally:
maxtries -= 1
raise linalg.LinAlgError, "not positive definite, even with jitter."
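A usage sketch of the active jitchol above: for a rank-deficient Gram matrix a plain Cholesky factorisation fails, while jitchol succeeds by adding a small diagonal jitter, so the factor reconstructs K only up to that jitter.

import numpy as np
A = np.random.randn(10, 3)
K = A.dot(A.T)               # rank 3, so K is only positive semi-definite
L = np.tril(jitchol(K))
print 'max reconstruction error:', np.abs(L.dot(L.T) - K).max()   # of the order of the jitter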