From 3d346cbdd6e0b418b047ee0ffb9a6b938d236faa Mon Sep 17 00:00:00 2001
From: kcutajar
Date: Tue, 22 Mar 2016 09:09:30 +0100
Subject: [PATCH] Added kernels for GpGrid and GpSsm regression

---
 .../gaussian_grid_inference.py     | 113 +++++++++++++
 .../gaussian_ssm_inference.py      | 140 +++++++++++++++++
 .../grid_posterior.py              |  62 ++++++++
 .../ssm_posterior.py               |  73 +++++++++
 GPy/kern/__init__.py               |   3 +-
 GPy/kern/src/gridKerns.py          |  76 +++++++++
 GPy/kern/src/rbf.py                |   8 +
 GPy/kern/src/ssm_kerns.py          | 148 ++++++++++++++++++
 GPy/kern/src/stationary.py         |  19 +++
 9 files changed, 641 insertions(+), 1 deletion(-)
 create mode 100644 GPy/inference/latent_function_inference/gaussian_grid_inference.py
 create mode 100644 GPy/inference/latent_function_inference/gaussian_ssm_inference.py
 create mode 100644 GPy/inference/latent_function_inference/grid_posterior.py
 create mode 100644 GPy/inference/latent_function_inference/ssm_posterior.py
 create mode 100644 GPy/kern/src/gridKerns.py
 create mode 100644 GPy/kern/src/ssm_kerns.py

diff --git a/GPy/inference/latent_function_inference/gaussian_grid_inference.py b/GPy/inference/latent_function_inference/gaussian_grid_inference.py
new file mode 100644
index 00000000..8917abea
--- /dev/null
+++ b/GPy/inference/latent_function_inference/gaussian_grid_inference.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+# This implementation of GP regression for inputs lying on a multidimensional
+# grid is based on the article:
+
+#@article{Gilboa:2015,
+#    title={Scaling multidimensional inference for structured Gaussian processes},
+#    author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
+#    journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
+#    volume={37},
+#    number={2},
+#    pages={424--436},
+#    year={2015},
+#    publisher={IEEE}
+#}
+
+from .grid_posterior import GridPosterior
+import numpy as np
+from . import LatentFunctionInference
+log_2_pi = np.log(2*np.pi)
+
+class GaussianGridInference(LatentFunctionInference):
+    """
+    An object for inference when the likelihood is Gaussian and the inputs lie
+    on a multidimensional grid.
+
+    The function self.inference returns a GridPosterior object, which summarizes
+    the posterior.
+ + """ + def __init__(self): + pass + + def kron_mvprod(self, A, b): + x = b + N = 1 + D = len(A) + G = np.zeros((D,1)) + for d in xrange(0, D): + G[d] = len(A[d]) + N = np.prod(G) + for d in xrange(D-1, -1, -1): + X = np.reshape(x, (G[d], round(N/G[d])), order='F') + Z = np.dot(A[d], X) + Z = Z.T + x = np.reshape(Z, (-1, 1), order='F') + return x + + def inference(self, kern, X, likelihood, Y, Y_metadata=None): + + """ + Returns a GridPosterior class containing essential quantities of the posterior + """ + N = X.shape[0] #number of training points + D = X.shape[1] #number of dimensions + + Kds = np.zeros(D, dtype=object) #vector for holding covariance per dimension + Qs = np.zeros(D, dtype=object) #vector for holding eigenvectors of covariance per dimension + QTs = np.zeros(D, dtype=object) #vector for holding transposed eigenvectors of covariance per dimension + V_kron = 1 # kronecker product of eigenvalues + + # retrieve the one-dimensional variation of the designated kernel + oneDkernel = kern.getOneDimensionalKernel(D) + + for d in xrange(D): + xg = list(set(X[:,d])) #extract unique values for a dimension + xg = np.reshape(xg, (len(xg), 1)) + oneDkernel.lengthscale = kern.lengthscale[d] + Kds[d] = oneDkernel.K(xg) + [V, Q] = np.linalg.eig(Kds[d]) + V_kron = np.kron(V_kron, V) + Qs[d] = Q + QTs[d] = Q.T + + noise = likelihood.variance + 1e-8 + + alpha_kron = self.kron_mvprod(QTs, Y) + V_kron = V_kron.reshape(-1, 1) + alpha_kron = alpha_kron / (V_kron + noise) + alpha_kron = self.kron_mvprod(Qs, alpha_kron) + + log_likelihood = -0.5 * (np.dot(Y.T, alpha_kron) + np.sum((np.log(V_kron + noise))) + N*log_2_pi) + + # compute derivatives wrt parameters Thete + derivs = np.zeros(D+2, dtype='object') + for t in xrange(len(derivs)): + dKd_dTheta = np.zeros(D, dtype='object') + gamma = np.zeros(D, dtype='object') + gam = 1 + for d in xrange(D): + xg = list(set(X[:,d])) + xg = np.reshape(xg, (len(xg), 1)) + oneDkernel.lengthscale = kern.lengthscale[d] + if t < D: + dKd_dTheta[d] = oneDkernel.dKd_dLen(xg, (t==d), lengthscale=kern.lengthscale[t]) #derivative wrt lengthscale + elif (t == D): + dKd_dTheta[d] = oneDkernel.dKd_dVar(xg) #derivative wrt variance + else: + dKd_dTheta[d] = np.identity(len(xg)) #derivative wrt noise + gamma[d] = np.diag(np.dot(np.dot(QTs[d], dKd_dTheta[d].T), Qs[d])) + gam = np.kron(gam, gamma[d]) + + gam = gam.reshape(-1,1) + kappa = self.kron_mvprod(dKd_dTheta, alpha_kron) + derivs[t] = 0.5*np.dot(alpha_kron.T,kappa) - 0.5*np.sum(gam / (V_kron + noise)) + + # separate derivatives + dL_dLen = derivs[:D] + dL_dVar = derivs[D] + dL_dThetaL = derivs[D+1] + + return GridPosterior(alpha_kron=alpha_kron, QTs=QTs, Qs=Qs, V_kron=V_kron), log_likelihood, {'dL_dLen':dL_dLen, 'dL_dVar':dL_dVar, 'dL_dthetaL':dL_dThetaL} diff --git a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py new file mode 100644 index 00000000..4eb441cb --- /dev/null +++ b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py @@ -0,0 +1,140 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). 
diff --git a/GPy/inference/latent_function_inference/gaussian_ssm_inference.py b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py
new file mode 100644
index 00000000..4eb441cb
--- /dev/null
+++ b/GPy/inference/latent_function_inference/gaussian_ssm_inference.py
@@ -0,0 +1,140 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+# This implementation of converting GPs to state space models is based on the article:
+
+#@article{Gilboa:2015,
+#    title={Scaling multidimensional inference for structured Gaussian processes},
+#    author={Gilboa, Elad and Saat{\c{c}}i, Yunus and Cunningham, John P},
+#    journal={Pattern Analysis and Machine Intelligence, IEEE Transactions on},
+#    volume={37},
+#    number={2},
+#    pages={424--436},
+#    year={2015},
+#    publisher={IEEE}
+#}
+
+from .ssm_posterior import SsmPosterior
+import numpy as np
+import math as mt
+from . import LatentFunctionInference
+log_2_pi = np.log(2*np.pi)
+
+
+class GaussianSSMInference(LatentFunctionInference):
+    """
+    An object for inference when the likelihood is Gaussian and the kernel has
+    a state space (SDE) representation, so that inference can be carried out by
+    Kalman filtering and smoothing.
+
+    The function self.inference returns an SsmPosterior object, which summarizes
+    the posterior.
+
+    """
+    def __init__(self):
+        pass
+
+    def get_YYTfactor(self, Y):
+        """
+        Find a matrix L which satisfies LL^T = YY^T.
+
+        Note that L may have fewer columns than Y, else L = Y.
+        """
+        N, D = Y.shape
+        if N > D:
+            return Y
+        else:
+            # caching of a thin factor L (with LL^T = YY^T) is not implemented
+            # yet, so Y itself is returned in both cases for now
+            return Y
+
+    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
+        """
+        Returns an SsmPosterior object containing the essential quantities of
+        the posterior.
+        """
+        order = kern.order
+        K = X.shape[0]
+        log_likelihood = 0
+        results = np.zeros((K, 4), dtype=object)  # per step: filtered mean, filtered variance, Phi, P
+        H = np.zeros((1, order))  # observation model: only the first state component is observed
+        H[0][0] = 1
+        v_0 = kern.Phi_of_r(-1)  # r = -1 requests the stationary covariance of the SDE
+        mu_0 = np.zeros((order, 1))
+        noise_var = likelihood.variance + 1e-8
+
+        # carry out forward (Kalman) filtering
+        for t in range(K):
+            if t == 0:
+                prior_m = np.dot(H, mu_0)
+                prior_v = np.dot(np.dot(H, v_0), H.T) + noise_var
+
+                log_likelihood = -0.5*(log_2_pi + mt.log(prior_v) + ((Y[0] - prior_m)**2)/prior_v)
+
+                kalman_gain = np.dot(v_0, H.T) / prior_v
+                mu = mu_0 + kalman_gain*(Y[0] - prior_m)
+                V = np.dot(np.eye(order) - np.dot(kalman_gain, H), v_0)
+                results[0][0] = mu
+                results[0][1] = V
+            else:
+                delta = X[t] - X[t-1]
+                Q = kern.Q_of_r(delta)
+                Phi = kern.Phi_of_r(delta)
+                P = np.dot(np.dot(Phi, V), Phi.T) + Q
+                PhiMu = np.dot(Phi, mu)
+                prior_m = np.dot(H, PhiMu)
+                prior_v = np.dot(np.dot(H, P), H.T) + noise_var
+
+                log_likelihood += -0.5*(log_2_pi + mt.log(prior_v) + ((Y[t] - prior_m)**2)/prior_v)
+
+                kalman_gain = np.dot(P, H.T)/prior_v
+                mu = PhiMu + kalman_gain*(Y[t] - prior_m)
+                V = np.dot(np.eye(order) - np.dot(kalman_gain, H), P)
+
+                results[t-1][2] = Phi  # transition from step t-1 to step t
+                results[t-1][3] = P
+                results[t][0] = mu
+                results[t][1] = V
+
+        # carry out backwards (Rauch-Tung-Striebel) smoothing
+        W = np.dot(np.eye(order) - np.dot(kalman_gain, H), np.dot(Phi, results[K-2][1]))
+
+        mu_s = results[K-1][0]
+        V_s = results[K-1][1]
+
+        posterior_mean = np.zeros((K, 1))
+        posterior_var = np.zeros((K, 1))
+        E = np.zeros((K, 4), dtype=object)  # per step: smoothed mean, smoothed variance, L, W
+
+        posterior_mean[K-1] = np.dot(H, mu_s)
+        posterior_var[K-1] = np.dot(np.dot(H, V_s), H.T)
+        E[K-1][0] = mu_s
+        E[K-1][1] = V_s
+
+        for t in range(K-2, -1, -1):
+            mu = results[t][0]
+            V = results[t][1]
+            Phi = results[t][2]
+            P = results[t][3]
+
+            L = np.dot(np.dot(V, Phi.T), np.linalg.solve(P, np.eye(order)))  # L = V Phi^T P^{-1}
+            mu_s = mu + np.dot(L, mu_s - np.dot(Phi, mu))
+            V_s = V + np.dot(np.dot(L, V_s - P), L.T)
+            posterior_mean[t] = np.dot(H, mu_s)
+            posterior_var[t] = np.dot(np.dot(H, V_s), H.T)
+
+            if t < K-2:
+                W = np.dot(results[t+1][1], L.T) + np.dot(E[t+1][2], np.dot(W - np.dot(results[t+1][2], results[t+1][1]), L.T))
+
+            E[t][0] = mu_s
+            E[t][1] = V_s
+            E[t][2] = L
+            E[t][3] = W
+
+        return SsmPosterior(mu_f=results[:, 0], V_f=results[:, 1], mu_s=E[:, 0], V_s=E[:, 1], expectations=E), log_likelihood
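# A usage sketch for the filter/smoother above (illustrative only; the data are
# made up, and Matern32_SSM is the state space kernel added by this patch).
# Inputs must be one-dimensional and sorted, since the transition matrices are
# built from the gaps X[t] - X[t-1]:
#
#     import numpy as np
#     import GPy
#     X = np.sort(np.random.rand(100, 1), axis=0)
#     Y = np.sin(10*X) + 0.1*np.random.randn(100, 1)
#     kern = GPy.kern.Matern32_SSM(input_dim=1)
#     post, log_lik = GaussianSSMInference().inference(kern, X, GPy.likelihoods.Gaussian(), Y)
#     # post.mu_f / post.V_f hold the filtered moments, and post.mu_s / post.V_s
#     # the smoothed ones, one entry per training input.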
diff --git a/GPy/inference/latent_function_inference/grid_posterior.py b/GPy/inference/latent_function_inference/grid_posterior.py
new file mode 100644
index 00000000..da86dd84
--- /dev/null
+++ b/GPy/inference/latent_function_inference/grid_posterior.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+
+class GridPosterior(object):
+    """
+    Specially intended for the grid regression case.
+    An object to represent a Gaussian posterior over latent function values, p(f|D).
+
+    The purpose of this class is to serve as an interface between the inference
+    schemes and the model classes.
+    """
+    def __init__(self, alpha_kron=None, QTs=None, Qs=None, V_kron=None):
+        """
+        alpha_kron : the solution of (K + sigma^2 I)^{-1} y, computed via Kronecker matrix-vector products
+        QTs : transposed eigenvectors from the decomposition of the single-dimension covariance matrices
+        Qs : eigenvectors from the decomposition of the single-dimension covariance matrices
+        V_kron : Kronecker product of the eigenvalues from the decomposition of the single-dimension covariance matrices
+        """
+        if ((alpha_kron is not None) and (QTs is not None)
+                and (Qs is not None) and (V_kron is not None)):
+            pass  # we have sufficient information to compute the posterior
+        else:
+            raise ValueError("insufficient information for predictions")
+
+        self._alpha_kron = alpha_kron
+        self._qTs = QTs
+        self._qs = Qs
+        self._v_kron = V_kron
+
+    @property
+    def alpha(self):
+        """
+        The solution of (K + sigma^2 I)^{-1} y
+        """
+        return self._alpha_kron
+
+    @property
+    def QTs(self):
+        """
+        Array of transposed eigenvectors from the single-dimension covariance decompositions
+        """
+        return self._qTs
+
+    @property
+    def Qs(self):
+        """
+        Array of eigenvectors from the single-dimension covariance decompositions
+        """
+        return self._qs
+
+    @property
+    def V_kron(self):
+        """
+        Kronecker product of the eigenvalues of the single-dimension covariance matrices
+        """
+        return self._v_kron
+
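# How the stored quantities fit together (an illustrative identity, not API):
# the training covariance factorises as K = (kron_d Qs[d]) diag(V_kron)
# (kron_d QTs[d]), so the posterior mean at the training inputs, K alpha, can
# be recovered with two Kronecker matrix-vector products and no dense K.
# Assuming `gi` is a GaussianGridInference instance and `post` the GridPosterior
# it returned:
#
#     mean = gi.kron_mvprod(post.Qs, post.V_kron * gi.kron_mvprod(post.QTs, post.alpha))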
+ + """ + def __init__(self, mu_f = None, V_f=None, mu_s=None, V_s=None, expectations=None): + """ + mu_f : mean values predicted during kalman filtering step + var_f : variance predicted during the kalman filtering step + mu_s : mean values predicted during backwards smoothing step + var_s : variance predicted during backwards smoothing step + expectations : posterior expectations + """ + + if ((mu_f is not None) and (V_f is not None) and + (mu_s is not None) and (V_s is not None) and + (expectations is not None)): + pass # we have sufficient to compute the posterior + else: + raise ValueError("insufficient information to compute predictions") + + self._mu_f = mu_f + self._V_f = V_f + self._mu_s = mu_s + self._V_s = V_s + self._expectations = expectations + + @property + def mu_f(self): + """ + Mean values predicted during kalman filtering step mean + """ + return self._mu_f + + @property + def V_f(self): + """ + Variance predicted during the kalman filtering step + """ + return self._V_f + + @property + def mu_s(self): + """ + Mean values predicted during kalman backwards smoothin mean + """ + return self._mu_s + + @property + def V_s(self): + """ + Variance predicted during backwards smoothing step + """ + return self._V_s + + @property + def expectations(self): + """ + Posterior expectations + """ + return self._expectations + diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 7f44b6a9..041159f6 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -29,7 +29,6 @@ from .src.splitKern import SplitKern,DEtime from .src.splitKern import DEtime as DiffGenomeKern from .src.spline import Spline from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel - from .src.sde_matern import sde_Matern32 from .src.sde_matern import sde_Matern52 from .src.sde_linear import sde_Linear @@ -37,3 +36,5 @@ from .src.sde_standard_periodic import sde_StdPeriodic from .src.sde_static import sde_White, sde_Bias from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad from .src.sde_brownian import sde_Brownian +from .src.ssmKerns import Matern32_SSM +from .src.gridKerns import GridRBF diff --git a/GPy/kern/src/gridKerns.py b/GPy/kern/src/gridKerns.py new file mode 100644 index 00000000..98804544 --- /dev/null +++ b/GPy/kern/src/gridKerns.py @@ -0,0 +1,76 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# Kurt Cutajar + +import numpy as np +from stationary import Stationary +from ...util.config import * +from ...util.caching import Cache_this + +class GridKern(Stationary): + + def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=False): + super(GridKern, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU) + self.originalDimensions = originalDimensions + + @Cache_this(limit=3, ignore_args=()) + def dKd_dVar(self, X, X2=None): + """ + Derivative of Kernel function wrt variance applied on inputs X and X2. + In the stationary case there is an inner function depending on the + distances from X to X2, called r. + + dKd_dVar(X, X2) = dKdVar_of_r((X-X2)**2) + """ + r = self._scaled_dist(X, X2) + return self.dKdVar_of_r(r) + + @Cache_this(limit=3, ignore_args=()) + def dKd_dLen(self, X, dimension, lengthscale, X2=None): + """ + Derivate of Kernel function wrt lengthscale applied on inputs X and X2. 
diff --git a/GPy/kern/src/gridKerns.py b/GPy/kern/src/gridKerns.py
new file mode 100644
index 00000000..98804544
--- /dev/null
+++ b/GPy/kern/src/gridKerns.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Kurt Cutajar
+
+import numpy as np
+from .stationary import Stationary
+from ...util.config import *
+from ...util.caching import Cache_this
+
+class GridKern(Stationary):
+
+    def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=False):
+        super(GridKern, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
+        self.originalDimensions = originalDimensions
+
+    @Cache_this(limit=3, ignore_args=())
+    def dKd_dVar(self, X, X2=None):
+        """
+        Derivative of the kernel function wrt variance, applied on inputs X and X2.
+        In the stationary case there is an inner function depending on the
+        (scaled) distances from X to X2, called r.
+
+        dKd_dVar(X, X2) = dKdVar_of_r(r)
+        """
+        r = self._scaled_dist(X, X2)
+        return self.dKdVar_of_r(r)
+
+    @Cache_this(limit=3, ignore_args=())
+    def dKd_dLen(self, X, dimension, lengthscale, X2=None):
+        """
+        Derivative of the kernel function wrt lengthscale, applied on inputs X and X2.
+        In the stationary case there is an inner function depending on the
+        (scaled) distances from X to X2, called r.
+
+        dKd_dLen(X, X2) = dKdLen_of_r(r, dimension, lengthscale)
+        """
+        r = self._scaled_dist(X, X2)
+        return self.dKdLen_of_r(r, dimension, lengthscale)
+
+class GridRBF(GridKern):
+    """
+    Similar to the regular RBF kernel, but supplemented with the methods required
+    for Gaussian grid regression.
+    Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic
+    or Gaussian kernel:
+
+    .. math::
+
+       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
+
+    The variance is shared equally across the originalDimensions grid dimensions,
+    i.e. each one-dimensional factor carries sigma^(2/D).
+    """
+    _support_GPU = True
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='gridRBF', originalDimensions=1, useGPU=False):
+        super(GridRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, originalDimensions, useGPU=useGPU)
+
+    def K_of_r(self, r):
+        return (self.variance**(1.0/self.originalDimensions)) * np.exp(-0.5 * r**2)
+
+    def dKdVar_of_r(self, r):
+        """
+        Compute the derivative of the kernel wrt variance.
+        """
+        return np.exp(-0.5 * r**2)
+
+    def dKdLen_of_r(self, r, dimCheck, lengthscale):
+        """
+        Compute the derivative of the kernel for this dimension wrt a lengthscale.
+        The computation changes depending on whether the lengthscale corresponds
+        to the dimension of the kernel whose derivative is being computed.
+        """
+        if dimCheck:
+            return (self.variance**(1.0/self.originalDimensions)) * np.exp(-0.5 * r**2) * (r**2) / (lengthscale**(1.0/self.originalDimensions))
+        else:
+            return (self.variance**(1.0/self.originalDimensions)) * np.exp(-0.5 * r**2) / (lengthscale**(1.0/self.originalDimensions))
+
+    def dK_dr(self, r):
+        return -r*self.K_of_r(r)
\ No newline at end of file
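# Why the variance is raised to the power 1/originalDimensions: the full RBF
# matrix on a D-dimensional grid factorises as a Kronecker product of D
# one-dimensional RBF matrices, so each factor must carry sigma^(2/D) for the
# product to recover sigma^2. A small check (illustrative; hypothetical grid):
#
#     import numpy as np
#     import GPy
#     x1 = np.linspace(0, 1, 5)[:, None]
#     x2 = np.linspace(0, 2, 4)[:, None]
#     X = np.array([[a, b] for a in x1[:, 0] for b in x2[:, 0]])
#     full = GPy.kern.RBF(input_dim=2, variance=2.0, ARD=True).K(X)
#     oneD = GPy.kern.GridRBF(input_dim=1, variance=2.0, originalDimensions=2)
#     assert np.allclose(np.kron(oneD.K(x1), oneD.K(x2)), full)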
diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py
index ff86561d..557366ae 100644
--- a/GPy/kern/src/rbf.py
+++ b/GPy/kern/src/rbf.py
@@ -58,6 +58,14 @@ class RBF(Stationary):
         if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
         super(RBF,self).parameters_changed()
 
+    def get_one_dimensional_kernel(self, dim):
+        """Specially intended for grid regression: the 1-D variant of this kernel."""
+        # local import avoids a circular dependency between rbf.py and gridKerns.py
+        from .gridKerns import GridRBF
+        oneDkernel = GridRBF(input_dim=1, variance=self.variance.copy(), originalDimensions=dim)
+        return oneDkernel
+
     #---------------------------------------#
     #             PSI statistics            #
     #---------------------------------------#
diff --git a/GPy/kern/src/ssm_kerns.py b/GPy/kern/src/ssm_kerns.py
new file mode 100644
index 00000000..aa988e52
--- /dev/null
+++ b/GPy/kern/src/ssm_kerns.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+#
+# Kurt Cutajar
+
+from .kern import Kern
+from ... import util
+import numpy as np
+from scipy import integrate
+from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp
+
+class SsmKerns(Kern):
+    """
+    Kernels suitable for GP SSM regression with one-dimensional inputs.
+    """
+
+    def __init__(self, input_dim, variance, lengthscale, active_dims, name, useGPU=False):
+        super(SsmKerns, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
+        self.lengthscale = np.abs(lengthscale)
+        self.variance = Param('variance', variance, Logexp())
+
+    def K_of_r(self, r):
+        raise NotImplementedError("implement the covariance function as a fn of r to use this class")
+
+    def dK_dr(self, r):
+        raise NotImplementedError("implement the derivative of the covariance function wrt r to use this class")
+
+    def Q_of_r(self, r):
+        raise NotImplementedError("implement the process-noise covariance of the SDE wrt r to use this class")
+
+    def Phi_of_r(self, r):
+        raise NotImplementedError("implement the transition function of the SDE wrt r to use this class")
+
+    def noise(self):
+        raise NotImplementedError("implement the noise function of the SDE to use this class")
+
+    def dPhidLam(self, r):
+        raise NotImplementedError("implement the derivative of the transition function wrt lambda to use this class")
+
+    def dQ(self, r):
+        raise NotImplementedError("implement the derivatives of Q wrt lambda and variance to use this class")
+
+
+class Matern32_SSM(SsmKerns):
+    """
+    Matern 3/2 kernel:
+
+    .. math::
+
+       k(r) = \\sigma^2 (1 + \\sqrt{3} r) \\exp(- \\sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \\sqrt{\\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\\ell_i^2} }
+    """
+
+    def __init__(self, input_dim, variance=1., lengthscale=1, active_dims=None, name='Mat32'):
+        super(Matern32_SSM, self).__init__(input_dim, variance, lengthscale, active_dims, name)
+        lambd = np.sqrt(2*1.5)/lengthscale  # additional parameter of the model (derived from the lengthscale)
+        self.lam = Param('lambda', lambd, Logexp())
+        self.link_parameters(self.lam, self.variance)
+        self.order = 2
+
+    def noise(self):
+        """
+        Compute the spectral density (q) of the process noise for the kernel.
+        """
+        p = 1
+        lp_fact = np.sum(np.log(range(1, p+1)))
+        l2p_fact = np.sum(np.log(range(1, 2*p + 1)))
+        logq = np.log(2*self.variance) + p*np.log(4) + 2*lp_fact + (2*p + 1)*np.log(self.lam) - l2p_fact
+        q = np.exp(logq)
+        return q
+
+    def K_of_r(self, r):
+        lengthscale = np.sqrt(3.) / self.lam
+        r = r / lengthscale
+        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
+
+    def dK_dr(self, r):
+        return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)
+
+    def Q_of_r(self, r):
+        """
+        Compute the process-noise covariance (Q) accumulated over a gap of length r.
+        """
+        q = self.noise()
+        Q = np.zeros((2, 2))
+        Q[0][0] = 1/(4*self.lam**3) - (4*(r**2)*(self.lam**2)
+                  + 4*r*self.lam + 2)/(8*(self.lam**3)*np.exp(2*r*self.lam))
+        Q[0][1] = (r**2)/(2*np.exp(2*r*self.lam))
+        Q[1][0] = Q[0][1]
+        Q[1][1] = 1/(4*self.lam) - (2*(r**2)*(self.lam**2)
+                  - 2*r*self.lam + 1)/(4*self.lam*np.exp(2*r*self.lam))
+        return q*Q
+
+    def Phi_of_r(self, r):
+        """
+        Compute the transition matrix (Phi) for a gap of length r; by convention,
+        r < 0 requests the stationary covariance of the state instead.
+        """
+        if r < 0:
+            phi = np.zeros((2, 2))
+            phi[0][0] = self.variance
+            phi[0][1] = 0
+            phi[1][0] = 0
+            phi[1][1] = np.power(self.lam, 2)*self.variance
+            return phi
+        else:
+            mult = np.exp(-self.lam*r)
+            phi = np.zeros((2, 2))
+            phi[0][0] = (1 + self.lam*r)*mult
+            phi[0][1] = mult*r
+            phi[1][0] = -r*mult*self.lam**2
+            phi[1][1] = mult*(1 - self.lam*r)
+            return phi
+
+    def dPhidLam(self, r):
+        """
+        Compute the derivative of the transition matrix (Phi) wrt lambda.
+        """
+        mult = np.exp(self.lam*r)
+        dPhi = np.zeros((2, 2))
+        dPhi[0][0] = (r / mult) - (r * (self.lam*r + 1))/mult
+        dPhi[0][1] = (-1 * (r**2)) / mult
+        dPhi[1][0] = (self.lam**2 * r**2) / mult - (2*self.lam*r)/mult
+        dPhi[1][1] = (r * (self.lam*r - 1))/mult - r/mult
+        return dPhi
+
+    def dQ(self, r):
+        """
+        Compute the derivatives of Q with respect to lambda and variance;
+        r = -1 is the convention for the stationary covariance case.
+        """
+        if r == -1:
+            dQdLam = np.array([[0, 0], [0, 2*self.lam*self.variance]])
+            dQdVar = None
+        else:
+            q = self.noise()
+            Q = self.Q_of_r(r)
+            dQdLam = np.zeros((2, 2))
+            mult = np.exp(2*self.lam*r)
+            dQdLam[0][0] = (3*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(8*(self.lam**4)*mult) - 3/(4*(self.lam**4)) - (8*self.lam*(r**2) + 4*r)/(8*(self.lam**3)*mult) + (r*(4*(r**2)*(self.lam**2) + 4*r*self.lam + 2))/(4*(self.lam**3)*mult)
+            dQdLam[0][1] = -(r**3)/mult
+            dQdLam[1][0] = dQdLam[0][1]
+            dQdLam[1][1] = (2*(r**2)*(self.lam**2) - 2*r*self.lam + 1)/(4*(self.lam**2)*mult) - 1/(4*(self.lam**2)) + (2*r - 4*(r**2)*self.lam)/(4*self.lam*mult) + (r*(2*(r**2)*(self.lam**2) - 2*r*self.lam + 1))/(2*self.lam*mult)
+            dq = (q*(2+1))/self.lam  # dq/dlam = (2p + 1) * q / lam, with p = 1
+            dQdLam = q*dQdLam + dq*(Q/q)
+            dQdVar = Q / self.variance
+        return [dQdLam, dQdVar]
\ No newline at end of file
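# Two illustrative consistency checks for the state space Matern 3/2 above
# (hypothetical values; `k` is an instance of the kernel just defined):
#
#     import numpy as np
#     import GPy
#     k = GPy.kern.Matern32_SSM(input_dim=1)
#     # the SDE transition matrices form a semigroup: Phi(a+b) = Phi(a) Phi(b)
#     assert np.allclose(k.Phi_of_r(0.7), np.dot(k.Phi_of_r(0.4), k.Phi_of_r(0.3)))
#     # and K_of_r agrees with the standard GPy Matern 3/2 at unit lengthscale
#     r = np.linspace(0, 2, 9)
#     assert np.allclose(k.K_of_r(r), GPy.kern.Matern32(1).K_of_r(r))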
diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 5e137abb..a8ae3ac4 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -189,6 +189,16 @@ class Stationary(Kern):
 
         self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
 
+    def update_gradients_direct(self, dL_dVar, dL_dLen):
+        """
+        Specially intended for the grid regression case.
+        Given the computed log likelihood derivatives, update the corresponding
+        kernel and likelihood gradients.
+        Useful for when the gradients have been computed a priori.
+        """
+        self.variance.gradient = dL_dVar
+        self.lengthscale.gradient = dL_dLen
+
     def _inv_dist(self, X, X2=None):
         """
         Compute the elementwise inverse of the distance matrix, expecpt on the
@@ -307,6 +317,15 @@ class Stationary(Kern):
 
     def input_sensitivity(self, summarize=True):
         return self.variance*np.ones(self.input_dim)/self.lengthscale**2
 
+    def get_one_dimensional_kernel(self, dimensions):
+        """
+        Specially intended for the grid regression case.
+        For a given covariance kernel, this method returns the corresponding kernel
+        for a single dimension, whose values can then be used in the algorithm for
+        reconstructing the full covariance matrix.
+        """
+        raise NotImplementedError("implement the one-dimensional variant of this kernel")
+
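# Putting the pieces together: the gradients returned by GaussianGridInference
# were already computed analytically during inference, so a model class is
# expected to feed them straight back through update_gradients_direct. A rough
# sketch of that wiring (illustrative only; parameter plumbing simplified):
#
#     post, log_lik, grads = GaussianGridInference().inference(kern, X, likelihood, Y)
#     kern.update_gradients_direct(grads['dL_dVar'], grads['dL_dLen'])
#     likelihood.update_gradients(grads['dL_dthetaL'])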