From ff23a59d2df5f43269f221aa53b58fad6e6e3802 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Mon, 24 Feb 2014 09:41:13 +0000
Subject: [PATCH] unfinished work on rational quadratic kern

---
 GPy/kern/__init__.py                 |  12 +--
 GPy/kern/_src/mlp.py                 | 138 ++++++++++-----------
 GPy/kern/_src/{bias.py => static.py} |  83 +++++++++++----
 GPy/kern/_src/stationary.py          |  23 ++++-
 GPy/kern/_src/white.py               |  70 --------------
 5 files changed, 134 insertions(+), 192 deletions(-)
 rename GPy/kern/_src/{bias.py => static.py} (54%)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 594ff6d3..d858ad5b 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -1,12 +1,11 @@
 from _src.rbf import RBF
-from _src.white import White
 from _src.kern import Kern
 from _src.linear import Linear
-from _src.bias import Bias
+from _src.static import Bias, White
 from _src.brownian import Brownian
-from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
+from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad
+from _src.mlp import MLP
 #import coregionalize
-#import exponential
 #import eq_ode1
 #import finite_dimensional
 #import fixed
@@ -14,10 +13,6 @@ from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
 #import hetero
 #import hierarchical
 #import independent_outputs
-#import linear
-#import Matern32
-#import Matern52
-#import mlp
 #import ODE_1
 #import periodic_exponential
 #import periodic_Matern32
@@ -31,4 +26,3 @@ from _src.stationary import Exponential, Matern32, Matern52, ExpQuad
 #import rbf_inv
 #import spline
 #import symmetric
-#import white
diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py
index 59979a62..f2f40e62 100644
--- a/GPy/kern/_src/mlp.py
+++ b/GPy/kern/_src/mlp.py
@@ -1,11 +1,13 @@
 # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from kernpart import Kernpart
+from kern import Kern
+from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp
 import numpy as np
 
 four_over_tau = 2./np.pi
 
-class MLP(Kernpart):
+class MLP(Kern):
     """
     Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
 
     .. math::

          k(x,y) = \\sigma^{2}\\frac{2}{\\pi}\\text{asin}\\left(\\frac{\\sigma_w^2 x^\\top y + \\sigma_b^2}{\\sqrt{\\sigma_w^2 x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y + \\sigma_b^2 + 1}}\\right)
-    
+
     :param input_dim: the number of input dimensions
-    :type input_dim: int 
+    :type input_dim: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
     :param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
@@ -29,85 +31,58 @@ class MLP(Kernpart):
 
     """
 
-    def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):
-        self.input_dim = input_dim
-        self.ARD = ARD
-        if not ARD:
-            self.num_params=3
-            if weight_variance is not None:
-                weight_variance = np.asarray(weight_variance)
-                assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
-            else:
-                weight_variance = 100.*np.ones(1)
-        else:
-            self.num_params = self.input_dim + 2
-            if weight_variance is not None:
-                weight_variance = np.asarray(weight_variance)
-                assert weight_variance.size == self.input_dim, "bad number of weight variances"
-            else:
-                weight_variance = np.ones(self.input_dim)
-            raise NotImplementedError
+    def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., name='mlp'):
+        super(MLP, self).__init__(input_dim, name)
+        self.variance = Param('variance', variance, Logexp())
+        self.weight_variance = Param('weight_variance', weight_variance, Logexp())
+        self.bias_variance = Param('bias_variance', bias_variance, Logexp())
+        self.add_parameters(self.variance, self.weight_variance, self.bias_variance)
 
-        self.name='mlp'
-        self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
-
-    def _get_params(self):
-        return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
-
-    def _set_params(self, x):
-        assert x.size == (self.num_params)
-        self.variance = x[0]
-        self.weight_variance = x[1:-1]
-        self.weight_std = np.sqrt(self.weight_variance)
-        self.bias_variance = x[-1]
-
-    def _get_param_names(self):
-        if self.num_params == 3:
-            return ['variance', 'weight_variance', 'bias_variance']
-        else:
-            return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
-
-    def K(self, X, X2, target):
+    def K(self, X, X2=None):
         """Return covariance between X and X2."""
         self._K_computations(X, X2)
-        target += self.variance*self._K_dvar
+        return self.variance*self._K_dvar
 
-    def Kdiag(self, X, target):
+    def Kdiag(self, X):
         """Compute the diagonal of the covariance matrix for X."""
         self._K_diag_computations(X)
-        target+= self.variance*self._K_diag_dvar
+        return self.variance*self._K_diag_dvar
 
-    def _param_grad_helper(self, dL_dK, X, X2, target):
+    def update_gradients_full(self, dL_dK, X, X2=None):
         """Derivative of the covariance with respect to the parameters."""
         self._K_computations(X, X2)
-        denom3 = self._K_denom*self._K_denom*self._K_denom
+        self.variance.gradient = np.sum(self._K_dvar*dL_dK)
+
+        denom3 = self._K_denom**3
         base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
         base_cov_grad = base*dL_dK
 
         if X2 is None:
             vec = np.diag(self._K_inner_prod)
-            target[1] += ((self._K_inner_prod/self._K_denom
+            self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
                            -.5*self._K_numer/denom3
                            *(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
                              +np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
-            target[2] += ((1./self._K_denom
+            self.bias_variance.gradient = ((1./self._K_denom
                            -.5*self._K_numer/denom3
                            *((vec[None, :]+vec[:, None])*self.weight_variance
                              +2.*self.bias_variance + 2.))*base_cov_grad).sum()
         else:
             vec1 = (X*X).sum(1)
             vec2 = (X2*X2).sum(1)
-            target[1] += ((self._K_inner_prod/self._K_denom
+            self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
                            -.5*self._K_numer/denom3
                            *(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2)
                              + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
-            target[2] += ((1./self._K_denom
+            self.bias_variance.gradient = ((1./self._K_denom
                            -.5*self._K_numer/denom3
                            *((vec1[:, None]+vec2[None, :])*self.weight_variance
                              + 2*self.bias_variance + 2.))*base_cov_grad).sum()
 
-        target[0] += np.sum(self._K_dvar*dL_dK)
-
-    def gradients_X(self, dL_dK, X, X2, target):
+    def update_gradients_diag(self, dL_dKdiag, X):
+        raise NotImplementedError("TODO")
+
+    def gradients_X(self, dL_dK, X, X2):
         """Derivative of the covariance matrix with respect to X"""
         self._K_computations(X, X2)
         arg = self._K_asin_arg
@@ -116,47 +91,38 @@ class MLP(Kernpart):
         denom3 = denom*denom*denom
         if X2 is not None:
             vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
-            target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+            return four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
         else:
             vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
-            target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
+            return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
 
-    def dKdiag_dX(self, dL_dKdiag, X, target):
+    def gradients_X_diag(self, dL_dKdiag, X):
         """Gradient of diagonal of covariance with respect to X"""
         self._K_diag_computations(X)
         arg = self._K_diag_asin_arg
         denom = self._K_diag_denom
         numer = self._K_diag_numer
-        target += four_over_tau*2.*self.weight_variance*self.variance*X*(1/denom*(1 - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
+        return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
 
     def _K_computations(self, X, X2):
-        """Pre-computations for the covariance matrix (used for computing the covariance and its gradients."""
+        """Pre-computations for the covariance matrix (used for computing the covariance and its gradients)."""
-        if self.ARD:
-            pass
+        if X2 is None:
+            self._K_inner_prod = np.dot(X,X.T)
+            self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
+            vec = np.diag(self._K_numer) + 1.
+            self._K_denom = np.sqrt(np.outer(vec,vec))
         else:
-            if X2 is None:
-                self._K_inner_prod = np.dot(X,X.T)
-                self._K_numer = self._K_inner_prod*self.weight_variance+self.bias_variance
-                vec = np.diag(self._K_numer) + 1.
-                self._K_denom = np.sqrt(np.outer(vec,vec))
-                self._K_asin_arg = self._K_numer/self._K_denom
-                self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
-            else:
-                self._K_inner_prod = np.dot(X,X2.T)
-                self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
-                vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
-                vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
-                self._K_denom = np.sqrt(np.outer(vec1,vec2))
-                self._K_asin_arg = self._K_numer/self._K_denom
-                self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
+            self._K_inner_prod = np.dot(X,X2.T)
+            self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
+            vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
+            vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
+            self._K_denom = np.sqrt(np.outer(vec1,vec2))
+        self._K_asin_arg = self._K_numer/self._K_denom
+        self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
 
     def _K_diag_computations(self, X):
         """Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients)."""
-        if self.ARD:
-            pass
-        else:
-            self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
-            self._K_diag_denom = self._K_diag_numer+1.
-            self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
-            self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
+        self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
+        self._K_diag_denom = self._K_diag_numer+1.
+        self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
+        self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
diff --git a/GPy/kern/_src/bias.py b/GPy/kern/_src/static.py
similarity index 54%
rename from GPy/kern/_src/bias.py
rename to GPy/kern/_src/static.py
index e1938c95..28854162 100644
--- a/GPy/kern/_src/bias.py
+++ b/GPy/kern/_src/static.py
@@ -7,8 +7,63 @@ from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
 import numpy as np
 
-class Bias(Kern):
-    def __init__(self,input_dim,variance=1.,name=None):
+class Static(Kern):
+    """Base class for kernels which are constant in the inputs (zero gradients w.r.t. X)."""
+    def gradients_X(self, dL_dK, X, X2):
+        return np.zeros(X.shape)
+
+    def gradients_X_diag(self, dL_dKdiag, X):
+        return np.zeros(X.shape)
+
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        return np.zeros(Z.shape)
+
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        return np.zeros(mu.shape), np.zeros(S.shape)
+
+    def psi0(self, Z, mu, S):
+        return self.Kdiag(mu)
+
+    def psi1(self, Z, mu, S):
+        return self.K(mu, Z)
+
+    def psi2(self, Z, mu, S):
+        K = self.K(mu, Z)
+        return K[:,:,None]*K[:,None,:] # NB. inheriting classes may implement this more efficiently
+
+
+class White(Static):
+    def __init__(self, input_dim, variance=1., name='white'):
+        super(White, self).__init__(input_dim, name)
+        self.variance = Param('variance', variance, Logexp())
+        self.add_parameters(self.variance)
+
+    def K(self, X, X2=None):
+        if X2 is None:
+            return np.eye(X.shape[0])*self.variance
+        else:
+            return np.zeros((X.shape[0], X2.shape[0]))
+
+    def Kdiag(self, X):
+        ret = np.empty(X.shape[0])
+        ret[:] = self.variance
+        return ret
+
+    def psi2(self, Z, mu, S):
+        return np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        if X2 is None:
+            self.variance.gradient = np.trace(dL_dK)
+        else:
+            self.variance.gradient = 0.
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = dL_dKdiag.sum()
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum()
+
+
+class Bias(Static):
+    def __init__(self, input_dim, variance=1., name='bias'):
         super(Bias, self).__init__(input_dim, name)
         self.variance = Param("variance", variance, Logexp())
         self.add_parameter(self.variance)
@@ -19,7 +74,7 @@ class Bias(Kern):
         ret[:] = self.variance
         return ret
 
-    def Kdiag(self,X):
+    def Kdiag(self, X):
         ret = np.empty((X.shape[0],), dtype=np.float64)
         ret[:] = self.variance
         return ret
@@ -30,23 +85,6 @@ class Bias(Kern):
     def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = dL_dK.sum()
+        self.variance.gradient = dL_dKdiag.sum()
 
-    def gradients_X(self, dL_dK,X, X2, target):
-        return np.zeros(X.shape)
-
-    def gradients_X_diag(self,dL_dKdiag,X,target):
-        return np.zeros(X.shape)
-
-
-    #---------------------------------------#
-    #             PSI statistics            #
-    #---------------------------------------#
-
-    def psi0(self, Z, mu, S):
-        return self.Kdiag(mu)
-
-    def psi1(self, Z, mu, S, target):
-        return self.K(mu, S)
-
-    def psi2(self, Z, mu, S, target):
+    def psi2(self, Z, mu, S):
         ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
         ret[:] = self.variance**2
         return ret
 
     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
 
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        return np.zeros(Z.shape)
-
-    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        return np.zeros(mu.shape), np.zeros(S.shape)
diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index a6ff9424..e8f1f8e9 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -193,8 +193,6 @@ class Matern52(Stationary):
         return(1./self.variance*
                (G_coef*G + orig + orig2))
 
-
-
 class ExpQuad(Stationary):
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'):
         super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
@@ -207,5 +205,26 @@ class ExpQuad(Stationary):
         dist = self._scaled_dist(X, X2)
         return -dist*self.K(X, X2)
 
+class RatQuad(Stationary):
+    def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, name='RatQuad'):
+        super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, name)
+        self.power = Param('power', power, Logexp())
+        self.add_parameters(self.power)
+
+    def K(self, X, X2=None):
+        r = self._scaled_dist(X, X2)
+        return self.variance*(1. + r**2/2.)**(-self.power)
+
+    def dK_dr(self, X, X2):
+        r = self._scaled_dist(X, X2)
+        return -self.variance*self.power*r*(1. + r**2/2.)**(-self.power - 1.)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        super(RatQuad, self).update_gradients_full(dL_dK, X, X2)
+        r = self._scaled_dist(X, X2)
+        r2 = r**2
+        dpow = -self.variance*2.**self.power*(r2 + 2.)**(-self.power)*np.log(0.5*(r2 + 2.))
+        self.power.gradient = np.sum(dL_dK*dpow)
+
diff --git a/GPy/kern/_src/white.py b/GPy/kern/_src/white.py
index d20e2fe1..1fc022f5 100644
--- a/GPy/kern/_src/white.py
+++ b/GPy/kern/_src/white.py
@@ -6,73 +6,3 @@
 import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
-
-class White(Kern):
-    """
-    White noise kernel.
-
-    :param input_dim: the number of input dimensions
-    :type input_dim: int
-    :param variance:
-    :type variance: float
-    """
-    def __init__(self,input_dim,variance=1., name='white'):
-        super(White, self).__init__(input_dim, name)
-        self.input_dim = input_dim
-        self.variance = Param('variance', variance, Logexp())
-        self.add_parameters(self.variance)
-
-    def K(self, X, X2=None):
-        if X2 is None:
-            return np.eye(X.shape[0])*self.variance
-        else:
-            return np.zeros((X.shape[0], X2.shape[0]))
-
-    def Kdiag(self,X):
-        ret = np.ones(X.shape[0])
-        ret[:] = self.variance
-        return ret
-
-    def update_gradients_full(self, dL_dK, X):
-        self.variance.gradient = np.trace(dL_dK)
-
-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        self.variance.gradient = np.trace(dL_dKmm) + np.sum(dL_dKdiag)
-
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        raise NotImplementedError
-
-    def gradients_X(self,dL_dK,X,X2):
-        return np.zeros_like(X)
-
-    def psi0(self,Z,mu,S,target):
-        pass # target += self.variance
-
-    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
-        pass # target += dL_dpsi0.sum()
-
-    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
-        pass
-
-    def psi1(self,Z,mu,S,target):
-        pass
-
-    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
-        pass
-
-    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
-        pass
-
-    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
-        pass
-
-    def psi2(self,Z,mu,S,target):
-        pass
-
-    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
-        pass
-
-    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
-        pass
-
-    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
-        pass
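Note on the RatQuad `power` gradient: since the commit is flagged as unfinished, the analytic expression is worth checking numerically. Below is a minimal standalone sketch in plain numpy (not the GPy classes; `ratquad_K` and `power_gradient` are hypothetical helpers, and the lengthscale is fixed at 1 so r is the ordinary Euclidean distance) comparing it against a finite difference:

import numpy as np

def ratquad_K(X, X2, variance, power):
    # rational quadratic kernel with unit lengthscale: v*(1 + r^2/2)^(-p)
    r2 = np.sum((X[:, None, :] - X2[None, :, :])**2, axis=-1)
    return variance*(1. + r2/2.)**(-power)

def power_gradient(dL_dK, X, X2, variance, power):
    # analytic dK/dpower, as in the patch (variance factor included):
    # dK/dp = -v * 2**p * (r^2 + 2)**(-p) * log(0.5*(r^2 + 2))
    r2 = np.sum((X[:, None, :] - X2[None, :, :])**2, axis=-1)
    dpow = -variance*2.**power*(r2 + 2.)**(-power)*np.log(0.5*(r2 + 2.))
    return np.sum(dL_dK*dpow)

np.random.seed(0)
X = np.random.randn(5, 2)
dL_dK = np.random.randn(5, 5)
v, p, eps = 0.7, 2.0, 1e-6

# central finite difference of the objective sum(dL_dK*K) w.r.t. the power
numeric = (np.sum(dL_dK*ratquad_K(X, X, v, p + eps))
           - np.sum(dL_dK*ratquad_K(X, X, v, p - eps)))/(2.*eps)
analytic = power_gradient(dL_dK, X, X, v, p)
print(abs(numeric - analytic))  # ~1e-8 or smaller: the two agree

The identity used here is that (1 + r^2/2)^(-p) = 2^p*(r^2 + 2)^(-p) and log(1 + r^2/2) = log(0.5*(r^2 + 2)), which is the form the patch evaluates.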
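Similarly, the `_K_computations` refactor in mlp.py is easiest to sanity-check against the closed form in the docstring. A small sketch of just that formula (again standalone numpy, with `mlp_K` a hypothetical helper rather than the `MLP` class); it also illustrates why the X2-is-None branch must set `_K_numer` too, since the diagonal of that matrix feeds the denominator:

import numpy as np

def mlp_K(X, X2, variance=1., w_var=1., b_var=100.):
    # arc sine kernel:
    # k(x,y) = v*(2/pi)*asin((w*x.y + b) / sqrt((w*x.x + b + 1)*(w*y.y + b + 1)))
    numer = w_var*np.dot(X, X2.T) + b_var          # the _K_numer quantity
    v1 = w_var*(X*X).sum(1) + b_var + 1.
    v2 = w_var*(X2*X2).sum(1) + b_var + 1.
    return variance*(2./np.pi)*np.arcsin(numer/np.sqrt(np.outer(v1, v2)))

np.random.seed(1)
X = np.random.randn(6, 3)
K = mlp_K(X, X)

# a valid covariance must be symmetric and positive semi-definite
assert np.allclose(K, K.T)
assert np.linalg.eigvalsh(K).min() > -1e-10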