speed tuning for mlp kernel and gauss quadrature for psi-statistics

This commit is contained in:
Zhenwen Dai 2015-09-07 14:04:57 +01:00
parent 276330d1d1
commit e6b1482d21
4 changed files with 13 additions and 2 deletions

View file

@ -88,6 +88,8 @@ class Kern(Parameterized):
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2] return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def gradients_X(self, dL_dK, X, X2): def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError raise NotImplementedError
def gradients_X_X2(self, dL_dK, X, X2):
"""
Return the pair of gradients with respect to X and with respect to X2.

The first element is self.gradients_X(dL_dK, X, X2); the second is
self.gradients_X(dL_dK.T, X2, X), i.e. the same routine called with the
two inputs swapped and dL_dK transposed.
"""
# NOTE(review): the second call passes X2 as the first input, so this
# looks like it requires X2 to be supplied (not None) -- confirm with callers.
return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
def gradients_XX(self, dL_dK, X, X2): def gradients_XX(self, dL_dK, X, X2):
raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel") raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X): def gradients_XX_diag(self, dL_dKdiag, X):

View file

@ -19,6 +19,7 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X) put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX) put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)

View file

@ -5,6 +5,7 @@ from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
from ...util.linalg import tdot
from ...util.caching import Cache_this from ...util.caching import Cache_this
four_over_tau = 2./np.pi four_over_tau = 2./np.pi
@ -40,6 +41,7 @@ class MLP(Kern):
self.link_parameters(self.variance, self.weight_variance, self.bias_variance) self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
@Cache_this(limit=20, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
if X2 is None: if X2 is None:
X_denom = np.sqrt(self._comp_prod(X)+1.) X_denom = np.sqrt(self._comp_prod(X)+1.)
@ -51,6 +53,7 @@ class MLP(Kern):
XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:] XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:]
return self.variance*four_over_tau*np.arcsin(XTX) return self.variance*four_over_tau*np.arcsin(XTX)
@Cache_this(limit=20, ignore_args=())
def Kdiag(self, X): def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix for X.""" """Compute the diagonal of the covariance matrix for X."""
X_prod = self._comp_prod(X) X_prod = self._comp_prod(X)
@ -73,6 +76,10 @@ class MLP(Kern):
"""Derivative of the covariance matrix with respect to X""" """Derivative of the covariance matrix with respect to X"""
return self._comp_grads(dL_dK, X, X2)[3] return self._comp_grads(dL_dK, X, X2)[3]
def gradients_X_X2(self, dL_dK, X, X2):
"""Derivatives of the covariance matrix with respect to X and X2"""
# A single _comp_grads call supplies the whole result: everything from
# index 3 onward (index 3 alone is what gradients_X returns).
# Presumably this slice is the (dX, dX2) pair -- confirm against _comp_grads.
return self._comp_grads(dL_dK, X, X2)[3:]
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
"""Gradient of diagonal of covariance with respect to X""" """Gradient of diagonal of covariance with respect to X"""
return self._comp_grads_diag(dL_dKdiag, X)[3] return self._comp_grads_diag(dL_dKdiag, X)[3]

View file

@ -80,8 +80,9 @@ class PSICOMP_GH(PSICOMP):
dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i] dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i]
kern.update_gradients_full(dL_dkfu, X, Z) kern.update_gradients_full(dL_dkfu, X, Z)
dtheta += kern.gradient dtheta += kern.gradient
dX += kern.gradients_X(dL_dkfu, X, Z) dX_i, dZ_i = kern.gradients_X_X2(dL_dkfu, X, Z)
dZ += kern.gradients_X(dL_dkfu.T, Z, X) dX += dX_i
dZ += dZ_i
dmu += dX dmu += dX
dS += dX*self.locs[i]/(2.*S_sq) dS += dX*self.locs[i]/(2.*S_sq)
kern.gradient[:] = dtheta_old kern.gradient[:] = dtheta_old