Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-03 08:42:39 +02:00)

commit 50b9e4dc82 (parent 3818aa3745): corrected caching for psi derivatives
4 changed files with 29 additions and 34 deletions
@@ -58,12 +58,7 @@ class Kern(Parameterized):
         self._sliced_X = 0
         self.useGPU = self._support_GPU and useGPU
-        self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
-
-        #FIXME: temporary solution
-        from ...core.parameterization.lists_and_dicts import ObserverList
-        self._return_psi2_n_flag.observers = ObserverList()
-        self._return_psi2_n_flag._update_on = True
+        self._return_psi2_n_flag = ObsAr(np.zeros(1).astype(bool))

         from .psi_comp import PSICOMP_GH
         self.psicomp = PSICOMP_GH()
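Why the one-line change also lets the FIXME block go: calling `.astype(bool)` on an already-constructed ObsAr produces the result through NumPy's `__array_finalize__`, bypassing the setup done in `__new__`, which is why the old code had to patch `observers` and `_update_on` onto the flag by hand. Converting the dtype first and wrapping afterwards runs the constructor normally. A toy subclass (not GPy code) shows the mechanism:

import numpy as np

class Observed(np.ndarray):
    """Toy ndarray subclass: attributes are set up only in __new__."""
    def __new__(cls, arr):
        obj = np.asarray(arr).view(cls)
        obj.observers = []          # analogue of ObsAr's observer list
        return obj

a = Observed(np.zeros(1))
print(hasattr(a, 'observers'))          # True
b = a.astype(bool)                      # created via __array_finalize__
print(hasattr(b, 'observers'))          # False: __new__ never ran
c = Observed(np.zeros(1).astype(bool))  # convert first, then wrap
print(hasattr(c, 'observers'))          # True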
@@ -68,6 +68,7 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
     _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
     return _psi2

+@profile
 def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
                               psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
     ARD = (len(lengthscale)!=1)
@@ -121,6 +122,7 @@ def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=None):

     return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS

+@profile
 def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None):
     """
     Z - MxQ
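A note on the bare `@profile` decorators added in these two hunks: this is the decorator that kernprof (line_profiler) injects into builtins when the module runs under `kernprof -l`; outside a profiling session the name is undefined and the import fails. A common no-op guard, shown only as a sketch (it is not part of this commit):

# fallback so the module still imports when kernprof is not running
try:
    profile                      # injected into builtins by kernprof
except NameError:
    def profile(func):
        return func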
@@ -156,8 +158,14 @@ def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None):
     _dL_dvar = Lpsi2sum.sum()*2/variance
     _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
     _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
-    _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
-        2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
+    _dL_dZ1 = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2
+    _dL_dZ2 = np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2
+    _dL_dZ3 = 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom)
+    _dL_dZ4 = - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z)
+    _dL_dZ5 = - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
+    _dL_dZ = _dL_dZ1 + _dL_dZ2 + _dL_dZ3 + _dL_dZ4 + _dL_dZ5
+    #_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
+    #    2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
     _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
         (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
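Splitting the chained expression into `_dL_dZ1` through `_dL_dZ5` changes nothing numerically (the old one-liner is kept as a comment); it just gives the line profiler a timing per term. Every term evaluates to an M x Q array, since both `o` and `m` index the M inducing inputs, so the `->oq` and `->mq` outputs are shape-compatible. A quick shape check with made-up sizes:

import numpy as np

N, M, Q = 5, 4, 3                 # made-up sizes for illustration
Lpsi2 = np.ones((N, M, M))
Z = np.ones((M, Q))
mu, denom = np.ones((N, Q)), np.ones((N, Q))
lengthscale2 = np.ones(Q)

terms = [-np.einsum('nmo,oq->oq', Lpsi2, Z)/lengthscale2,
         np.einsum('nmo,oq->mq', Lpsi2, Z)/lengthscale2,
         2*np.einsum('nmo,nq,nq->mq', Lpsi2, mu, denom),
         -np.einsum('nmo,nq,mq->mq', Lpsi2, denom, Z),
         -np.einsum('nmo,oq,nq->mq', Lpsi2, Z, denom)]
assert all(t.shape == (M, Q) for t in terms)   # every term is M x Q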
@@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
 import logging
 from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
 from GPy.core.parameterization.param import Param
+from GPy.core.parameterization.observable_array import ObsAr

 class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
     """
@@ -134,17 +135,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
             full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
             full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)

-        #kl_fctr = self.kl_factr
-        #if self.has_uncertain_inputs():
-            #self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
-
-        # Subsetting Variational Posterior objects, makes the gradients
-        # empty. We need them to be 0 though:
-        #self.X.mean.gradient[:] = 0
-        #self.X.variance.gradient[:] = 0
-
-        #self.variational_prior.update_gradients_KL(self.X)
-
         if self.has_uncertain_inputs():
             self.X.mean.gradient = full_values['meangrad']
             self.X.variance.gradient = full_values['vargrad']
@@ -155,13 +145,15 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
         full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
         full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
-        full_values['dL_dpsi0'] = np.zeros(self.X.shape[0])
-        full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0]))
-        full_values['dL_dpsi2'] = np.zeros((self.Z.shape[0], self.Z.shape[0]))
-
-        full_values['Lpsi0'] = np.zeros(self.X.shape[0])
-        full_values['Lpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0]))
-        full_values['Lpsi2'] = np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))
+        #FIXME Hack
+        full_values['dL_dpsi0'] = ObsAr(np.zeros(self.X.shape[0]))
+        full_values['dL_dpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0])))
+        full_values['dL_dpsi2'] = ObsAr(np.zeros((self.Z.shape[0], self.Z.shape[0])))
+
+        full_values['Lpsi0'] = ObsAr(np.zeros(self.X.shape[0]))
+        full_values['Lpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0])))
+        full_values['Lpsi2'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0])))
         return full_values

     def parameters_changed(self):
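The `#FIXME Hack` buffers above are the same `dL_dpsi*` and `Lpsi*` arrays later handed to `psiDerivativecomputations` (see the SparseGPMiniBatch hunks below). Wrapping them in ObsAr presumably makes them observable to GPy's caching layer while leaving ordinary array code untouched, since ObsAr subclasses ndarray. A quick check, assuming GPy is importable at the path the diff itself uses:

import numpy as np
from GPy.core.parameterization.observable_array import ObsAr

buf = ObsAr(np.zeros((2, 3)))
assert isinstance(buf, np.ndarray)   # ObsAr is an ndarray subclass
buf += 1.0                           # in-place numpy ops still work
assert buf.sum() == 6.0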
@@ -147,7 +147,11 @@ class SparseGPMiniBatch(SparseGP):
                 if np.isscalar(current_values[key]):
                     full_values[key] += current_values[key]
                 else:
-                    full_values[key][index] += current_values[key]
+                    from ..core.parameterization.observable_array import ObsAr
+                    if isinstance(full_values[key], ObsAr):
+                        full_values[key].values[index] += current_values[key]
+                    else:
+                        full_values[key][index] += current_values[key]
             else:
                 full_values[key] = current_values[key]
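One plausible reading of the `.values[index]` accumulation: `.values` exposes the plain ndarray view of the same memory, so the per-minibatch writes bypass the ObsAr wrapper (whose indexing would hand back more wrapped instances) while the data stays shared. A toy stand-in illustrates the view behaviour:

import numpy as np

class Wrapped(np.ndarray):
    """Toy stand-in for ObsAr: .values exposes the raw ndarray view."""
    def __new__(cls, arr):
        return np.asarray(arr).view(cls)
    @property
    def values(self):
        return self.view(np.ndarray)

buf = Wrapped(np.zeros((4, 2)))
print(type(buf[1:3]).__name__)   # Wrapped: indexing keeps the subclass
buf.values[1:3] += 1.0           # write through the plain view, same memory
print(buf[1, 0])                 # 1.0: the update is visible via the wrapper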
@@ -178,11 +182,6 @@ class SparseGPMiniBatch(SparseGP):
                                                     dL_dpsi2=full_values['dL_dpsi2'],
                                                     psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
                                                     Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
-            #self.kern.update_gradients_expectations(variational_posterior=self.X,
-                                                    #Z=self.Z,
-                                                    #dL_dpsi0=full_values['dL_dpsi0'],
-                                                    #dL_dpsi1=full_values['dL_dpsi1'],
-                                                    #dL_dpsi2=full_values['dL_dpsi2'])
             full_values['kerngrad'] += self.kern.gradient

             #gradients wrt Z
@@ -251,9 +250,10 @@ class SparseGPMiniBatch(SparseGP):
         #Compute the psi statistics for N once, but don't sum out N in psi2
         if self.has_uncertain_inputs():
             self.kern.return_psi2_n = True
-            psi0 = self.kern.psi0(self.Z, self.X)
-            psi1 = self.kern.psi1(self.Z, self.X)
-            psi2 = self.kern.psi2(self.Z, self.X)
+            from ..core.parameterization.observable_array import ObsAr
+            psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
+            psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
+            psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
         else:
             psi0 = self.kern.Kdiag(self.X)
             psi1 = self.kern.K(self.X, self.Z)
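Taken together, the psi statistics and gradient buffers fed to the cached derivative computations are now stable, observable objects rather than fresh plain arrays on every call. A crude sketch of why object identity matters to such a cache (hypothetical `memo_by_id` helper, not GPy's actual Cacher):

import numpy as np

def memo_by_id(f):
    # crude stand-in for caching keyed on observable inputs: results are
    # reused only while the same array objects are passed back in
    cache = {}
    def wrapped(*arrays):
        key = tuple(id(a) for a in arrays)
        if key not in cache:
            print("computing")
            cache[key] = f(*arrays)
        return cache[key]
    return wrapped

@memo_by_id
def derivatives(psi1, psi2):
    return psi1.sum() + psi2.sum()

psi1, psi2 = np.zeros((3, 2)), np.zeros((2, 2))
derivatives(psi1, psi2)              # "computing"
derivatives(psi1, psi2)              # cache hit: same objects reused
derivatives(np.zeros((3, 2)), psi2)  # fresh array, so a cache miss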