Corrected caching for psi derivatives by wrapping the psi statistics and their gradient buffers in ObsAr

This commit is contained in:
Alan Saul 2015-09-01 19:17:56 +03:00
parent 3818aa3745
commit 50b9e4dc82
4 changed files with 29 additions and 34 deletions

View file

@ -58,12 +58,7 @@ class Kern(Parameterized):
self._sliced_X = 0 self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool) self._return_psi2_n_flag = ObsAr(np.zeros(1).astype(bool))
#FIXME: temporary solution
from ...core.parameterization.lists_and_dicts import ObserverList
self._return_psi2_n_flag.observers = ObserverList()
self._return_psi2_n_flag._update_on = True
from .psi_comp import PSICOMP_GH from .psi_comp import PSICOMP_GH
self.psicomp = PSICOMP_GH() self.psicomp = PSICOMP_GH()

View file

@ -68,6 +68,7 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2 return _psi2
@profile
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior, def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None): psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
@ -121,6 +122,7 @@ def __psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, psi1=None, Lpsi1=No
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
@profile
def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None): def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=None):
""" """
Z - MxQ Z - MxQ
@ -156,8 +158,14 @@ def __psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, psi2=None, Lpsi2=No
_dL_dvar = Lpsi2sum.sum()*2/variance _dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \ _dL_dZ1 = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2
2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) _dL_dZ2 = np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2
_dL_dZ3 = 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom)
_dL_dZ4 = - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z)
_dL_dZ5 = - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dZ = _dL_dZ1 + _dL_dZ2 + _dL_dZ3 + _dL_dZ4 + _dL_dZ5
#_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
#2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))- _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)

View file

@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba
import logging import logging
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
from GPy.core.parameterization.param import Param from GPy.core.parameterization.param import Param
from GPy.core.parameterization.observable_array import ObsAr
class BayesianGPLVMMiniBatch(SparseGPMiniBatch): class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
""" """
@ -134,17 +135,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z) full_values['Xgrad'] = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X) full_values['Xgrad'] += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)
#kl_fctr = self.kl_factr
#if self.has_uncertain_inputs():
#self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
# Subsetting Variational Posterior objects, makes the gradients
# empty. We need them to be 0 though:
#self.X.mean.gradient[:] = 0
#self.X.variance.gradient[:] = 0
#self.variational_prior.update_gradients_KL(self.X)
if self.has_uncertain_inputs(): if self.has_uncertain_inputs():
self.X.mean.gradient = full_values['meangrad'] self.X.mean.gradient = full_values['meangrad']
self.X.variance.gradient = full_values['vargrad'] self.X.variance.gradient = full_values['vargrad']
@ -155,13 +145,15 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values() full_values = super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) full_values['meangrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1])) full_values['vargrad'] = np.zeros((self.X.shape[0], self.X.shape[1]))
full_values['dL_dpsi0'] = np.zeros(self.X.shape[0])
full_values['dL_dpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0]))
full_values['dL_dpsi2'] = np.zeros((self.Z.shape[0], self.Z.shape[0]))
full_values['Lpsi0'] = np.zeros(self.X.shape[0]) #FIXME Hack
full_values['Lpsi1'] = np.zeros((self.X.shape[0], self.Z.shape[0])) full_values['dL_dpsi0'] = ObsAr(np.zeros(self.X.shape[0]))
full_values['Lpsi2'] = np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0])) full_values['dL_dpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0])))
full_values['dL_dpsi2'] = ObsAr(np.zeros((self.Z.shape[0], self.Z.shape[0])))
full_values['Lpsi0'] = ObsAr(np.zeros(self.X.shape[0]))
full_values['Lpsi1'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0])))
full_values['Lpsi2'] = ObsAr(np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0])))
return full_values return full_values
def parameters_changed(self): def parameters_changed(self):

View file

@ -146,6 +146,10 @@ class SparseGPMiniBatch(SparseGP):
if key in full_values: if key in full_values:
if np.isscalar(current_values[key]): if np.isscalar(current_values[key]):
full_values[key] += current_values[key] full_values[key] += current_values[key]
else:
from ..core.parameterization.observable_array import ObsAr
if isinstance(full_values[key], ObsAr):
full_values[key].values[index] += current_values[key]
else: else:
full_values[key][index] += current_values[key] full_values[key][index] += current_values[key]
else: else:
@ -178,11 +182,6 @@ class SparseGPMiniBatch(SparseGP):
dL_dpsi2=full_values['dL_dpsi2'], dL_dpsi2=full_values['dL_dpsi2'],
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2, psi0=self.psi0, psi1=self.psi1, psi2=self.psi2,
Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2']) Lpsi0=full_values['Lpsi0'], Lpsi1=full_values['Lpsi1'], Lpsi2=full_values['Lpsi2'])
#self.kern.update_gradients_expectations(variational_posterior=self.X,
#Z=self.Z,
#dL_dpsi0=full_values['dL_dpsi0'],
#dL_dpsi1=full_values['dL_dpsi1'],
#dL_dpsi2=full_values['dL_dpsi2'])
full_values['kerngrad'] += self.kern.gradient full_values['kerngrad'] += self.kern.gradient
#gradients wrt Z #gradients wrt Z
@ -251,9 +250,10 @@ class SparseGPMiniBatch(SparseGP):
#Compute the psi statistics for N once, but don't sum out N in psi2 #Compute the psi statistics for N once, but don't sum out N in psi2
if self.has_uncertain_inputs(): if self.has_uncertain_inputs():
self.kern.return_psi2_n = True self.kern.return_psi2_n = True
psi0 = self.kern.psi0(self.Z, self.X) from ..core.parameterization.observable_array import ObsAr
psi1 = self.kern.psi1(self.Z, self.X) psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
psi2 = self.kern.psi2(self.Z, self.X) psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
else: else:
psi0 = self.kern.Kdiag(self.X) psi0 = self.kern.Kdiag(self.X)
psi1 = self.kern.K(self.X, self.Z) psi1 = self.kern.K(self.X, self.Z)