mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-21 14:05:14 +02:00
Merge branch 'params' of https://github.com/SheffieldML/GPy into params
This commit is contained in:
commit
3cca3c2463
44 changed files with 1056 additions and 511 deletions
|
|
@ -51,8 +51,6 @@ class Coregionalize(Kern):
|
|||
assert kappa.shape==(self.output_dim, )
|
||||
self.kappa = Param('kappa', kappa, Logexp())
|
||||
self.add_parameters(self.W, self.kappa)
|
||||
self.parameters_changed()
|
||||
|
||||
|
||||
def parameters_changed(self):
|
||||
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ class Kern(Parameterized):
|
|||
"""
|
||||
Returns the sensitivity for each dimension of this kernel.
|
||||
"""
|
||||
return np.zeros(self.input_dim)
|
||||
return self.kern.input_sensitivity()
|
||||
|
||||
def __add__(self, other):
|
||||
""" Overloading of the '+' operator. for more control, see self.add """
|
||||
|
|
@ -129,7 +129,7 @@ class Kern(Parameterized):
|
|||
"""
|
||||
return self.prod(other, tensor=True)
|
||||
|
||||
def prod(self, other, tensor=False):
|
||||
def prod(self, other, tensor=False, name=None):
|
||||
"""
|
||||
Multiply two kernels (either on the same space, or on the tensor
|
||||
product of the input space).
|
||||
|
|
@ -142,4 +142,4 @@ class Kern(Parameterized):
|
|||
"""
|
||||
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
||||
from prod import Prod
|
||||
return Prod(self, other, tensor)
|
||||
return Prod(self, other, tensor, name)
|
||||
|
|
|
|||
|
|
@ -6,10 +6,12 @@ import numpy as np
|
|||
from scipy import weave
|
||||
from kern import Kern
|
||||
from ...util.linalg import tdot
|
||||
from ...util.misc import fast_array_equal, param_to_array
|
||||
from ...util.misc import param_to_array
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
from ...util.caching import Cache_this
|
||||
from ...core.parameterization import variational
|
||||
from psi_comp import linear_psi_comp
|
||||
|
||||
class Linear(Kern):
|
||||
"""
|
||||
|
|
@ -104,49 +106,113 @@ class Linear(Kern):
|
|||
#---------------------------------------#
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
return np.sum(self.variances * self._mu2S(variational_posterior), 1)
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
|
||||
return np.einsum('q,nq,nq->n',self.variances,gamma,np.square(mu)+S)
|
||||
# return (self.variances*gamma*(np.square(mu)+S)).sum(axis=1)
|
||||
else:
|
||||
return np.sum(self.variances * self._mu2S(variational_posterior), 1)
|
||||
|
||||
def psi1(self, Z, variational_posterior):
|
||||
return self.K(variational_posterior.mean, Z) #the variance, it does nothing
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
|
||||
# return (self.variances*gamma*mu).sum(axis=1)
|
||||
else:
|
||||
return self.K(variational_posterior.mean, Z) #the variance, it does nothing
|
||||
|
||||
@Cache_this(limit=1)
|
||||
def psi2(self, Z, variational_posterior):
|
||||
ZA = Z * self.variances
|
||||
ZAinner = self._ZAinner(variational_posterior, Z)
|
||||
return np.dot(ZAinner, ZA.T)
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
mu2 = np.square(mu)
|
||||
variances2 = np.square(self.variances)
|
||||
tmp = np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
|
||||
return np.einsum('nq,q,mq,oq,nq->nmo',gamma,variances2,Z,Z,mu2+S)+\
|
||||
np.einsum('nm,no->nmo',tmp,tmp) - np.einsum('nq,q,mq,oq,nq->nmo',np.square(gamma),variances2,Z,Z,mu2)
|
||||
else:
|
||||
ZA = Z * self.variances
|
||||
ZAinner = self._ZAinner(variational_posterior, Z)
|
||||
return np.dot(ZAinner, ZA.T)
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
#psi1
|
||||
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
|
||||
# psi0:
|
||||
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
|
||||
if self.ARD: self.variances.gradient += tmp.sum(0)
|
||||
else: self.variances.gradient += tmp.sum()
|
||||
#psi2
|
||||
if self.ARD:
|
||||
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :])
|
||||
self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0)
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
mu2S = np.square(mu)+S
|
||||
|
||||
_dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
|
||||
grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\
|
||||
np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance)
|
||||
if self.ARD:
|
||||
self.variances.gradient = grad
|
||||
else:
|
||||
self.variances.gradient = grad.sum()
|
||||
else:
|
||||
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances
|
||||
#psi1
|
||||
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
|
||||
# psi0:
|
||||
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
|
||||
if self.ARD: self.variances.gradient += tmp.sum(0)
|
||||
else: self.variances.gradient += tmp.sum()
|
||||
#psi2
|
||||
if self.ARD:
|
||||
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :])
|
||||
self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0)
|
||||
else:
|
||||
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
#psi1
|
||||
grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
|
||||
#psi2
|
||||
self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
|
||||
return grad
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
_, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
|
||||
|
||||
grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
|
||||
np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
|
||||
|
||||
return grad
|
||||
else:
|
||||
#psi1
|
||||
grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
|
||||
#psi2
|
||||
self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
|
||||
return grad
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
|
||||
# psi0
|
||||
grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
|
||||
grad_S += dL_dpsi0[:, None] * self.variances
|
||||
# psi1
|
||||
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
|
||||
# psi2
|
||||
self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)
|
||||
|
||||
return grad_mu, grad_S
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
mu2S = np.square(mu)+S
|
||||
_, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
|
||||
|
||||
grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\
|
||||
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma)
|
||||
grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\
|
||||
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu)
|
||||
grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS)
|
||||
|
||||
return grad_mu, grad_S, grad_gamma
|
||||
else:
|
||||
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
|
||||
# psi0
|
||||
grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
|
||||
grad_S += dL_dpsi0[:, None] * self.variances
|
||||
# psi1
|
||||
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
|
||||
# psi2
|
||||
self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)
|
||||
|
||||
return grad_mu, grad_S
|
||||
|
||||
#--------------------------------------------------#
|
||||
# Helpers for psi statistics #
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ class Periodic(Kern):
|
|||
self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp())
|
||||
self.period = Param('period', np.float64(period), Logexp())
|
||||
self.add_parameters(self.variance, self.lengthscale, self.period)
|
||||
self.parameters_changed()
|
||||
|
||||
def _cos(self, alpha, omega, phase):
|
||||
def f(x):
|
||||
|
|
|
|||
|
|
@ -15,14 +15,16 @@ class Prod(Kern):
|
|||
:rtype: kernel object
|
||||
|
||||
"""
|
||||
def __init__(self, k1, k2, tensor=False):
|
||||
def __init__(self, k1, k2, tensor=False,name=None):
|
||||
if tensor:
|
||||
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
|
||||
name = k1.name + '_xx_' + k2.name if name is None else name
|
||||
super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
|
||||
self.slice1 = slice(0,k1.input_dim)
|
||||
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
|
||||
else:
|
||||
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
|
||||
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
|
||||
name = k1.name + '_x_' + k2.name if name is None else name
|
||||
super(Prod, self).__init__(k1.input_dim, name)
|
||||
self.slice1 = slice(0, self.input_dim)
|
||||
self.slice2 = slice(0, self.input_dim)
|
||||
self.k1 = k1
|
||||
|
|
@ -39,17 +41,17 @@ class Prod(Kern):
|
|||
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
|
||||
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
|
||||
self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])
|
||||
self.k1.update_gradients_full(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1])
|
||||
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
target = np.zeros(X.shape)
|
||||
if X2 is None:
|
||||
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None)
|
||||
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None)
|
||||
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1], None)
|
||||
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2], None)
|
||||
else:
|
||||
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
|
||||
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
|
||||
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
|
||||
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
|
||||
return target
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
|
|
|
|||
51
GPy/kern/_src/psi_comp/linear_psi_comp.py
Normal file
51
GPy/kern/_src/psi_comp/linear_psi_comp.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
"""
|
||||
The package for the Psi statistics computation of the linear kernel for SSGPLVM
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from GPy.util.caching import Cache_this
|
||||
|
||||
#@Cache_this(limit=1)
|
||||
def _psi2computations(variance, Z, mu, S, gamma):
    """
    Partial derivatives of the linear-kernel psi2 statistic for a
    spike-and-slab variational posterior.

    The quantity being differentiated is (per data point n, for the pair m, o)

        psi2[n, m, o] = sum_q gamma[n,q] * variance[q]**2 * Z[m,q] * Z[o,q] * (mu[n,q]**2 + S[n,q])
                        + c[n, m] * c[n, o]
                        - sum_q gamma[n,q]**2 * variance[q]**2 * Z[m,q] * Z[o,q] * mu[n,q]**2

    with c[n, m] = sum_q gamma[n,q] * variance[q] * Z[m,q] * mu[n,q].
    psi2 itself is NOT computed or returned here, only its derivatives.

    :param variance: kernel variances, one per input dimension (length Q)
    :param Z: MxQ array
    :param mu: NxQ array (posterior mean)
    :param S: NxQ array (posterior variance)
    :param gamma: NxQ array (posterior binary/inclusion probability)
    :returns: the tuple
        (_dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ)
        where the first four are NxMxMxQ and _dpsi2_dZ is NxMxQ.
    """
    mu2 = np.square(mu)
    gamma2 = np.square(gamma)
    variance2 = np.square(variance)
    mu2S = mu2 + S  # NxQ, second moment of the slab
    # c[n, m] from the docstring; shared by every cross term below.
    common_sum = np.einsum('nq,q,mq,nq->nm', gamma, variance, Z, mu)  # NxM

    _dpsi2_dvariance = (
        np.einsum('nq,q,mq,oq->nmoq', 2. * (gamma * mu2S - gamma2 * mu2), variance, Z, Z)
        + np.einsum('nq,mq,nq,no->nmoq', gamma, Z, mu, common_sum)
        + np.einsum('nq,oq,nq,nm->nmoq', gamma, Z, mu, common_sum)
    )

    _dpsi2_dgamma = (
        np.einsum('q,mq,oq,nq->nmoq', variance2, Z, Z, (mu2S - 2. * gamma * mu2))
        + np.einsum('q,mq,nq,no->nmoq', variance, Z, mu, common_sum)
        + np.einsum('q,oq,nq,nm->nmoq', variance, Z, mu, common_sum)
    )

    _dpsi2_dmu = (
        np.einsum('q,mq,oq,nq,nq->nmoq', variance2, Z, Z, mu, 2. * (gamma - gamma2))
        + np.einsum('nq,q,mq,no->nmoq', gamma, variance, Z, common_sum)
        + np.einsum('nq,q,oq,nm->nmoq', gamma, variance, Z, common_sum)
    )

    _dpsi2_dS = np.einsum('nq,q,mq,oq->nmoq', gamma, variance2, Z, Z)

    # Compact NxMxQ form: the leading factor 2 folds together the two
    # places Z appears in psi2, which presumes the caller contracts this
    # with a dL_dpsi2 that is symmetric in its last two axes.
    _dpsi2_dZ = 2. * (
        np.einsum('nq,q,mq,nq->nmq', gamma, variance2, Z, mu2S)
        + np.einsum('nq,q,nq,nm->nmq', gamma, variance, mu, common_sum)
        - np.einsum('nq,q,mq,nq->nmq', gamma2, variance2, Z, mu2)
    )

    return _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ
|
||||
|
|
@ -6,22 +6,15 @@ The package for the psi statistics computation
|
|||
"""
|
||||
|
||||
import numpy as np
|
||||
from GPy.util.caching import Cache_this
|
||||
|
||||
@Cache_this(limit=1)
|
||||
def _Z_distances(Z):
|
||||
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||
return Zhat, Zdist
|
||||
|
||||
# def _psi1computations(self, Z, vp):
|
||||
# mu, S = vp.mean, vp.variance
|
||||
# l2 = lengthscale **2
|
||||
# denom = S[:, None, :] / l2 + 1. # N,1,Q
|
||||
# dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
|
||||
# dist_sq = np.square(dist) / l2 / denom # N,M,Q
|
||||
# exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
|
||||
# psi1 = self.variance * np.exp(exponent) # N,M
|
||||
# return denom, dist, dist_sq, psi1
|
||||
|
||||
@Cache_this(limit=1)
|
||||
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
||||
"""
|
||||
Z - MxQ
|
||||
|
|
@ -49,7 +42,8 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
|||
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
|
||||
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
|
||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
|
||||
_psi1_exponent = np.log(np.exp(_psi1_exponent1) + np.exp(_psi1_exponent2)) #NxMxQ
|
||||
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
||||
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
||||
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
||||
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
|
||||
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
|
||||
|
|
@ -64,6 +58,7 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
|||
|
||||
return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
|
||||
|
||||
@Cache_this(limit=1)
|
||||
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
|
||||
"""
|
||||
Z - MxQ
|
||||
|
|
@ -95,7 +90,8 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
|
|||
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
|
||||
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
|
||||
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
|
||||
_psi2_exponent = np.log(np.exp(_psi2_exponent1) + np.exp(_psi2_exponent2))
|
||||
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
|
||||
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
|
||||
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
|
||||
_psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
|
||||
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
|
||||
|
|
@ -8,7 +8,7 @@ from ...util.misc import param_to_array
|
|||
from stationary import Stationary
|
||||
from GPy.util.caching import Cache_this
|
||||
from ...core.parameterization import variational
|
||||
from rbf_psi_comp import ssrbf_psi_comp
|
||||
from psi_comp import ssrbf_psi_comp
|
||||
|
||||
class RBF(Stationary):
|
||||
"""
|
||||
|
|
@ -62,13 +62,18 @@ class RBF(Stationary):
|
|||
|
||||
#from psi1
|
||||
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
|
||||
|
||||
|
||||
#from psi2
|
||||
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
return
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import numpy as np
|
|||
from ...util.linalg import tdot
|
||||
from ...util.config import *
|
||||
from stationary import Stationary
|
||||
from rbf_psi_comp import ssrbf_psi_comp
|
||||
from psi_comp import ssrbf_psi_comp
|
||||
|
||||
class SSRBF(Stationary):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
# Check Matthew Rocklin's blog post.
|
||||
try:
|
||||
try:
|
||||
import sympy as sp
|
||||
sympy_available=True
|
||||
from sympy.utilities.lambdify import lambdify
|
||||
except ImportError:
|
||||
sympy_available=False
|
||||
exit()
|
||||
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
|
|
@ -36,7 +35,7 @@ class Sympykern(Kern):
|
|||
super(Sympykern, self).__init__(input_dim, name)
|
||||
|
||||
self._sp_k = k
|
||||
|
||||
|
||||
# pull the variable names out of the symbolic covariance function.
|
||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||
|
|
@ -51,7 +50,7 @@ class Sympykern(Kern):
|
|||
self._sp_kdiag = k
|
||||
for x, z in zip(self._sp_x, self._sp_z):
|
||||
self._sp_kdiag = self._sp_kdiag.subs(z, x)
|
||||
|
||||
|
||||
# If it is a multi-output covariance, add an input for indexing the outputs.
|
||||
self._real_input_dim = x_dim
|
||||
# Check input dim is number of xs + 1 if output_dim is >1
|
||||
|
|
@ -73,7 +72,7 @@ class Sympykern(Kern):
|
|||
|
||||
# Extract names of shared parameters (those without a subscript)
|
||||
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
|
||||
|
||||
|
||||
self.num_split_params = len(self._sp_theta_i)
|
||||
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
|
||||
# Add split parameters to the model.
|
||||
|
|
@ -82,11 +81,11 @@ class Sympykern(Kern):
|
|||
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
|
||||
self.add_parameter(getattr(self, theta))
|
||||
|
||||
|
||||
|
||||
self.num_shared_params = len(self._sp_theta)
|
||||
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
|
||||
self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
|
||||
|
||||
|
||||
else:
|
||||
self.num_split_params = 0
|
||||
self._split_theta_names = []
|
||||
|
|
@ -107,10 +106,10 @@ class Sympykern(Kern):
|
|||
derivative_arguments = self._sp_x + self._sp_theta
|
||||
if self.output_dim > 1:
|
||||
derivative_arguments += self._sp_theta_i
|
||||
|
||||
|
||||
self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
|
||||
self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
|
||||
|
||||
|
||||
# This gives the parameters for the arg list.
|
||||
self.arg_list = self._sp_x + self._sp_z + self._sp_theta
|
||||
self.diag_arg_list = self._sp_x + self._sp_theta
|
||||
|
|
@ -125,9 +124,6 @@ class Sympykern(Kern):
|
|||
# generate the code for the covariance functions
|
||||
self._gen_code()
|
||||
|
||||
self.parameters_changed() # initializes caches
|
||||
|
||||
|
||||
def __add__(self,other):
|
||||
return spkern(self._sp_k+other._sp_k)
|
||||
|
||||
|
|
@ -141,7 +137,7 @@ class Sympykern(Kern):
|
|||
for key in self.derivatives.keys():
|
||||
setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
|
||||
|
||||
def K(self,X,X2=None):
|
||||
def K(self,X,X2=None):
|
||||
self._K_computations(X, X2)
|
||||
return self._K_function(**self._arguments)
|
||||
|
||||
|
|
@ -149,11 +145,11 @@ class Sympykern(Kern):
|
|||
def Kdiag(self,X):
|
||||
self._K_computations(X)
|
||||
return self._Kdiag_function(**self._diag_arguments)
|
||||
|
||||
|
||||
def _param_grad_helper(self,partial,X,Z,target):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||
self._K_computations(X, X2)
|
||||
|
|
@ -172,7 +168,7 @@ class Sympykern(Kern):
|
|||
gf = getattr(self, '_Kdiag_diff_' + x.name)
|
||||
dX[:, i] = gf(**self._diag_arguments)*dL_dK
|
||||
return dX
|
||||
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
# Need to extract parameters to local variables first
|
||||
self._K_computations(X, X2)
|
||||
|
|
@ -197,7 +193,7 @@ class Sympykern(Kern):
|
|||
gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
|
||||
for i in np.arange(self.output_dim)])
|
||||
setattr(parameter, 'gradient', gradient)
|
||||
|
||||
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
self._K_computations(X)
|
||||
|
|
@ -213,7 +209,7 @@ class Sympykern(Kern):
|
|||
setattr(parameter, 'gradient',
|
||||
np.asarray([a[np.where(self._output_ind==i)].sum()
|
||||
for i in np.arange(self.output_dim)]))
|
||||
|
||||
|
||||
def _K_computations(self, X, X2=None):
|
||||
"""Set up argument lists for the derivatives."""
|
||||
# Could check if this needs doing or not, there could
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue