Prod kernel now seems to work for sparse inference

This commit is contained in:
James Hensman 2014-02-19 22:23:07 +00:00
parent 92d71384b7
commit de51ad638a
6 changed files with 34 additions and 41 deletions

View file

@ -68,22 +68,21 @@ class SparseGP(GP):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
self._update_gradients_Z(add=False)
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
"""
Make a prediction for the latent function values
"""
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
Kx = self.kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
else:
# assert which_parts=='all', "switching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"

View file

@ -46,7 +46,7 @@ class Add(Kern):
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, i_s)]
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]

View file

@ -43,16 +43,16 @@ class Linear(Kern):
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.variances.gradient = np.zeros(self.variances.shape)
#TODO: remove?self.variances.gradient = np.zeros(self.variances.shape)
self.add_parameter(self.variances)
self.variances.add_observer(self, self.update_variance)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2 = np.empty(shape=(2, 1))
def update_variance(self, v):
self.variances2 = np.square(self.variances)
@ -62,7 +62,7 @@ class Linear(Kern):
def update_gradients_full(self, dL_dK, X):
self.variances.gradient[:] = 0
self._param_grad_helper(dL_dK, X, None, self.variances.gradient)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
@ -71,7 +71,7 @@ class Linear(Kern):
self.variances.gradient = tmp.sum()
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
# psi0:
@ -87,7 +87,7 @@ class Linear(Kern):
#from Kmm
self._K_computations(Z, None)
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
def K(self, X, X2, target):
if self.ARD:
XX = X * np.sqrt(self.variances)
@ -224,7 +224,7 @@ class Linear(Kern):
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
@ -281,7 +281,7 @@ class Linear(Kern):
self._X2 = None
else:
self._X2 = X2.copy()
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2

View file

@ -36,38 +36,33 @@ class Prod(Kern):
self._params = None
def K(self, X, X2=None):
self._K_computations(X,X2)
self._K_computations(X, X2)
return self._K1 * self._K2
def Kdiag(self, X):
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )
def update_gradients_sparse(self):
pass
#wtf goes here??
#def dKdiag_dtheta(self,dL_dKdiag,X,target):
#K1 = np.zeros(X.shape[0])
#K2 = np.zeros(X.shape[0])
#self.k1.Kdiag(X[:,self.slice1],K1)
#self.k2.Kdiag(X[:,self.slice2],K2)
#self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
#self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def gradients_X(self,dL_dK,X,X2):
def gradients_X(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
self._K_computations(X, X2)
target = np.zeros(X.shape)
if X2 is None:
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None)
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None)
else:
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2])
return target
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
@ -78,7 +73,7 @@ class Prod(Kern):
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2):
def _K_computations(self, X, X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params == self._get_params().copy()

View file

@ -154,7 +154,7 @@ class RBF(Kern):
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def gradients_X(self, dL_dK, X, X2):
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
if X2 is None:

View file

@ -20,9 +20,8 @@ class White(Kern):
self.input_dim = input_dim
self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance)
self._psi1 = 0 # TODO: more elegance here
def K(self,X,X2):
def K(self, X, X2=None):
if X2 is None:
return np.eye(X.shape[0])*self.variance
else: