Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-15 06:52:39 +02:00)
prod now seems to work for sparse

parent 92d71384b7
commit de51ad638a

6 changed files with 34 additions and 41 deletions
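The commit title refers to the Prod (product) kernel now working inside the sparse GP models. A minimal usage sketch of what that enables, assuming the standard GPy constructors (GPy.kern.RBF, GPy.kern.Linear, GPy.models.SparseGPRegression); the names are taken from the current GPy API and are illustrative, not from this revision:

import numpy as np
import GPy

# Toy data: 1-D inputs with noisy sine observations.
X = np.random.uniform(0., 10., (200, 1))
Y = np.sin(X) + 0.1 * np.random.randn(200, 1)

# Product kernel: the gradient plumbing touched in this commit is what lets
# a Prod kernel participate in sparse (inducing-point) inference.
k = GPy.kern.RBF(1) * GPy.kern.Linear(1)

m = GPy.models.SparseGPRegression(X, Y, kernel=k, num_inducing=20)
m.optimize()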
@@ -68,22 +68,21 @@ class SparseGP(GP):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
         self._update_gradients_Z(add=False)

-    def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+    def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
         Make a prediction for the latent function values
         """
         if X_variance_new is None:
-            Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
+            Kx = self.kern.K(self.Z, Xnew)
             mu = np.dot(Kx.T, self.posterior.woodbury_vector)
             if full_cov:
-                Kxx = self.kern.K(Xnew, which_parts=which_parts)
-                var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
+                Kxx = self.kern.K(Xnew)
+                var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
             else:
-                Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
+                Kxx = self.kern.Kdiag(Xnew)
                 var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
         else:
-            # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
-            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
+            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
             mu = np.dot(Kx, self.Cpsi1V)
             if full_cov:
                 raise NotImplementedError, "TODO"
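For reference, the posterior quantities woodbury_vector and woodbury_inv used in _raw_predict above give the usual sparse GP predictive equations: mu = Kx' w and var = Kxx - Kx' W Kx. A minimal NumPy sketch of that prediction step; kern_K, kern_Kdiag, Z, woodbury_vector and woodbury_inv are placeholder names standing in for the kernel, inducing inputs and posterior, not the GPy API:

import numpy as np

def sparse_predict(kern_K, kern_Kdiag, Z, Xnew, woodbury_vector, woodbury_inv, full_cov=False):
    Kx = kern_K(Z, Xnew)                          # (M, N*) cross-covariance inducing points vs. test points
    mu = Kx.T.dot(woodbury_vector)                # predictive mean
    if full_cov:
        Kxx = kern_K(Xnew, Xnew)                  # full (N*, N*) test covariance
        var = Kxx - Kx.T.dot(woodbury_inv).dot(Kx)
    else:
        Kxx = kern_Kdiag(Xnew)                    # only the diagonal is needed
        var = Kxx - np.sum(Kx * woodbury_inv.dot(Kx), 0)
    return mu, var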
@@ -46,7 +46,7 @@ class Add(Kern):
         [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, i_s)]
+        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
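The input_slices used above let each summand of an additive kernel see only its own columns of X and Z (the fix replaces the leaked loop variable i_s with self.input_slices). A minimal sketch of the idea; AddSketch and its part kernels are hypothetical stand-ins with a K(X, X2) method, not the GPy Add class:

import numpy as np

class AddSketch:
    def __init__(self, parts, input_slices):
        self.parts = parts                # e.g. [k1, k2]
        self.input_slices = input_slices  # e.g. [slice(0, 2), slice(2, 3)]

    def K(self, X, X2=None):
        # Sum of the part covariances, each evaluated on its own input columns.
        X2 = X if X2 is None else X2
        return sum(p.K(X[:, i_s], X2[:, i_s])
                   for p, i_s in zip(self.parts, self.input_slices))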
@@ -43,16 +43,16 @@ class Linear(Kern):
             assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
         else:
             variances = np.ones(self.input_dim)

         self.variances = Param('variances', variances, Logexp())
-        self.variances.gradient = np.zeros(self.variances.shape)
+        #TODO: remove?self.variances.gradient = np.zeros(self.variances.shape)
         self.add_parameter(self.variances)
         self.variances.add_observer(self, self.update_variance)

         # initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3, 1))
         self._X, self._X2 = np.empty(shape=(2, 1))

     def update_variance(self, v):
         self.variances2 = np.square(self.variances)
@@ -62,7 +62,7 @@ class Linear(Kern):
     def update_gradients_full(self, dL_dK, X):
-        self.variances.gradient[:] = 0
+        self._param_grad_helper(dL_dK, X, None, self.variances.gradient)

     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
         tmp = dL_dKdiag[:, None] * X ** 2
         if self.ARD:
@@ -71,7 +71,7 @@ class Linear(Kern):
             self.variances.gradient = tmp.sum()
-        self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
+        self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)

     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         self._psi_computations(Z, mu, S)
         # psi0:
@@ -87,7 +87,7 @@ class Linear(Kern):
         #from Kmm
-        self._K_computations(Z, None)
+        self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)

     def K(self, X, X2, target):
         if self.ARD:
             XX = X * np.sqrt(self.variances)
@@ -224,7 +224,7 @@ class Linear(Kern):
         weave_options = {'headers' : ['<omp.h>'],
                          'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
                          'extra_link_args' : ['-lgomp']}

         N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
         weave.inline(code, support_code=support_code, libraries=['gomp'],
                      arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
@@ -281,7 +281,7 @@ class Linear(Kern):
                 self._X2 = None
             else:
                 self._X2 = X2.copy()
-                self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
+                self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))

     def _psi_computations(self, Z, mu, S):
         # here are the "statistics" for psi1 and psi2
@@ -36,38 +36,33 @@ class Prod(Kern):
         self._params = None

     def K(self, X, X2=None):
-        self._K_computations(X,X2)
+        self._K_computations(X, X2)
         return self._K1 * self._K2

-    def Kdiag(self, X):
-        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
-
     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, None)
         self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
         self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])

+    def Kdiag(self, X):
+        """Compute the diagonal of the covariance matrix associated to X."""
+        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
+
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] )
+        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] )

-    def update_gradients_sparse(self):
-        pass
-        #wtf goes here??
-    #def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        #K1 = np.zeros(X.shape[0])
-        #K2 = np.zeros(X.shape[0])
-        #self.k1.Kdiag(X[:,self.slice1],K1)
-        #self.k2.Kdiag(X[:,self.slice2],K2)
-        #self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
-        #self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
-
-    def gradients_X(self,dL_dK,X,X2):
+    def gradients_X(self, dL_dK, X, X2=None):
         """derivative of the covariance matrix with respect to X."""
-        self._K_computations(X,X2)
+        self._K_computations(X, X2)
         target = np.zeros(X.shape)
         if X2 is None:
-            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
-            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None)
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None)
         else:
-            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
-            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1])
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2])

         return target

     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
@@ -78,7 +73,7 @@ class Prod(Kern):
         self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
         self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])

-    def _K_computations(self,X,X2):
+    def _K_computations(self, X, X2):
         if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
             self._X = X.copy()
             self._params == self._get_params().copy()
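The update_gradients_sparse added to Prod above is the product rule: with K = K1 * K2 (elementwise), the gradient contribution for k1 is obtained by weighting each dL_dK block (Kmm, Knm, Kdiag) with the matching block of K2 before delegating, and symmetrically for k2. A minimal sketch of that chain rule; k1 and k2 are hypothetical part kernels with K, Kdiag and update_gradients_sparse methods, not the GPy API:

import numpy as np

def prod_update_gradients_sparse(k1, k2, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z, s1, s2):
    # Evaluate the second factor on each covariance block.
    K2_mm = k2.K(Z[:, s2], Z[:, s2])      # inducing vs. inducing
    K2_nm = k2.K(X[:, s2], Z[:, s2])      # data vs. inducing
    K2_diag = k2.Kdiag(X[:, s2])          # diagonal on the data
    # d(K1*K2)/dtheta1 = (dK1/dtheta1)*K2, so pre-multiply each dL_dK block by K2.
    k1.update_gradients_sparse(dL_dKmm * K2_mm, dL_dKnm * K2_nm,
                               dL_dKdiag * K2_diag, X[:, s1], Z[:, s1])
    # ...and the same with the roles of k1 and k2 swapped.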
@@ -154,7 +154,7 @@ class RBF(Kern):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)

-    def gradients_X(self, dL_dK, X, X2):
+    def gradients_X(self, dL_dK, X, X2=None):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)
         if X2 is None:
@@ -20,9 +20,8 @@ class White(Kern):
         self.input_dim = input_dim
         self.variance = Param('variance', variance, Logexp())
         self.add_parameters(self.variance)
-        self._psi1 = 0 # TODO: more elegance here

-    def K(self,X,X2):
+    def K(self, X, X2=None):
         if X2 is None:
             return np.eye(X.shape[0])*self.variance
         else: