mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-15 06:52:39 +02:00
[GPU] GPU version of varDTC is ready
This commit is contained in:
commit
bbcba2553c
59 changed files with 2012 additions and 1186 deletions
|
|
@ -32,7 +32,8 @@ class ExactGaussianInference(object):
|
|||
return Y
|
||||
else:
|
||||
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
||||
raise NotImplementedError, 'TODO' #TODO
|
||||
print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
|
||||
return Y
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
#
|
||||
#Parts of this file were influenced by the Matlab GPML framework written by
|
||||
|
|
@ -91,7 +91,11 @@ class Laplace(object):
|
|||
iteration = 0
|
||||
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
|
||||
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
|
||||
if np.any(np.isnan(W)):
|
||||
raise ValueError('One or more element(s) of W is NaN')
|
||||
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
|
||||
if np.any(np.isnan(grad)):
|
||||
raise ValueError('One or more element(s) of grad is NaN')
|
||||
|
||||
W_f = W*f
|
||||
|
||||
|
|
@ -141,25 +145,30 @@ class Laplace(object):
|
|||
"""
|
||||
#At this point get the hessian matrix (or vector as W is diagonal)
|
||||
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
|
||||
if np.any(np.isnan(W)):
|
||||
raise ValueError('One or more element(s) of W is NaN')
|
||||
|
||||
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
#compute vital matrices
|
||||
C = np.dot(LiW12, K)
|
||||
Ki_W_i = K - C.T.dot(C) #Could this be wrong?
|
||||
Ki_W_i = K - C.T.dot(C)
|
||||
|
||||
#compute the log marginal
|
||||
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
|
||||
|
||||
#Compute vival matrices for derivatives
|
||||
# Compute matrices for derivatives
|
||||
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
|
||||
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
|
||||
if np.any(np.isnan(dW_df)):
|
||||
raise ValueError('One or more element(s) of dW_df is NaN')
|
||||
|
||||
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9.
|
||||
#BiK, _ = dpotrs(L, K, lower=1)
|
||||
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
|
||||
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
|
||||
|
||||
####################
|
||||
#compute dL_dK#
|
||||
# compute dL_dK #
|
||||
####################
|
||||
if kern.size > 0 and not kern.is_fixed:
|
||||
#Explicit
|
||||
|
|
@ -202,12 +211,12 @@ class Laplace(object):
|
|||
def _compute_B_statistics(self, K, W, log_concave):
|
||||
"""
|
||||
Rasmussen suggests the use of a numerically stable positive definite matrix B
|
||||
Which has a positive diagonal element and can be easyily inverted
|
||||
Which has a positive diagonal elements and can be easily inverted
|
||||
|
||||
:param K: Prior Covariance matrix evaluated at locations X
|
||||
:type K: NxN matrix
|
||||
:param W: Negative hessian at a point (diagonal matrix)
|
||||
:type W: Vector of diagonal values of hessian (1xN)
|
||||
:type W: Vector of diagonal values of Hessian (1xN)
|
||||
:returns: (W12BiW12, L_B, Li_W12)
|
||||
"""
|
||||
if not log_concave:
|
||||
|
|
@ -218,7 +227,8 @@ class Laplace(object):
|
|||
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
||||
# To cause the posterior to become less certain than the prior and likelihood,
|
||||
# This is a property only held by non-log-concave likelihoods
|
||||
|
||||
if np.any(np.isnan(W)):
|
||||
raise ValueError('One or more element(s) of W is NaN')
|
||||
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
|
||||
W_12 = np.sqrt(W)
|
||||
B = np.eye(K.shape[0]) + W_12*K*W_12.T
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ class VarDTC(object):
|
|||
def __init__(self, limit=1):
|
||||
#self._YYTfactor_cache = caching.cache()
|
||||
from ...util.caching import Cacher
|
||||
self.limit = limit
|
||||
self.get_trYYT = Cacher(self._get_trYYT, limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
|
||||
|
||||
|
|
@ -33,6 +34,17 @@ class VarDTC(object):
|
|||
def _get_trYYT(self, Y):
|
||||
return param_to_array(np.sum(np.square(Y)))
|
||||
|
||||
def __getstate__(self):
|
||||
# has to be overridden, as Cacher objects cannot be pickled.
|
||||
return self.limit
|
||||
|
||||
def __setstate__(self, state):
|
||||
# has to be overridden, as Cacher objects cannot be pickled.
|
||||
self.limit = state
|
||||
from ...util.caching import Cacher
|
||||
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
|
||||
|
||||
def _get_YYTfactor(self, Y):
|
||||
"""
|
||||
find a matrix L which satisfies LLT = YYT.
|
||||
|
|
@ -126,7 +138,7 @@ class VarDTC(object):
|
|||
delit += output_dim * np.eye(num_inducing)
|
||||
# Compute dL_dKmm
|
||||
dL_dKmm = backsub_both_sides(Lm, delit)
|
||||
|
||||
|
||||
# derivatives of L w.r.t. psi
|
||||
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
|
||||
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
|
||||
|
|
@ -179,6 +191,7 @@ class VarDTC(object):
|
|||
return post, log_marginal, grad_dict
|
||||
|
||||
class VarDTCMissingData(object):
|
||||
const_jitter = 1e-6
|
||||
def __init__(self, limit=1):
|
||||
from ...util.caching import Cacher
|
||||
self._Y = Cacher(self._subarray_computations, limit)
|
||||
|
|
@ -250,7 +263,7 @@ class VarDTCMissingData(object):
|
|||
|
||||
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
|
||||
if het_noise: beta = beta_all[ind]
|
||||
else: beta = beta_all[0]
|
||||
else: beta = beta_all
|
||||
|
||||
VVT_factor = (beta*y)
|
||||
VVT_factor_all[v, ind].flat = VVT_factor.flat
|
||||
|
|
@ -311,7 +324,7 @@ class VarDTCMissingData(object):
|
|||
het_noise, uncertain_inputs, LB,
|
||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||
psi0, psi1, beta,
|
||||
data_fit, num_data, output_dim, trYYT)
|
||||
data_fit, num_data, output_dim, trYYT, Y)
|
||||
|
||||
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue