Scale Factor removed and moved V=Y*beta into likelihoods

Max Zwiessele 2013-05-08 15:38:14 +01:00
parent 65ead17ff5
commit f39afb9ea5
3 changed files with 140 additions and 122 deletions

@@ -32,6 +32,7 @@ class EP(likelihood):
         self.precision = np.ones(self.N)[:,None]
         self.Z = 0
         self.YYT = None
+        self.V = self.precision * self.Y
 
     def restart(self):
         self.tau_tilde = np.zeros(self.N)
@@ -41,6 +42,7 @@ class EP(likelihood):
         self.precision = np.ones(self.N)[:,None]
         self.Z = 0
         self.YYT = None
+        self.V = self.precision * self.Y
 
     def predictive_values(self,mu,var,full_cov):
         if full_cov:
@@ -67,6 +69,7 @@ class EP(likelihood):
         self.YYT = np.dot(self.Y,self.Y.T)
         self.covariance_matrix = np.diag(1./self.tau_tilde)
         self.precision = self.tau_tilde[:,None]
+        self.V = self.precision * self.Y
 
     def fit_full(self,K):
         """

@@ -11,33 +11,34 @@ class Gaussian(likelihood):
     :param normalize: whether to normalize the data before computing (predictions will be in original scales)
     :type normalize: False|True
     """
-    def __init__(self,data,variance=1.,normalize=False):
+    def __init__(self, data, variance=1., normalize=False):
         self.is_heteroscedastic = False
         self.Nparams = 1
         self.Z = 0. # a correction factor which accounts for the approximation made
         N, self.D = data.shape
-        #normalization
+        # normalization
         if normalize:
-            self._bias = data.mean(0)[None,:]
-            self._scale = data.std(0)[None,:]
+            self._bias = data.mean(0)[None, :]
+            self._scale = data.std(0)[None, :]
             # Don't scale outputs which have zero variance to zero.
-            self._scale[np.nonzero(self._scale==0.)] = 1.0e-3
+            self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
         else:
-            self._bias = np.zeros((1,self.D))
-            self._scale = np.ones((1,self.D))
+            self._bias = np.zeros((1, self.D))
+            self._scale = np.ones((1, self.D))
         self.set_data(data)
+        self._variance = np.asarray(variance) + 1.
         self._set_params(np.asarray(variance))
 
-    def set_data(self,data):
+    def set_data(self, data):
         self.data = data
-        self.N,D = data.shape
+        self.N, D = data.shape
         assert D == self.D
-        self.Y = (self.data - self._bias)/self._scale
+        self.Y = (self.data - self._bias) / self._scale
         if D > self.N:
-            self.YYT = np.dot(self.Y,self.Y.T)
+            self.YYT = np.dot(self.Y, self.Y.T)
             self.trYYT = np.trace(self.YYT)
         else:
             self.YYT = None
@@ -49,27 +50,30 @@ class Gaussian(likelihood):
     def _get_param_names(self):
         return ["noise_variance"]
 
-    def _set_params(self,x):
-        self._variance = float(x)
-        self.covariance_matrix = np.eye(self.N)*self._variance
-        self.precision = 1./self._variance
+    def _set_params(self, x):
+        x = float(x)
+        if self._variance != x:
+            self._variance = x
+            self.covariance_matrix = np.eye(self.N) * self._variance
+            self.precision = 1. / self._variance
+            self.V = (self.precision) * self.Y
 
-    def predictive_values(self,mu,var, full_cov):
+    def predictive_values(self, mu, var, full_cov):
         """
         Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
         """
-        mean = mu*self._scale + self._bias
+        mean = mu * self._scale + self._bias
         if full_cov:
-            if self.D >1:
+            if self.D > 1:
                 raise NotImplementedError, "TODO"
-                #Note. for D>1, we need to re-normalise all the outputs independently.
+                # Note. for D>1, we need to re-normalise all the outputs independently.
                 # This will mess up computations of diag(true_var), below.
-            #note that the upper, lower quantiles should be the same shape as mean
-            true_var = (var + np.eye(var.shape[0])*self._variance)*self._scale**2
+            # note that the upper, lower quantiles should be the same shape as mean
+            true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
             _5pc = mean - 2.*np.sqrt(np.diag(true_var))
             _95pc = mean + 2.*np.sqrt(np.diag(true_var))
         else:
-            true_var = (var + self._variance)*self._scale**2
+            true_var = (var + self._variance) * self._scale ** 2
             _5pc = mean - 2.*np.sqrt(true_var)
             _95pc = mean + 2.*np.sqrt(true_var)
         return mean, true_var, _5pc, _95pc
@@ -80,5 +84,5 @@ class Gaussian(likelihood):
         """
         pass
 
-    def _gradients(self,partial):
+    def _gradients(self, partial):
         return np.sum(partial)
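
The new _set_params only recomputes the covariance matrix, precision and V when the variance actually changes; seeding self._variance with variance + 1. in __init__ guarantees the first call takes the recompute branch. A sketch of the idiom in isolation (hypothetical NoiseCache class, not part of GPy):

import numpy as np

class NoiseCache:
    """Hypothetical illustration of the recompute-on-change guard."""
    def __init__(self, Y, variance=1.0):
        self.Y = Y
        self._variance = variance + 1.0   # seed a mismatch so the first set fires
        self.set(variance)

    def set(self, x):
        x = float(x)
        if self._variance != x:           # skip the refresh if nothing changed
            self._variance = x
            self.precision = 1.0 / x
            self.V = self.precision * self.Y

nc = NoiseCache(np.ones((3, 1)), variance=2.0)
print(nc.precision, nc.V.ravel())         # 0.5 [0.5 0.5 0.5]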

@@ -9,10 +9,10 @@ from .. import kern
 from GP import GP
 from scipy import linalg
 
-def backsub_both_sides(L,X):
+def backsub_both_sides(L, X):
     """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
-    tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
-    return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
+    tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
+    return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
 
 class sparse_GP(GP):
     """
@@ -35,22 +35,22 @@ class sparse_GP(GP):
     """
     def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
-        self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
-        self.auto_scale_factor = False
+        # self.scale_factor = 100.0 # a scaling factor to help keep the algorithm stable
+        # self.auto_scale_factor = False
         self.Z = Z
         self.M = Z.shape[0]
         self.likelihood = likelihood
         if X_variance is None:
-            self.has_uncertain_inputs=False
+            self.has_uncertain_inputs = False
         else:
-            assert X_variance.shape==X.shape
-            self.has_uncertain_inputs=True
+            assert X_variance.shape == X.shape
+            self.has_uncertain_inputs = True
             self.X_variance = X_variance
         GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
-        #normalize X uncertainty also
+        # normalize X uncertainty also
         if self.has_uncertain_inputs:
             self.X_variance /= np.square(self._Xstd)
@@ -59,141 +59,152 @@ class sparse_GP(GP):
         # kernel computations, using BGPLVM notation
         self.Kmm = self.kern.K(self.Z)
         if self.has_uncertain_inputs:
-            self.psi0 = self.kern.psi0(self.Z,self.X, self.X_variance)
-            self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
-            self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
+            self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
+            self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
+            self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
         else:
             self.psi0 = self.kern.Kdiag(self.X)
-            self.psi1 = self.kern.K(self.Z,self.X)
+            self.psi1 = self.kern.K(self.Z, self.X)
             self.psi2 = None
     def _computations(self):
-        sf = self.scale_factor
-        sf2 = sf**2
+        # sf = self.scale_factor
+        # sf2 = sf ** 2
 
-        #factor Kmm
+        # factor Kmm
         self.Lm = jitchol(self.Kmm)
 
-        #The rather complex computations of self.A
+        # The rather complex computations of self.A
         if self.likelihood.is_heteroscedastic:
-            assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
+            assert self.likelihood.D == 1 # TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
             if self.has_uncertain_inputs:
-                psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
+                # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1) / sf2)).sum(0)
+                psi2_beta_scaled = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
                 evals, evecs = linalg.eigh(psi2_beta_scaled)
-                clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable
+                clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
                 if not np.allclose(evals, clipped_evals):
                     print "Warning: clipping posterior eigenvalues"
-                tmp = evecs*np.sqrt(clipped_evals)
-                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
+                tmp = evecs * np.sqrt(clipped_evals)
+                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
                 self.A = tdot(tmp)
             else:
-                tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
-                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
+                # tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)) / sf)
+                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
+                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
                 self.A = tdot(tmp)
         else:
             if self.has_uncertain_inputs:
-                psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
+                # psi2_beta_scaled = (self.psi2 * (self.likelihood.precision / sf2)).sum(0)
+                psi2_beta_scaled = (self.psi2 * (self.likelihood.precision)).sum(0)
                 evals, evecs = linalg.eigh(psi2_beta_scaled)
-                clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable
+                clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
                 if not np.allclose(evals, clipped_evals):
                     print "Warning: clipping posterior eigenvalues"
-                tmp = evecs*np.sqrt(clipped_evals)
-                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
+                tmp = evecs * np.sqrt(clipped_evals)
+                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
                 self.A = tdot(tmp)
             else:
-                tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
-                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
+                # tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
+                tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
+                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
                 self.A = tdot(tmp)
 
-        #factor B
-        self.B = np.eye(self.M)/sf2 + self.A
+        # factor B
+        # self.B = np.eye(self.M) / sf2 + self.A
+        self.B = np.eye(self.M) + self.A
         self.LB = jitchol(self.B)
 
-        self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
-        self.psi1V = np.dot(self.psi1, self.V)
+        # TODO: make a switch for either first compute psi1V, or VV.T
+        self.psi1V = np.dot(self.psi1, self.likelihood.V)
 
-        #back substutue C into psi1V
-        tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
-        self._LBi_Lmi_psi1V,_ = linalg.lapack.flapack.dtrtrs(self.LB,np.asfortranarray(tmp),lower=1,trans=0)
-        tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
-        self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
+        # back substutue C into psi1V
+        tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
+        self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
+        tmp, info2 = linalg.lapack.flapack.dpotrs(self.LB, tmp, lower=1)
+        self.Cpsi1V, info3 = linalg.lapack.flapack.dtrtrs(self.Lm, tmp, lower=1, trans=1)
 
         # Compute dL_dKmm
         tmp = tdot(self._LBi_Lmi_psi1V)
-        self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D*np.eye(self.M) + tmp)
-        tmp = -0.5*self.DBi_plus_BiPBi/sf2
-        tmp += -0.5*self.B*sf2*self.D
-        tmp += self.D*np.eye(self.M)
-        self.dL_dKmm = backsub_both_sides(self.Lm,tmp)
+        self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp)
+        # tmp = -0.5 * self.DBi_plus_BiPBi / sf2
+        # tmp += -0.5 * self.B * sf2 * self.D
+        tmp = -0.5 * self.DBi_plus_BiPBi
+        tmp += -0.5 * self.B * self.D
+        tmp += self.D * np.eye(self.M)
+        self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
-        self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
-        self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
-        dL_dpsi2_beta = 0.5*backsub_both_sides(self.Lm,self.D*np.eye(self.M) - self.DBi_plus_BiPBi)
+        self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
+        self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
+        dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi)
         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
-                self.dL_dpsi2 = self.likelihood.precision[:,None,None]*dL_dpsi2_beta[None,:,:]
+                self.dL_dpsi2 = self.likelihood.precision[:, None, None] * dL_dpsi2_beta[None, :, :]
             else:
-                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta,self.psi1*self.likelihood.precision.reshape(1,self.N))
+                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
                 self.dL_dpsi2 = None
         else:
-            dL_dpsi2 = self.likelihood.precision*dL_dpsi2_beta
+            dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
             if self.has_uncertain_inputs:
-                #repeat for each of the N psi_2 matrices
-                self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.N,axis=0)
+                # repeat for each of the N psi_2 matrices
+                self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
             else:
-                #subsume back into psi1 (==Kmn)
-                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1)
+                # subsume back into psi1 (==Kmn)
+                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
                 self.dL_dpsi2 = None
 
-        #the partial derivative vector for the likelihood
-        if self.likelihood.Nparams ==0:
-            #save computation here.
+        # the partial derivative vector for the likelihood
+        if self.likelihood.Nparams == 0:
+            # save computation here.
             self.partial_for_likelihood = None
         elif self.likelihood.is_heteroscedastic:
             raise NotImplementedError, "heteroscedatic derivates not implemented"
         else:
-            #likelihood is not heterscedatic
-            self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
-            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
-            self.partial_for_likelihood += self.likelihood.precision*(0.5*np.sum(self.A*self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
+            # likelihood is not heterscedatic
+            self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
+            # self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
+            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
+            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
-        sf2 = self.scale_factor**2
+        # sf2 = self.scale_factor ** 2
         if self.likelihood.is_heteroscedastic:
-            A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
-            B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
+            A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
+            # B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
+            B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
         else:
-            A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
-            B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
-        C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
-        D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
-        return A+B+C+D
+            A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
+            # B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
+            B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
+        # C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
+        C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
+        D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
+        return A + B + C + D
     def _set_params(self, p):
-        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
-        self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
-        self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
+        self.Z = p[:self.M * self.Q].reshape(self.M, self.Q)
+        self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
+        self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
         self._compute_kernel_matrices()
-        #if self.auto_scale_factor:
-        #    self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
-        #if self.auto_scale_factor:
-            #if self.likelihood.is_heteroscedastic:
-                #self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
-            #else:
-                #self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
-        self.scale_factor = 1.
+        # if self.auto_scale_factor:
+        #     self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
+        # if self.auto_scale_factor:
+        #     if self.likelihood.is_heteroscedastic:
+        #         self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
+        #     else:
+        #         self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
+        # self.scale_factor = 100.
         self._computations()
 
     def _get_params(self):
-        return np.hstack([self.Z.flatten(),GP._get_params(self)])
+        return np.hstack([self.Z.flatten(), GP._get_params(self)])
 
     def _get_param_names(self):
-        return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
+        return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)
 
     def update_likelihood_approximation(self):
         """
@@ -205,9 +216,9 @@ class sparse_GP(GP):
         if self.has_uncertain_inputs:
             raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
         else:
-            self.likelihood.fit_DTC(self.Kmm,self.psi1)
-            #self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
+            self.likelihood.fit_DTC(self.Kmm, self.psi1)
+            # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
             self._set_params(self._get_params()) # update the GP
 
     def _log_likelihood_gradients(self):
@@ -217,13 +228,13 @@ class sparse_GP(GP):
         """
         Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
         """
-        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z)
+        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
-            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_variance)
-            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z,self.X, self.X_variance)
+            dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
+            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
+            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
+            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
             dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
         return dL_dtheta
@@ -243,17 +254,17 @@ class sparse_GP(GP):
     def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
         """Internal helper function for making predictions, does not account for normalization"""
-        Bi,_ = linalg.lapack.flapack.dpotri(self.LB,lower=0) # WTH? this lower switch should be 1, but that doesn't work!
+        Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
         symmetrify(Bi)
-        Kmmi_LmiBLmi = backsub_both_sides(self.Lm,np.eye(self.M) - Bi)
+        Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi)
 
         Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
-        mu = np.dot(Kx.T, self.Cpsi1V/self.scale_factor)
+        mu = np.dot(Kx.T, self.Cpsi1V / self.scale_factor)
         if full_cov:
-            Kxx = self.kern.K(Xnew,which_parts=which_parts)
-            var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) #NOTE this won't work for plotting
+            Kxx = self.kern.K(Xnew, which_parts=which_parts)
+            var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
         else:
-            Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
-            var = Kxx - np.sum(Kx*np.dot(Kmmi_LmiBLmi, Kx),0)
-        return mu,var[:,None]
+            Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
+            var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
+        return mu, var[:, None]
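
For reference, backsub_both_sides (defined at the top of this file) computes L^-T * X * L^-1 for symmetric X and a lower Cholesky factor L. A hedged check of that contract, using scipy.linalg.solve_triangular in place of the raw flapack wrapper:

import numpy as np
from scipy import linalg

M = 4
R = np.random.randn(M, M)
K = np.dot(R, R.T) + M * np.eye(M)        # SPD matrix with Cholesky K = L L^T
L = np.linalg.cholesky(K)
X = np.random.randn(M, M)
X = X + X.T                               # symmetric, as the docstring assumes

tmp = linalg.solve_triangular(L, X, lower=True, trans=1)        # L^-T X
res = linalg.solve_triangular(L, tmp.T, lower=True, trans=1).T  # L^-T X L^-1

Li = np.linalg.inv(L)
print(np.allclose(res, Li.T.dot(X).dot(Li)))  # True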