This commit is contained in:
James Hensman 2013-05-08 09:41:03 +01:00
commit 5fe0daa0b1
6 changed files with 167 additions and 111 deletions

View file

@ -203,7 +203,7 @@ class model(parameterised):
else: else:
self._set_params_transformed(initial_parameters) self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self, warn=False): def ensure_default_constraints(self):
""" """
Ensure that any variables which should clearly be positive have been constrained somehow. Ensure that any variables which should clearly be positive have been constrained somehow.
""" """
@ -214,11 +214,11 @@ class model(parameterised):
for s in positive_strings: for s in positive_strings:
for i in self.grep_param_names(s): for i in self.grep_param_names(s):
if not (i in currently_constrained): if not (i in currently_constrained):
to_make_positive.append(re.escape(param_names[i])) #to_make_positive.append(re.escape(param_names[i]))
if warn: to_make_positive.append(i)
print "Warning! constraining %s positive" % s
if len(to_make_positive): if len(to_make_positive):
self.constrain_positive('(' + '|'.join(to_make_positive) + ')') #self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
self.constrain_positive(np.asarray(to_make_positive))
@ -411,9 +411,10 @@ class model(parameterised):
""" """
return an array describing the sesitivity of the model to each input return an array describing the sesitivity of the model to each input
NB. Right now, we're basing this on the lengthscales (or variances) of the kernel. NB. Right now, we're basing this on the lengthscales (or
TODO: proper sensitivity analysis variances) of the kernel. TODO: proper sensitivity analysis
""" where we integrate across the model inputs and evaluate the
effect on the variance of the model output. """
if not hasattr(self, 'kern'): if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel" raise ValueError, "this model has no kernel"

View file

@ -86,7 +86,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
plt.sca(latent_axes) plt.sca(latent_axes)
m.plot_latent() m.plot_latent()
data_show = GPy.util.visualize.vector_show(y) data_show = GPy.util.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes, hist_axes=hist_axes) lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
# # plot # # plot

View file

@ -5,6 +5,7 @@
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
from ..util.linalg import tdot from ..util.linalg import tdot
from scipy import weave
class linear(kernpart): class linear(kernpart):
""" """
@ -171,33 +172,91 @@ class linear(kernpart):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2) AZZA = AZZA + AZZA.swapaxes(1, 2)
target_S += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1) AZZA_2 = AZZA/2.
dpsi2_dmu = (dL_dpsi2[:, :, :, None] * np.tensordot(mu, AZZA, (-1, 0))).sum(1).sum(1) #muAZZA = np.tensordot(mu,AZZA,(-1,0))
target_mu += dpsi2_dmu #target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
#Using weave, we can exploiut the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){
for(m=0;m<M;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<Q;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<Q;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','M','Q','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
# mu2_S = np.sum(self.mu2_S, 0) # Q, #psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
# import ipdb;ipdb.set_trace() #dummy_target = np.zeros_like(target)
# psi2_dZ_real = np.zeros((mu.shape[0], Z.shape[0], Z.shape[1])) #dummy_target += psi2_dZ.sum(0).sum(0)
# for n in range(mu.shape[0]):
# for m in range(Z.shape[0]): AZA = self.variances*self.ZAinner
# tmp = self.variances * (tdot(self._mu[n:n + 1].T) + np.diag(S[n])) code="""
# psi2_dZ_real[n, m, :] = np.dot(tmp, ( int n,m,mm,q;
# self._Z[m:m + 1] * self.variances).T).T #pragma omp parallel for private(n,mm,q)
# tmp = self._Z[m:m + 1] * self.variances for(m=0;m<M;m++){
# tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n]))) for(q=0;q<Q;q++){
# psi2_dZ_real[n, m, :] = tmp * self.variances for(mm=0;mm<M;mm++){
# for m_prime in range(Z.shape[0]): for(n=0;n<N;n++){
# if m == m_prime: target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
# psi2_dZ_real[n, m, :] *= 2 }
# prod = (dL_dpsi2[:, :, :, None] * np.eye(Z.shape[0])[None, :, :, None] * (self.ZAinner * self.variances).swapaxes(0, 1)[:, :, None, :]) }
# psi2_dZ = prod.swapaxes(1, 2) + prod }
psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :] }
target += psi2_dZ.sum(0).sum(0) """
# import ipdb;ipdb.set_trace() support_code = """
# psi2_dZ_old = (dL_dpsi2[:, :, :, None] * (self.mu2_S[:, None, None, :] * (Z * np.square(self.variances)[None, :])[None, None, :, :])).sum(0).sum(1) #include <omp.h>
# target += (dL_dpsi2[:, :, :, None] * psi2_dZ_real[:, :, None, :]).sum(0).sum(0) * 2 # (self.variances * np.dot(self.inner, self.ZA.T)).sum(1) #include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','M','Q','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
#---------------------------------------# #---------------------------------------#
# Precomputations # # Precomputations #

View file

@ -251,6 +251,7 @@ class EP(likelihood):
R = R0.copy() R = R0.copy()
Diag = Diag0.copy() Diag = Diag0.copy()
Sigma_diag = Knn_diag Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
""" """
Initial values - Cavity distribution parameters: Initial values - Cavity distribution parameters:
@ -306,13 +307,7 @@ class EP(likelihood):
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0 P = Iplus_Dprod_i[:,None] * P0
#Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
#P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T)) L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T))
#L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Iplus_Dprod_i/Diag0)[:,None]*RPT0.T))
#L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1) R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T) RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)

View file

@ -9,6 +9,12 @@ from .. import kern
from scipy import stats, linalg from scipy import stats, linalg
from sparse_GP import sparse_GP from sparse_GP import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class generalized_FITC(sparse_GP): class generalized_FITC(sparse_GP):
""" """
Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC.
@ -33,7 +39,7 @@ class generalized_FITC(sparse_GP):
self.Z = Z self.Z = Z
self.M = self.Z.shape[0] self.M = self.Z.shape[0]
self._precision = likelihood.precision self.true_precision = likelihood.precision
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False) sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
@ -51,13 +57,16 @@ class generalized_FITC(sparse_GP):
For a Gaussian (or direct: TODO) likelihood, no iteration is required: For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
The true precison is now 'true_precision' not 'precision'.
""" """
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else: else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._precision = self.likelihood.precision # Save the true precision self.true_precision = self.likelihood.precision # Save the true precision
self.likelihood.precision = self._precision/(1. + self._precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _FITC_computations(self): def _FITC_computations(self):
@ -69,23 +78,23 @@ class generalized_FITC(sparse_GP):
- removes the extra terms computed in the sparse_GP approximation - removes the extra terms computed in the sparse_GP approximation
- computes the likelihood gradients wrt the true precision. - computes the likelihood gradients wrt the true precision.
""" """
#NOTE the true precison is now '_precison' not 'precision' #NOTE the true precison is now 'true_precision' not 'precision'
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
# Compute generalized FITC's diagonal term of the covariance # Compute generalized FITC's diagonal term of the covariance
self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
#a = kj
self.Diag0 = self.psi0 - np.diag(self.Qnn) self.Diag0 = self.psi0 - np.diag(self.Qnn)
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self._precision.flatten()) Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i self.Diag = self.Diag0 * Iplus_Dprod_i
#self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
self.P = Iplus_Dprod_i[:,None] * self.psi1.T self.P = Iplus_Dprod_i[:,None] * self.psi1.T
#self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1) self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - Iplus_Dprod_i/self.Diag0)[:,None]*self.RPT0.T))
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T) self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
@ -94,7 +103,16 @@ class generalized_FITC(sparse_GP):
self.mu = self.w + np.dot(self.P,self.gamma) self.mu = self.w + np.dot(self.P,self.gamma)
# Remove extra term from dL_dpsi1 # Remove extra term from dL_dpsi1
self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N))
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
#########333333
#self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
#########333333
else: else:
raise NotImplementedError, "homoscedastic fitc not implemented" raise NotImplementedError, "homoscedastic fitc not implemented"
# Remove extra term from dL_dpsi1 # Remove extra term from dL_dpsi1
@ -140,8 +158,11 @@ class generalized_FITC(sparse_GP):
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
D = 0.5*np.trace(self.Cpsi1VVpsi1) #C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
return A+C+D return A+C+D
def _raw_predict(self, Xnew, which_parts, full_cov=False): def _raw_predict(self, Xnew, which_parts, full_cov=False):

View file

@ -3,7 +3,7 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot, tdot from ..util.linalg import mdot, jitchol, tdot, symmetrify
from ..util.plot import gpplot from ..util.plot import gpplot
from .. import kern from .. import kern
from GP import GP from GP import GP
@ -68,13 +68,11 @@ class sparse_GP(GP):
self.psi2 = None self.psi2 = None
def _computations(self): def _computations(self):
#TODO: find routine to multiply triangular matrices
sf = self.scale_factor sf = self.scale_factor
sf2 = sf**2 sf2 = sf**2
#invert Kmm #factor Kmm
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) self.Lm = jitchol(self.Kmm)
#The rather complex computations of self.A #The rather complex computations of self.A
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
@ -90,7 +88,6 @@ class sparse_GP(GP):
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf) tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
#self.psi2_beta_scaled = tdot(tmp)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
@ -101,20 +98,16 @@ class sparse_GP(GP):
if not np.allclose(evals, clipped_evals): if not np.allclose(evals, clipped_evals):
print "Warning: clipping posterior eigenvalues" print "Warning: clipping posterior eigenvalues"
tmp = evecs*np.sqrt(clipped_evals) tmp = evecs*np.sqrt(clipped_evals)
#self.psi2_beta_scaled = tdot(tmp)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
else: else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf) tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
#self.psi2_beta_scaled = tdot(tmp)
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
#invert B and compute C. C is the posterior covariance of u #factor B
self.B = np.eye(self.M)/sf2 + self.A self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) self.LB = jitchol(self.B)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
self.psi1V = np.dot(self.psi1, self.V) self.psi1V = np.dot(self.psi1, self.V)
@ -124,38 +117,6 @@ class sparse_GP(GP):
self._LBi_Lmi_psi1V,_ = linalg.lapack.flapack.dtrtrs(self.LB,np.asfortranarray(tmp),lower=1,trans=0) self._LBi_Lmi_psi1V,_ = linalg.lapack.flapack.dtrtrs(self.LB,np.asfortranarray(tmp),lower=1,trans=0)
tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1) tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1) self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
#self.Cpsi1V = np.dot(self.C,self.psi1V)
self.E = tdot(self.Cpsi1V/sf)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
#self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:]
else:
#self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
#self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
#self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N))
self.dL_dpsi2 = None
else:
self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E)
if self.has_uncertain_inputs:
#repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
else:
#subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
self.dL_dpsi2 = None
# Compute dL_dKmm # Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V) tmp = tdot(self._LBi_Lmi_psi1V)
@ -165,23 +126,38 @@ class sparse_GP(GP):
tmp += self.D*np.eye(self.M) tmp += self.D*np.eye(self.M)
self.dL_dKmm = backsub_both_sides(self.Lm,tmp) self.dL_dKmm = backsub_both_sides(self.Lm,tmp)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
dL_dpsi2_beta = 0.5*backsub_both_sides(self.Lm,self.D*np.eye(self.M) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision[:,None,None]*dL_dpsi2_beta[None,:,:]
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta,self.psi1*self.likelihood.precision.reshape(1,self.N))
self.dL_dpsi2 = None
else:
dL_dpsi2 = self.likelihood.precision*dL_dpsi2_beta
if self.has_uncertain_inputs:
#repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.N,axis=0)
else:
#subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1)
self.dL_dpsi2 = None
#the partial derivative vector for the likelihood #the partial derivative vector for the likelihood
if self.likelihood.Nparams ==0: if self.likelihood.Nparams ==0:
#save computation here. #save computation here.
self.partial_for_likelihood = None self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic: elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
#self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
#self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
#self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else: else:
#likelihood is not heterscedatic #likelihood is not heterscedatic
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2 self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2) self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
#self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision self.partial_for_likelihood += self.likelihood.precision*(0.5*np.sum(self.A*self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
#self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.sum(np.square(self._LBi_Lmi_psi1V)))
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.A,self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
@ -194,7 +170,7 @@ class sparse_GP(GP):
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2) B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
return A+B+C+D return A+B+C+D
@ -258,22 +234,26 @@ class sparse_GP(GP):
""" """
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance) dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else: else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X) dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
return dL_dZ return dL_dZ
def _raw_predict(self, Xnew, which_parts='all', full_cov=False): def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization""" """Internal helper function for making predictions, does not account for normalization"""
Kx = self.kern.K(self.Z, Xnew) Bi,_ = linalg.lapack.flapack.dpotri(self.LB,lower=0) # WTH? this lower switch should be 1, but that doesn't work!
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self.Lm,np.eye(self.M) - Bi)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V/self.scale_factor)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts) Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) #NOTE this won't work for plotting
else: else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) var = Kxx - np.sum(Kx*np.dot(Kmmi_LmiBLmi, Kx),0)
return mu,var[:,None] return mu,var[:,None]