Merged and fixed conflicts; names still need changing accordingly

This commit is contained in:
Ricardo 2013-06-05 14:22:16 +01:00
commit b3eeacd956
55 changed files with 912 additions and 927 deletions

View file

@ -1,252 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from ..core import sparse_GP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
def update_likelihood_approximation(self):
"""
Approximate a non-Gaussian likelihood using Expectation Propagation.
For a Gaussian (or direct: TODO) likelihood no iteration is required:
this function does nothing.
Diag(Knn - Qnn) is added to the noise term so that the tools already implemented in sparse_GP can be reused.
The true precision is now 'true_precision', not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.M) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T,LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star**2 #dA_dpsi0: yT*beta_star*y
self._dL_dpsi0 += .5 *alpha #dC_dpsi0
self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
#Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
#Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heteroscedastic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
# NOTE: the following four assignments are dead code, superseded by the dD_dnoise_* expressions below
# dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
# alpha = mdot(LBiLmipsi1,self.V_star)
# alpha_ = mdot(LBiLmipsi1.T,alpha)
# dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.D * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
Xnew : input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -1,14 +1,11 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from GP_regression import GP_regression
from GP_classification import GP_classification
from sparse_GP_regression import sparse_GP_regression
from sparse_GP_classification import sparse_GP_classification
from FITC_classification import FITC_classification
from GPLVM import GPLVM
from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM
from Bayesian_GPLVM import Bayesian_GPLVM
from gp_regression import GPRegression
from sparse_gp_regression import SparseGPRegression
from sparse_gp_classification import SparseGPClassification
from fitc_classification import FITCClassification
from gplvm import GPLVM
from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM
from mrd import MRD
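Reviewer note: since these are public names, downstream code importing the old identifiers will break at this commit. A hedged suggestion (not part of this diff): keep temporary aliases in GPy/models/__init__.py until callers have migrated, e.g.

# hypothetical backwards-compatibility aliases, to be removed after the rename settles
GP_regression = GPRegression
sparse_GP_regression = SparseGPRegression
sparse_GP_classification = SparseGPClassification
warpedGP = WarpedGP
Bayesian_GPLVM = BayesianGPLVM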

View file

@ -2,21 +2,16 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from GPLVM import GPLVM
from ..core import sparse_GP
from GPy.util.linalg import pdinv
from ..core import SparseGP
from ..likelihoods import Gaussian
from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools
from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams
from GPy.inference.optimization import SCG
from GPy.util import plot_latent
from GPy.models.gplvm import GPLVM
class Bayesian_GPLVM(sparse_GP, GPLVM):
class BayesianGPLVM(SparseGP, GPLVM):
"""
Bayesian Gaussian Process Latent Variable Model
@ -64,7 +59,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedpsiKmm = []
self._savedABCD = []
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self._set_params(self._get_params())
@property
@ -80,19 +75,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
return (X_names + S_names + sparse_GP._get_param_names(self))
return (X_names + S_names + SparseGP._get_param_names(self))
def _get_params(self):
"""
Horizontally stacks the parameters in order to present them to the optimizer.
The resulting 1-D array has this structure:
The resulting one-dimensional array has this structure:
===============================================================
| mu | S | Z | theta | beta |
===============================================================
"""
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
return x
def _clipped(self, x):
@ -104,7 +99,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
N, input_dim = self.N, self.input_dim
self.X = x[:self.X.size].reshape(N, input_dim).copy()
self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
sparse_GP._set_params(self, x[(2 * N * input_dim):])
SparseGP._set_params(self, x[(2 * N * input_dim):])
# self.oldps = x
# except (LinAlgError, FloatingPointError, ZeroDivisionError):
# print "\rWARNING: Caught LinAlgError, continueing without setting "
@ -134,7 +129,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N
def log_likelihood(self):
ll = sparse_GP.log_likelihood(self)
ll = SparseGP.log_likelihood(self)
kl = self.KL_divergence()
# if ll < -2E4:
@ -151,14 +146,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
A = -0.5 * self.N * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
self._savedABCD.append([self.f_call, A, B, C, D])
@ -181,7 +176,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
# d_dS = (dL_dS).flatten()
# ========================
self.dbound_dmuS = np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)))
def plot_latent(self, *args, **kwargs):
@ -200,7 +195,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.D * self.likelihood.precision
dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = self.likelihood.precision * Y
dpsi1 = np.dot(self.Cpsi1V, V.T)
@ -263,7 +258,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
def __getstate__(self):
return (self.likelihood, self.input_dim, self.X, self.X_variance,
self.init, self.M, self.Z, self.kern,
self.init, self.num_inducing, self.Z, self.kern,
self.oldpsave, self._debug)
def __setstate__(self, state):
@ -274,8 +269,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
X = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + self.X_variance.size
X_v = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + (self.M * self.input_dim)
Z = x[start:end].reshape(self.M, self.input_dim)
start, end = end, end + (self.num_inducing * self.input_dim)
Z = x[start:end].reshape(self.num_inducing, self.input_dim)
start, end = end, end + self.input_dim
theta = x[start:]
return X, X_v, Z, theta
@ -353,12 +348,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
fig = figs[-1]
ax8 = fig.add_subplot(121)
ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes,
ax8.text(.5, .5, r"${\mathbf{A,B,C,input_dim}}$", color='k', alpha=.5, transform=ax8.transAxes,
ha='center', va='center')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D')
ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D')
ax8.legend()
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
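Reviewer note: the | mu | S | Z | theta | beta | layout documented in _get_params drives the slicing in _set_params and __setstate__ above. A minimal stand-alone sketch of that pack/unpack round-trip, with hypothetical sizes (N points, Q latent dimensions, M inducing points, n_theta kernel parameters):

import numpy as np

N, Q, M, n_theta = 5, 2, 3, 4
X, S = np.random.randn(N, Q), np.random.rand(N, Q)
Z, theta = np.random.randn(M, Q), np.random.rand(n_theta)
x = np.hstack((X.flatten(), S.flatten(), Z.flatten(), theta))   # pack, as in _get_params

X2 = x[:N * Q].reshape(N, Q)                                    # mu
S2 = x[N * Q:2 * N * Q].reshape(N, Q)                           # S
Z2 = x[2 * N * Q:2 * N * Q + M * Q].reshape(M, Q)               # Z
theta2 = x[2 * N * Q + M * Q:]                                  # theta (and beta)
assert np.allclose(X2, X) and np.allclose(Z2, Z) and np.allclose(theta2, theta)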

GPy/models/fitc.py Normal file
View file

@ -0,0 +1,252 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify, pdinv
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from GPy.core.sparse_gp import SparseGP
def backsub_both_sides(L, X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1)
return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T
class FITC(SparseGP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
def update_likelihood_approximation(self):
"""
Approximate a non-Gaussian likelihood using Expectation Propagation.
For a Gaussian (or direct: TODO) likelihood no iteration is required:
this function does nothing.
Diag(Knn - Qnn) is added to the noise term so that the tools already implemented in SparseGP can be reused.
The true precision is now 'true_precision', not 'precision'.
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0)
self._set_params(self._get_params()) # update the GP
def _computations(self):
# factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.eye(self.num_inducing), lower=1)
Lmipsi1 = np.dot(self.Lmi, self.psi1)
self.Qnn = np.dot(Lmipsi1.T, Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision / (1. + self.likelihood.precision * self.Diag0[:, None]) # Includes Diag0 in the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
if self.has_uncertain_inputs:
raise NotImplementedError
else:
if self.likelihood.is_heteroscedastic:
assert self.likelihood.input_dim == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T, Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1, b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T, self.Lmi)
LBiLmi = np.dot(self.LBi, self.Lmi)
LBL_inv = np.dot(LBiLmi.T, LBiLmi)
VVT = np.outer(self.V_star, self.V_star)
VV_p_Ki = np.dot(VVT, Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1, VV_p_Ki)
psi1beta = self.psi1 * self.beta_star.T
H = self.Kmm + mdot(self.psi1, psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T, LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T, LBiLmi.T)
alpha = np.array([np.dot(a.T, a) for a in betapsi1TLmiLBi])[:, None]
gamma_1 = mdot(VVT, self.psi1.T, Hi)
pHip = mdot(self.psi1.T, Hi, self.psi1)
gamma_2 = mdot(self.beta_star * pHip, self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star # dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star ** 2 # dA_dpsi0: yT*beta_star*y
self._dL_dpsi0 += .5 * alpha # dC_dpsi0
self._dL_dpsi0 += 0.5 * mdot(self.beta_star * pHip, self.V_star) ** 2 - self.V_star * mdot(self.V_star.T, pHip * self.beta_star).T # dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() # dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T, LBL_inv) # dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T, Hi, self.psi1, gamma_1) # dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1, b_psi1_Ki) # dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5 * (LBL_inv - Kmmi) + mdot(LBL_inv, psi1beta, Kmmipsi1.T) # dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi, self.psi1, gamma_1) # dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i, V_n, alpha_n, gamma_n, gamma_k in zip(range(self.N), self.V_star, alpha, gamma_2, gamma_3):
K_pp_K = np.dot(Kmmipsi1[:, i:(i + 1)], Kmmipsi1[:, i:(i + 1)].T)
# Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
_dpsi1 = (-V_n ** 2 - alpha_n + 2.*gamma_k - gamma_n ** 2) * Kmmipsi1.T[i:(i + 1), :]
# Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm
_dKmm = .5 * (V_n ** 2 + alpha_n + gamma_n ** 2 - 2.*gamma_k) * K_pp_K # Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1, self.X[i:i + 1, :], self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm, self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm , self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T, self.Z, self.X[i:i + 1, :])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heteroscedastic
dbstar_dnoise = self.likelihood.precision * (self.beta_star ** 2 * self.Diag0[:, None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi, self.psi1)
LBiLmipsi1 = np.dot(self.LBi, Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T, self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise / self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y ** 2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T, self.LBi, Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
# NOTE: the following four assignments are dead code, superseded by the dD_dnoise_* expressions below
# dD_dnoise_1 = mdot(self.V_star * LBiLmipsi1.T, LBiLmipsi1 * dbstar_dnoise.T * self.likelihood.Y.T)
# alpha = mdot(LBiLmipsi1, self.V_star)
# alpha_ = mdot(LBiLmipsi1.T, alpha)
# dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_ ** 2 * dbstar_dnoise)
dD_dnoise_1 = mdot(self.V_star.T, self.psi1.T, self.Lmi.T, self.LBi.T, self.LBi, self.Lmi, self.psi1, dbstar_dnoise * self.likelihood.Y)
dD_dnoise_2 = 0.5 * mdot(self.V_star.T, self.psi1.T, Hi, self.psi1, dbstar_dnoise * self.psi1.T, Hi, self.psi1, self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0, self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1, self.X, self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm, X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
if self.has_uncertain_inputs:
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
else:
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T, self.Z, self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm, X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1. / (1. + self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:, None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi, self.psi1)
self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. - Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T))
self.R, info = linalg.flapack.dtrtrs(self.L, self.Lmi, lower=1)
self.RPT = np.dot(self.R, self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT, self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P, self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
Xnew : input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V, info = linalg.flapack.dtrtrs(U, self.RPT0.T, lower=1)
C = np.eye(self.num_inducing) - np.dot(V.T, V)
mu_u = np.dot(C, self.RPT0) * (1. / self.Diag0[None, :])
# self.C = C
# self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
# self.mu_u = mu_u
# self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u, self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u, np.dot(self.Sigma, mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T, self.Lmi.T)
mu_star = np.dot(KR0T, mu_H)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T * np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T), 0))[:, None]
return mu_star[:, None], var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

View file

@ -7,7 +7,11 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.plot import gpplot
from .. import kern
from scipy import stats, linalg
from ..core import SparseGP
def backsub_both_sides(L,X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
@ -15,7 +19,7 @@ def backsub_both_sides(L,X):
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
class generalized_FITC(sparse_GP):
class GeneralizedFITC(SparseGP):
"""
Implementation of EP with FITC, following Naish-Guzman, A. and Holden, S. (2008).
@ -28,9 +32,9 @@ class generalized_FITC(sparse_GP):
:param X_variance: The variance in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x input_dim) | None
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing: number of inducing points (optional, default 10. Ignored if Z is not None)
:type num_inducing: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
@ -38,13 +42,18 @@ class generalized_FITC(sparse_GP):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = self.Z.shape[0]
self.num_inducing = self.Z.shape[0]
self.true_precision = likelihood.precision
super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
self._set_params(self._get_params())
def _set_params(self, p):
self.Z = p[:self.M*self.input_dim].reshape(self.M, self.input_dim)
self.Z = p[:self.num_inducing*self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
self._compute_kernel_matrices()
@ -58,7 +67,7 @@ class generalized_FITC(sparse_GP):
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in SparseGP.
The true precision is now 'true_precision', not 'precision'.
"""
if self.has_uncertain_inputs:
@ -75,14 +84,14 @@ class generalized_FITC(sparse_GP):
but adds a diagonal term to the covariance matrix: diag(Knn - Qnn).
This function:
- computes the FITC diagonal term
- removes the extra terms computed in the sparse_GP approximation
- removes the extra terms computed in the SparseGP approximation
- computes the likelihood gradients wrt the true precision.
"""
#NOTE the true precision is now 'true_precision' not 'precision'
if self.likelihood.is_heteroscedastic:
# Compute generalized FITC's diagonal term of the covariance
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
@ -94,7 +103,7 @@ class generalized_FITC(sparse_GP):
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
@ -122,7 +131,7 @@ class generalized_FITC(sparse_GP):
sf2 = sf**2
# Remove extra term from dL_dKmm
self.dL_dKmm += 0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += 0.5 * self.input_dim * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dpsi0 = None
#the partial derivative vector for the likelihood
@ -133,8 +142,8 @@ class generalized_FITC(sparse_GP):
else:
raise NotImplementedError, "homoscedastic derivatives not implemented"
#likelihood is not heteroscedastic
#self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
#self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
#self.partial_for_likelihood = - 0.5 * self.N*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
#self.partial_for_likelihood += 0.5 * self.input_dim * trace_dot(self.Bi,self.A)*self.likelihood.precision
#self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
#TODO partial derivative vector for the likelihood not implemented
@ -146,7 +155,7 @@ class generalized_FITC(sparse_GP):
if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
#NOTE in sparse_GP this would include the gradient wrt psi0
#NOTE in SparseGP this would include the gradient wrt psi0
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X)
return dL_dtheta
@ -155,11 +164,11 @@ class generalized_FITC(sparse_GP):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
A = -0.5*self.N*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
#C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
A = -0.5*self.N*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.num_inducing*np.log(sf2))
#C = -0.5*self.input_dim * (self.B_logdet + self.num_inducing*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
@ -177,13 +186,13 @@ class generalized_FITC(sparse_GP):
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.M) - np.dot(V.T,V)
C = np.eye(self.num_inducing) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
@ -199,12 +208,12 @@ class generalized_FITC(sparse_GP):
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "homoscedastic fitc not implemented"
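Reviewer note: the commented identity in _raw_predict, C = I - [RPT0](D + Qnn)^-1[RPT0].T, is evaluated through a Cholesky factor of (D + Qnn) so only triangular solves are needed. A stand-alone sketch of that step (scipy.linalg.solve_triangular standing in for the dtrtrs calls):

import numpy as np
from scipy.linalg import solve_triangular

def posterior_C(RPT0, Qnn, Diag0):
    num_inducing = RPT0.shape[0]
    U = np.linalg.cholesky(np.diag(Diag0) + Qnn)   # D + Qnn = U U^T
    V = solve_triangular(U, RPT0.T, lower=True)    # V = U^-1 RPT0^T, so V^T V = RPT0 (D+Qnn)^-1 RPT0^T
    return np.eye(num_inducing) - np.dot(V.T, V)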

View file

@ -15,7 +15,7 @@ class GP_classification(GP):
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class GP_classification(GP):
kernel = kern.rbf(X.shape[1])
if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial()
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
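Reviewer note: the renamed default wires a probit Binomial likelihood through EP. A hedged construction sketch using only calls visible in this diff (X and Y are the usual input and 0/1 label arrays):

from GPy import likelihoods, kern

distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
kernel = kern.rbf(X.shape[1])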

View file

@ -7,7 +7,7 @@ from ..core import GP
from .. import likelihoods
from .. import kern
class GP_regression(GP):
class GPRegression(GP):
"""
Gaussian Process model for regression

View file

@ -4,14 +4,13 @@ Created on 10 Apr 2013
@author: Max Zwiessele
'''
from GPy.core import model
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.core import sparse_GP
from GPy.core import SparseGP
from GPy.util.linalg import PCA
from scipy import linalg
import numpy
import itertools
import pylab
from GPy.kern.kern import kern
from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD(model):
"""
@ -38,7 +37,7 @@ class MRD(model):
*concat: PCA on concatenated outputs
*single: PCA on each output
*random: random
:param M:
:param num_inducing:
number of inducing inputs to use
:param Z:
initial inducing inputs
@ -62,22 +61,22 @@ class MRD(model):
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim
self.M = M
self.num_inducing = M
self._debug = _debug
self._init = True
X = self._init_X(initx, likelihood_or_Y_list)
Z = self._init_Z(initz, X)
self.bgplvms = [Bayesian_GPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
del self._init
self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum()
self.N = self.gref.N
self.NQ = self.N * self.input_dim
self.MQ = self.M * self.input_dim
self.MQ = self.num_inducing * self.input_dim
model.__init__(self) # @UndefinedVariable
self._set_params(self._get_params())
@ -151,7 +150,7 @@ class MRD(model):
itertools.izip(ns,
itertools.repeat(name)))
return list(itertools.chain(n1var, *(map_names(\
sparse_GP._get_param_names(g)[self.MQ:], n) \
SparseGP._get_param_names(g)[self.MQ:], n) \
for g, n in zip(self.bgplvms, self.names))))
def _get_params(self):
@ -165,14 +164,14 @@ class MRD(model):
X = self.gref.X.ravel()
X_var = self.gref.X_variance.ravel()
Z = self.gref.Z.ravel()
thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
return params
# def _set_var_params(self, g, X, X_var, Z):
# g.X = X.reshape(self.N, self.input_dim)
# g.X_variance = X_var.reshape(self.N, self.input_dim)
# g.Z = Z.reshape(self.M, self.input_dim)
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
#
# def _set_kern_params(self, g, p):
# g.kern._set_params(p[:g.kern.Nparam])
@ -206,7 +205,7 @@ class MRD(model):
def log_likelihood(self):
ll = -self.gref.KL_divergence()
for g in self.bgplvms:
ll += sparse_GP.log_likelihood(g)
ll += SparseGP.log_likelihood(g)
return ll
def _log_likelihood_gradients(self):
@ -215,7 +214,7 @@ class MRD(model):
dLdmu -= dKLmu
dLdS -= dKLdS
dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return numpy.hstack((dLdmuS,
dldzt1,
@ -250,9 +249,9 @@ class MRD(model):
if X is None:
X = self.X
if init in "permute":
Z = numpy.random.permutation(X.copy())[:self.M]
Z = numpy.random.permutation(X.copy())[:self.num_inducing]
elif init in "random":
Z = numpy.random.randn(self.M, self.input_dim) * X.var()
Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z
return Z
@ -274,8 +273,8 @@ class MRD(model):
else:
return pylab.gcf()
def plot_X_1d(self):
return self.gref.plot_X_1d()
def plot_X_1d(self, *a, **kw):
return self.gref.plot_X_1d(*a, **kw)
def plot_X(self, fignum=None, ax=None):
fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X))
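Reviewer note: MRD shares one latent space (X, X_variance) and one set of inducing inputs Z across all views, so the Z-gradient is the sum of the per-view sparse-GP gradients; that is what the reduce(...) in _log_likelihood_gradients accumulates. A toy sketch of that step, with random arrays standing in for SparseGP._log_likelihood_gradients(g)[:self.MQ]:

import numpy as np

per_view_dL_dZ = [np.random.randn(6) for _ in range(3)]  # 3 views, M * Q = 6
dldzt1 = reduce(lambda a, b: a + b, per_view_dL_dZ)      # Python 2 built-in reduce, as above
assert np.allclose(dldzt1, np.sum(per_view_dL_dZ, axis=0))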

View file

@ -1,61 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression
class sparse_GPLVM(sparse_GP_regression, GPLVM):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10):
X = self.initialise_latent(init, input_dim, Y)
sparse_GP_regression.__init__(self, X, Y, kernel=kernel,M=M)
def _get_param_names(self):
return (sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[])
+ sparse_GP_regression._get_param_names(self))
def _get_params(self):
return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self)))
def _set_params(self,x):
self.X = x[:self.X.size].reshape(self.N,self.input_dim).copy()
sparse_GP_regression._set_params(self, x[self.X.size:])
def log_likelihood(self):
return sparse_GP_regression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
#passing Z without a small amount of jitter will induce the white kernel where we don't want it!
mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs)
pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')

View file

@ -16,7 +16,7 @@ class sparse_GP_classification(sparse_GP):
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class sparse_GP_classification(sparse_GP):
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None:
distribution = likelihoods.likelihood_functions.binomial()
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -3,17 +3,15 @@
import numpy as np
from ..core import sparse_GP
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
from GP_regression import GP_regression
class sparse_GP_regression(sparse_GP):
class SparseGPRegression(SparseGP):
"""
Gaussian Process model for regression
This is a thin wrapper around the sparse_GP class, with a set of sensible defalts
This is a thin wrapper around the SparseGP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
@ -29,19 +27,19 @@ class sparse_GP_regression(sparse_GP):
"""
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None):
#kern defaults to rbf (plus white for stability)
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
#Z defaults to a subset of the data
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(X.shape[0])[:M]
Z = X[i].copy()
else:
assert Z.shape[1]==X.shape[1]
assert Z.shape[1] == X.shape[1]
#likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
# likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
self._set_params(self._get_params())
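Reviewer note: a hedged usage sketch for the renamed class, assuming the defaults above (rbf+white kernel, Gaussian likelihood, Z drawn as M random rows of X):

import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = np.sin(6. * X) + 0.05 * np.random.randn(50, 1)
m = GPy.models.SparseGPRegression(X, Y, M=10)  # 10 inducing inputs picked from X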

View file

@ -0,0 +1,61 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from GPy.models.sparse_gp_regression import SparseGPRegression
from GPy.models.gplvm import GPLVM
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
class SparseGPLVM(SparseGPRegression, GPLVM):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10):
X = self.initialise_latent(init, input_dim, Y)
SparseGPRegression.__init__(self, X, Y, kernel=kernel, M=M)
def _get_param_names(self):
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
+ SparseGPRegression._get_param_names(self))
def _get_params(self):
return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self)))
def _set_params(self, x):
self.X = x[:self.X.size].reshape(self.N, self.input_dim).copy()
SparseGPRegression._set_params(self, x[self.X.size:])
def log_likelihood(self):
return SparseGPRegression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1.T, self.X, self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), SparseGPRegression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
# passing Z without a small amount of jitter will induce the white kernel where we don't want it!
mu, var, upper, lower = SparseGPRegression.predict(self, self.Z + np.random.randn(*self.Z.shape) * 0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs)
pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
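Reviewer note: initialise_latent (inherited from GPLVM) provides the 'PCA' start used by the default init argument. A minimal stand-alone sketch of what a PCA initialisation does, as a hypothetical helper rather than the actual GPLVM method:

import numpy as np

def pca_init(Y, input_dim):
    Yc = Y - Y.mean(axis=0)                            # centre the data
    _, _, Vt = np.linalg.svd(Yc, full_matrices=False)
    return np.dot(Yc, Vt[:input_dim].T)                # project onto the top principal directions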

View file

@ -3,25 +3,21 @@
import numpy as np
from .. import kern
from ..core import model
from ..util.linalg import pdinv
from ..util.plot import gpplot
from ..util.warping_functions import *
from GP_regression import GP_regression
from ..core import GP
from .. import likelihoods
from .. import kern
from GPy.util.warping_functions import TanhWarpingFunction_d
from GPy import kern
class warpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False):
class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if warping_function is None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1)
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
Y = self._scale_data(Y)
self.has_uncertain_inputs = False
@ -35,10 +31,10 @@ class warpedGP(GP):
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
@ -68,15 +64,15 @@ class warpedGP(GP):
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:,:-1].flatten(), warping_grads[0,-1])
warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
return_covar_chain = True)
djac_dpsi = ((1.0/grad_y[:,:, None, None])*grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:,None,None,None] * grad_psi).sum(axis=0).sum(axis=0)
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi
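Reviewer note: the default TanhWarpingFunction_d has n_terms * 3 + 1 parameters (a, b, c per term plus one linear coefficient, matching the warping_params size above). A hedged sketch of that tanh warping family and the log-Jacobian term that enters the gradients, not the actual class:

import numpy as np

def tanh_warp(y, a, b, c, d):
    """f(y) = d*y + sum_i a_i * tanh(b_i * (y + c_i)); with a_i, b_i >= 0 the map is
    monotonic, and log f'(y) is the Jacobian correction in the warped-GP likelihood."""
    t = np.tanh(b * (y + c))                                        # (N, n_terms) by broadcasting
    f = d * y + np.sum(a * t, axis=-1, keepdims=True)
    fprime = d + np.sum(a * b * (1. - t ** 2), axis=-1, keepdims=True)
    return f, np.log(fprime)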