mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-27 14:25:16 +02:00
generalized fitc + examples
This commit is contained in:
parent
0c0c7b000c
commit
a375cb9a1b
4 changed files with 216 additions and 40 deletions
53
GPy/examples/fitc_class_01.py
Normal file
53
GPy/examples/fitc_class_01.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
import pylab as pb
|
||||||
|
import numpy as np
|
||||||
|
import GPy
|
||||||
|
pb.ion()
|
||||||
|
pb.close('all')
|
||||||
|
|
||||||
|
"""
|
||||||
|
Simple 1D classification example
|
||||||
|
:param seed : seed value for data generation (default is 4).
|
||||||
|
:type seed: int
|
||||||
|
"""
|
||||||
|
seed=10000
|
||||||
|
|
||||||
|
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
|
||||||
|
X = data['X']
|
||||||
|
Y = data['Y'][:, 0:1]
|
||||||
|
Y[Y == -1] = 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#X = np.vstack((np.random.uniform(0,10,(10,1)),np.random.uniform(7,17,(10,1)),np.random.uniform(15,25,(10,1))))
|
||||||
|
#Y = np.vstack((np.zeros((10,1)),np.ones((10,1)),np.zeros((10,1))))
|
||||||
|
|
||||||
|
# Kernel object
|
||||||
|
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
|
||||||
|
|
||||||
|
# Likelihood object
|
||||||
|
distribution = GPy.likelihoods.likelihood_functions.probit()
|
||||||
|
likelihood = GPy.likelihoods.EP(Y,distribution)
|
||||||
|
|
||||||
|
Z = np.random.uniform(X.min(),X.max(),(10,1))
|
||||||
|
#Z = np.array([0,20])[:,None]
|
||||||
|
print Z
|
||||||
|
|
||||||
|
# Model definition
|
||||||
|
m = GPy.models.generalized_FITC(X,likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=False)
|
||||||
|
m.set('len',2.)
|
||||||
|
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
# Optimize
|
||||||
|
#m.constrain_fixed('iip')
|
||||||
|
m.update_likelihood_approximation()
|
||||||
|
print m.checkgrad(verbose=1)
|
||||||
|
# Parameters optimization:
|
||||||
|
m.optimize()
|
||||||
|
#m.EPEM() #FIXME
|
||||||
|
|
||||||
|
# Plot
|
||||||
|
pb.subplot(211)
|
||||||
|
m.plot_f()
|
||||||
|
pb.subplot(212)
|
||||||
|
m.plot()
|
||||||
|
print(m)
|
||||||
66
GPy/examples/fitc_class_02.py
Normal file
66
GPy/examples/fitc_class_02.py
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
import pylab as pb
|
||||||
|
import numpy as np
|
||||||
|
import GPy
|
||||||
|
pb.close('all')
|
||||||
|
|
||||||
|
seed=10000
|
||||||
|
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
|
||||||
|
|
||||||
|
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
|
||||||
|
:param seed : seed value for data generation.
|
||||||
|
:type seed: int
|
||||||
|
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
|
||||||
|
:type inducing: int
|
||||||
|
"""
|
||||||
|
|
||||||
|
data = GPy.util.datasets.crescent_data(seed=seed)
|
||||||
|
|
||||||
|
# Kernel object
|
||||||
|
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
|
||||||
|
|
||||||
|
# Likelihood object
|
||||||
|
distribution = GPy.likelihoods.likelihood_functions.probit()
|
||||||
|
likelihood = GPy.likelihoods.EP(data['Y'],distribution)
|
||||||
|
|
||||||
|
sample = np.random.randint(0,data['X'].shape[0],10)
|
||||||
|
Z = data['X'][sample,:]
|
||||||
|
# create sparse GP EP model
|
||||||
|
#m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
|
||||||
|
m = GPy.models.generalized_FITC(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.set('len',10.)
|
||||||
|
|
||||||
|
m.update_likelihood_approximation()
|
||||||
|
|
||||||
|
# optimize
|
||||||
|
m.optimize()
|
||||||
|
print(m)
|
||||||
|
|
||||||
|
# plot
|
||||||
|
m.plot()
|
||||||
|
fitc = m
|
||||||
|
|
||||||
|
pb.figure()
|
||||||
|
# Kernel object
|
||||||
|
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
|
||||||
|
|
||||||
|
# Likelihood object
|
||||||
|
distribution = GPy.likelihoods.likelihood_functions.probit()
|
||||||
|
likelihood = GPy.likelihoods.EP(data['Y'],distribution)
|
||||||
|
|
||||||
|
sample = np.random.randint(0,data['X'].shape[0],10)
|
||||||
|
Z = data['X'][sample,:]
|
||||||
|
# create sparse GP EP model
|
||||||
|
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.set('len',10.)
|
||||||
|
|
||||||
|
m.update_likelihood_approximation()
|
||||||
|
|
||||||
|
# optimize
|
||||||
|
m.optimize()
|
||||||
|
print(m)
|
||||||
|
|
||||||
|
# plot
|
||||||
|
m.plot()
|
||||||
|
variational = m
|
||||||
53
GPy/examples/fitc_regr_01.py
Normal file
53
GPy/examples/fitc_regr_01.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
import pylab as pb
|
||||||
|
import numpy as np
|
||||||
|
import GPy
|
||||||
|
pb.ion()
|
||||||
|
pb.close('all')
|
||||||
|
|
||||||
|
N = 400
|
||||||
|
M = 10
|
||||||
|
# sample inputs and outputs
|
||||||
|
X = np.random.uniform(-3.,3.,(N,1))
|
||||||
|
Y = np.sin(X)+np.random.randn(N,1)*0.05
|
||||||
|
|
||||||
|
"""Run a 1D example of a sparse GP regression."""
|
||||||
|
"""
|
||||||
|
rbf = GPy.kern.rbf(1)
|
||||||
|
noise = GPy.kern.white(1)
|
||||||
|
kernel = rbf + noise
|
||||||
|
Z = np.random.uniform(-3,3,(M,1))
|
||||||
|
likelihood = GPy.likelihoods.Gaussian(Y)
|
||||||
|
m = GPy.models.sparse_GP(X, likelihood, kernel, Z)
|
||||||
|
m.scale_factor=10000
|
||||||
|
m.constrain_positive('(variance|lengthscale|precision)')
|
||||||
|
m.checkgrad(verbose=1)
|
||||||
|
m.optimize('tnc', messages = 1)
|
||||||
|
pb.figure()
|
||||||
|
m.plot()
|
||||||
|
|
||||||
|
variational = m
|
||||||
|
"""
|
||||||
|
|
||||||
|
# construct kernel
|
||||||
|
rbf = GPy.kern.rbf(1)
|
||||||
|
noise = GPy.kern.white(1)
|
||||||
|
kernel = rbf + noise
|
||||||
|
#Z = np.random.uniform(-3,3,(M,1))
|
||||||
|
Z = variational.Z
|
||||||
|
likelihood = GPy.likelihoods.Gaussian(Y)
|
||||||
|
# create simple GP model
|
||||||
|
m = GPy.models.generalized_FITC(X, likelihood, kernel, Z=Z)
|
||||||
|
m.constrain_positive('(variance|lengthscale|precision)')
|
||||||
|
#m.constrain_fixed('iip')
|
||||||
|
m.checkgrad(verbose=1)
|
||||||
|
m.optimize('tnc', messages = 1)
|
||||||
|
#pb.figure()
|
||||||
|
#m.plot()
|
||||||
|
"""
|
||||||
|
Xnew = X.copy().flatten()
|
||||||
|
Xnew.sort()
|
||||||
|
Xnew = Xnew[:,None]
|
||||||
|
mean,var,low,up = m.predict(Xnew)
|
||||||
|
GPy.util.plot.gpplot(Xnew,mean,low,up)
|
||||||
|
fitc = m
|
||||||
|
"""
|
||||||
|
|
@ -32,14 +32,8 @@ class generalized_FITC(sparse_GP):
|
||||||
|
|
||||||
def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
|
def __init__(self, X, likelihood, kernel, Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False):
|
||||||
|
|
||||||
if type(Z) == int:
|
|
||||||
#FIXME
|
|
||||||
self.M = Z
|
|
||||||
self.Z = (np.random.random_sample(self.D*self.M)*(self.X.max()-self.X.min())+self.X.min()).reshape(self.M,-1)
|
|
||||||
elif type(Z) == np.ndarray:
|
|
||||||
self.Z = Z
|
self.Z = Z
|
||||||
self.M = self.Z.shape[0]
|
self.M = self.Z.shape[0]
|
||||||
|
|
||||||
self._precision = likelihood.precision
|
self._precision = likelihood.precision
|
||||||
|
|
||||||
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False)
|
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_uncertainty=None, Xslices=None,Zslices=None, normalize_X=False)
|
||||||
|
|
@ -64,25 +58,26 @@ class generalized_FITC(sparse_GP):
|
||||||
else:
|
else:
|
||||||
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
|
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
|
||||||
self._precision = self.likelihood.precision # Save the true precision
|
self._precision = self.likelihood.precision # Save the true precision
|
||||||
self.likelihood.precision = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
|
self.likelihood.precision = self._precision/(1. + self._precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
|
||||||
self._set_params(self._get_params()) # update the GP
|
self._set_params(self._get_params()) # update the GP
|
||||||
|
|
||||||
|
|
||||||
def _FITC_computations(self):
|
def _FITC_computations(self):
|
||||||
"""
|
"""
|
||||||
FITC approximation doesn't have the correction term in the log-likelihood bound,
|
FITC approximation doesn't have the correction term in the log-likelihood bound,
|
||||||
but adds a diagonal term to the covariance matrix.
|
but adds a diagonal term to the covariance matrix: diag(Knn - Qnn).
|
||||||
This function:
|
This function:
|
||||||
- computes the diagonal term
|
- computes the FITC diagonal term
|
||||||
- eliminates the extra terms computed in the sparse_GP approximation
|
- removes the extra terms computed in the sparse_GP approximation
|
||||||
- computes the likelihood gradients wrt the true precision.
|
- computes the likelihood gradients wrt the true precision.
|
||||||
"""
|
"""
|
||||||
|
#NOTE the true precison is now '_precison' not 'precision'
|
||||||
if self.likelihood.is_heteroscedastic:
|
if self.likelihood.is_heteroscedastic:
|
||||||
|
|
||||||
# Compute generalized FITC's diagonal term of the covariance
|
# Compute generalized FITC's diagonal term of the covariance
|
||||||
self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
|
self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
|
||||||
self.Diag0 = self.psi0 - np.diag(self.Qnn)
|
self.Diag0 = self.psi0 - np.diag(self.Qnn)
|
||||||
|
|
||||||
self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
|
self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
|
||||||
|
|
||||||
self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
|
self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
|
||||||
self.RPT0 = np.dot(self.Lmi,self.psi1)
|
self.RPT0 = np.dot(self.Lmi,self.psi1)
|
||||||
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
|
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
|
||||||
|
|
@ -92,14 +87,13 @@ class generalized_FITC(sparse_GP):
|
||||||
self.w = self.Diag * self.likelihood.v_tilde
|
self.w = self.Diag * self.likelihood.v_tilde
|
||||||
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
|
self.gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
|
||||||
self.mu = self.w + np.dot(self.P,self.gamma)
|
self.mu = self.w + np.dot(self.P,self.gamma)
|
||||||
self.mu_tilde = (self.likelihood.v_tilde/self.likelihood.tau_tilde)[:,None]
|
|
||||||
|
|
||||||
# Remove extra term from dL_dpsi1
|
# Remove extra term from dL_dpsi1
|
||||||
self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
|
self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
|
||||||
else:
|
else:
|
||||||
|
raise NotImplementedError, "homoscedastic fitc not implemented"
|
||||||
# Remove extra term from dL_dpsi1
|
# Remove extra term from dL_dpsi1
|
||||||
self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB
|
#self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB
|
||||||
|
|
||||||
|
|
||||||
sf = self.scale_factor
|
sf = self.scale_factor
|
||||||
sf2 = sf**2
|
sf2 = sf**2
|
||||||
|
|
@ -110,18 +104,16 @@ class generalized_FITC(sparse_GP):
|
||||||
|
|
||||||
#the partial derivative vector for the likelihood
|
#the partial derivative vector for the likelihood
|
||||||
if self.likelihood.Nparams == 0:
|
if self.likelihood.Nparams == 0:
|
||||||
#save computation here.
|
|
||||||
self.partial_for_likelihood = None
|
self.partial_for_likelihood = None
|
||||||
elif self.likelihood.is_heteroscedastic:
|
elif self.likelihood.is_heteroscedastic:
|
||||||
raise NotImplementedError, "heteroscedatic derivates not implemented"
|
raise NotImplementedError, "heteroscedastic derivates not implemented"
|
||||||
else:
|
else:
|
||||||
|
raise NotImplementedError, "homoscedastic derivatives not implemented"
|
||||||
#likelihood is not heterscedatic
|
#likelihood is not heterscedatic
|
||||||
self.partial_for_likelihood = - 0.5 * self.N*self.D*self._precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self._precision**2
|
#self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
|
||||||
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self._precision
|
#self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
|
||||||
self.partial_for_likelihood += self._precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
|
#self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
|
||||||
|
|
||||||
#TODO partial derivative vector for the likelihood not implemented
|
#TODO partial derivative vector for the likelihood not implemented
|
||||||
#NOTE the true precison is now '_precison' not 'precision'
|
|
||||||
|
|
||||||
def dL_dtheta(self):
|
def dL_dtheta(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -147,23 +139,15 @@ class generalized_FITC(sparse_GP):
|
||||||
D = 0.5*np.trace(self.Cpsi1VVpsi1)
|
D = 0.5*np.trace(self.Cpsi1VVpsi1)
|
||||||
return A+C+D
|
return A+C+D
|
||||||
|
|
||||||
def _raw_predict(self, Xnew, slices, full_cov=True):
|
def _raw_predict(self, Xnew, slices, full_cov=False):
|
||||||
if self.likelihood.is_heteroscedastic:
|
if self.likelihood.is_heteroscedastic:
|
||||||
"""
|
"""
|
||||||
Make a prediction for the vsGP model
|
Make a prediction for the generalized FITC model
|
||||||
|
|
||||||
Arguments
|
Arguments
|
||||||
---------
|
---------
|
||||||
X : Input prediction data - Nx1 numpy array (floats)
|
X : Input prediction data - Nx1 numpy array (floats)
|
||||||
"""
|
"""
|
||||||
Kx = self.kern.K(self.Z, Xnew)
|
|
||||||
if full_cov:
|
|
||||||
Kxx = self.kern.K(Xnew)
|
|
||||||
else:
|
|
||||||
Kxx = self.kern.K(Xnew)#FIXME
|
|
||||||
#raise NotImplementedError
|
|
||||||
#Kxx = self.kern.Kdiag(Xnew)
|
|
||||||
|
|
||||||
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
|
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
|
||||||
|
|
||||||
# Ci = I + (RPT0)Di(RPT0).T
|
# Ci = I + (RPT0)Di(RPT0).T
|
||||||
|
|
@ -184,13 +168,19 @@ class generalized_FITC(sparse_GP):
|
||||||
self.mu_H = mu_H
|
self.mu_H = mu_H
|
||||||
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
|
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
|
||||||
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
|
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
|
||||||
|
Kx = self.kern.K(self.Z, Xnew)
|
||||||
KR0T = np.dot(Kx.T,self.Lmi.T)
|
KR0T = np.dot(Kx.T,self.Lmi.T)
|
||||||
mu_star = np.dot(KR0T,mu_H)
|
mu_star = np.dot(KR0T,mu_H)
|
||||||
sigma2_star = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
if full_cov:
|
||||||
vdiag = np.diag(sigma2_star)
|
Kxx = self.kern.K(Xnew)
|
||||||
return mu_star[:,None],vdiag[:,None]
|
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
||||||
else:
|
else:
|
||||||
"""Internal helper function for making predictions, does not account for normalization"""
|
Kxx = self.kern.Kdiag(Xnew)
|
||||||
|
Kxx_ = self.kern.K(Xnew)
|
||||||
|
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
||||||
|
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
|
||||||
|
return mu_star[:,None],var
|
||||||
|
"""
|
||||||
Kx = self.kern.K(self.Z, Xnew)
|
Kx = self.kern.K(self.Z, Xnew)
|
||||||
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
|
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
|
||||||
if full_cov:
|
if full_cov:
|
||||||
|
|
@ -199,5 +189,19 @@ class generalized_FITC(sparse_GP):
|
||||||
else:
|
else:
|
||||||
Kxx = self.kern.Kdiag(Xnew)
|
Kxx = self.kern.Kdiag(Xnew)
|
||||||
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
|
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
|
||||||
|
a = kjk
|
||||||
return mu,var[:,None]
|
return mu,var[:,None]
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
raise NotImplementedError, "homoscedastic fitc not implemented"
|
||||||
|
"""
|
||||||
|
Kx = self.kern.K(self.Z, Xnew)
|
||||||
|
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
|
||||||
|
if full_cov:
|
||||||
|
Kxx = self.kern.K(Xnew)
|
||||||
|
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
|
||||||
|
else:
|
||||||
|
Kxx = self.kern.Kdiag(Xnew)
|
||||||
|
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
|
||||||
|
return mu,var[:,None]
|
||||||
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue