mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
Merge pull request #314 from zhenwendai/devel_pullrequest
Fix the issue of negative predicted variance of normal GP
This commit is contained in:
commit
5c53bc45e2
12 changed files with 268 additions and 149 deletions
|
|
@ -212,36 +212,9 @@ class GP(Model):
|
|||
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
|
||||
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
|
||||
"""
|
||||
if kern is None:
|
||||
kern = self.kern
|
||||
|
||||
Kx = kern.K(self._predictive_variable, Xnew)
|
||||
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||
if len(mu.shape)==1:
|
||||
mu = mu.reshape(-1,1)
|
||||
if full_cov:
|
||||
Kxx = kern.K(Xnew)
|
||||
if self.posterior.woodbury_inv.ndim == 2:
|
||||
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
||||
elif self.posterior.woodbury_inv.ndim == 3: # Missing data
|
||||
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
||||
from ..util.linalg import mdot
|
||||
for i in range(var.shape[2]):
|
||||
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
||||
var = var
|
||||
else:
|
||||
Kxx = kern.Kdiag(Xnew)
|
||||
if self.posterior.woodbury_inv.ndim == 2:
|
||||
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
|
||||
elif self.posterior.woodbury_inv.ndim == 3: # Missing data
|
||||
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
|
||||
for i in range(var.shape[1]):
|
||||
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
|
||||
var = var
|
||||
#add in the mean function
|
||||
mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
|
||||
if self.mean_function is not None:
|
||||
mu += self.mean_function.f(Xnew)
|
||||
|
||||
return mu, var
|
||||
|
||||
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None):
|
||||
|
|
|
|||
|
|
@ -2,3 +2,4 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from paramz.transformations import *
|
||||
from paramz.transformations import __fixed__
|
||||
|
|
|
|||
|
|
@ -113,85 +113,3 @@ class SparseGP(GP):
|
|||
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
|
||||
self._Zgrad = self.Z.gradient.copy()
|
||||
|
||||
|
||||
def _raw_predict(self, Xnew, full_cov=False, kern=None):
|
||||
"""
|
||||
Make a prediction for the latent function values.
|
||||
|
||||
For certain inputs we give back a full_cov of shape NxN,
|
||||
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of,
|
||||
we take only the diagonal elements across N.
|
||||
|
||||
For uncertain inputs, the SparseGP bound produces cannot predict the full covariance matrix full_cov for now.
|
||||
The implementation of that will follow. However, for each dimension the
|
||||
covariance changes, so if full_cov is False (standard), we return the variance
|
||||
for each dimension [NxD].
|
||||
"""
|
||||
|
||||
if kern is None: kern = self.kern
|
||||
|
||||
if not isinstance(Xnew, VariationalPosterior):
|
||||
# Kx = kern.K(self._predictive_variable, Xnew)
|
||||
# mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||
# if full_cov:
|
||||
# Kxx = kern.K(Xnew)
|
||||
# if self.posterior.woodbury_inv.ndim == 2:
|
||||
# var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
||||
# elif self.posterior.woodbury_inv.ndim == 3:
|
||||
# var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
||||
# for i in range(var.shape[2]):
|
||||
# var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
||||
# var = var
|
||||
# else:
|
||||
# Kxx = kern.Kdiag(Xnew)
|
||||
# if self.posterior.woodbury_inv.ndim == 2:
|
||||
# var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
|
||||
# elif self.posterior.woodbury_inv.ndim == 3:
|
||||
# var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
|
||||
# for i in range(var.shape[1]):
|
||||
# var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
|
||||
# var = var
|
||||
# #add in the mean function
|
||||
# if self.mean_function is not None:
|
||||
# mu += self.mean_function.f(Xnew)
|
||||
mu, var = super(SparseGP, self)._raw_predict(Xnew, full_cov, kern)
|
||||
else:
|
||||
psi0_star = kern.psi0(self._predictive_variable, Xnew)
|
||||
psi1_star = kern.psi1(self._predictive_variable, Xnew)
|
||||
psi2_star = kern.psi2n(self._predictive_variable, Xnew)
|
||||
la = self.posterior.woodbury_vector
|
||||
mu = np.dot(psi1_star, la) # TODO: dimensions?
|
||||
N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
|
||||
|
||||
if full_cov:
|
||||
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
|
||||
var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
|
||||
di = np.diag_indices(la.shape[1])
|
||||
else:
|
||||
tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
|
||||
var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
|
||||
if self.posterior.woodbury_inv.ndim==2:
|
||||
var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.flat)[:,None]
|
||||
else:
|
||||
var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.reshape(-1,D))
|
||||
assert np.all(var>=-1e-5), "The predicted variance goes negative!: "+str(var)
|
||||
var = np.clip(var,1e-15,np.inf)
|
||||
|
||||
# for i in range(Xnew.shape[0]):
|
||||
# _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
|
||||
# psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
|
||||
# tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
|
||||
#
|
||||
# var_ = mdot(la.T, tmp, la)
|
||||
# p0 = psi0_star[i]
|
||||
# t = np.atleast_3d(self.posterior.woodbury_inv)
|
||||
# t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
|
||||
#
|
||||
# if full_cov:
|
||||
# var_[di] += p0
|
||||
# var_[di] += -t2
|
||||
# var[i] = var_
|
||||
# else:
|
||||
# var[i] = np.diag(var_)+p0-t2
|
||||
|
||||
return mu, var
|
||||
|
|
|
|||
|
|
@ -1,14 +1,13 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from .posterior import Posterior
|
||||
from .posterior import PosteriorExact as Posterior
|
||||
from ...util.linalg import pdinv, dpotrs, tdot
|
||||
from ...util import diag
|
||||
import numpy as np
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
|
||||
class ExactGaussianInference(LatentFunctionInference):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian.
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
|
||||
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot
|
||||
from GPy.core.parameterization.variational import VariationalPosterior
|
||||
|
||||
class Posterior(object):
|
||||
"""
|
||||
|
|
@ -187,3 +188,85 @@ class Posterior(object):
|
|||
if self._K_chol is None:
|
||||
self._K_chol = jitchol(self._K)
|
||||
return self._K_chol
|
||||
|
||||
def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
|
||||
woodbury_vector = self.woodbury_vector
|
||||
woodbury_inv = self.woodbury_inv
|
||||
|
||||
if not isinstance(Xnew, VariationalPosterior):
|
||||
Kx = kern.K(pred_var, Xnew)
|
||||
mu = np.dot(Kx.T, woodbury_vector)
|
||||
if len(mu.shape)==1:
|
||||
mu = mu.reshape(-1,1)
|
||||
if full_cov:
|
||||
Kxx = kern.K(Xnew)
|
||||
if woodbury_inv.ndim == 2:
|
||||
var = Kxx - np.dot(Kx.T, np.dot(woodbury_inv, Kx))
|
||||
elif woodbury_inv.ndim == 3: # Missing data
|
||||
var = np.empty((Kxx.shape[0],Kxx.shape[1],woodbury_inv.shape[2]))
|
||||
from ...util.linalg import mdot
|
||||
for i in range(var.shape[2]):
|
||||
var[:, :, i] = (Kxx - mdot(Kx.T, woodbury_inv[:, :, i], Kx))
|
||||
var = var
|
||||
else:
|
||||
Kxx = kern.Kdiag(Xnew)
|
||||
if woodbury_inv.ndim == 2:
|
||||
var = (Kxx - np.sum(np.dot(woodbury_inv.T, Kx) * Kx, 0))[:,None]
|
||||
elif woodbury_inv.ndim == 3: # Missing data
|
||||
var = np.empty((Kxx.shape[0],woodbury_inv.shape[2]))
|
||||
for i in range(var.shape[1]):
|
||||
var[:, i] = (Kxx - (np.sum(np.dot(woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
|
||||
var = var
|
||||
else:
|
||||
psi0_star = kern.psi0(pred_var, Xnew)
|
||||
psi1_star = kern.psi1(pred_var, Xnew)
|
||||
psi2_star = kern.psi2n(pred_var, Xnew)
|
||||
la = woodbury_vector
|
||||
mu = np.dot(psi1_star, la) # TODO: dimensions?
|
||||
N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
|
||||
|
||||
if full_cov:
|
||||
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
|
||||
var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
|
||||
di = np.diag_indices(la.shape[1])
|
||||
else:
|
||||
tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
|
||||
var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
|
||||
if woodbury_inv.ndim==2:
|
||||
var += -psi2_star.reshape(N,-1).dot(woodbury_inv.flat)[:,None]
|
||||
else:
|
||||
var += -psi2_star.reshape(N,-1).dot(woodbury_inv.reshape(-1,D))
|
||||
var = np.clip(var,1e-15,np.inf)
|
||||
return mu, var
|
||||
|
||||
class PosteriorExact(Posterior):
|
||||
|
||||
def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
|
||||
|
||||
Kx = kern.K(pred_var, Xnew)
|
||||
mu = np.dot(Kx.T, self.woodbury_vector)
|
||||
if len(mu.shape)==1:
|
||||
mu = mu.reshape(-1,1)
|
||||
if full_cov:
|
||||
Kxx = kern.K(Xnew)
|
||||
if self._woodbury_chol.ndim == 2:
|
||||
tmp = dtrtrs(self._woodbury_chol, Kx)[0]
|
||||
var = Kxx - tdot(tmp.T)
|
||||
elif self._woodbury_chol.ndim == 3: # Missing data
|
||||
var = np.empty((Kxx.shape[0],Kxx.shape[1],self._woodbury_chol.shape[2]))
|
||||
for i in range(var.shape[2]):
|
||||
tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0]
|
||||
var[:, :, i] = (Kxx - tdot(tmp.T))
|
||||
var = var
|
||||
else:
|
||||
Kxx = kern.Kdiag(Xnew)
|
||||
if self._woodbury_chol.ndim == 2:
|
||||
tmp = dtrtrs(self._woodbury_chol, Kx)[0]
|
||||
var = (Kxx - np.square(tmp).sum(0))[:,None]
|
||||
elif self._woodbury_chol.ndim == 3: # Missing data
|
||||
var = np.empty((Kxx.shape[0],self._woodbury_chol.shape[2]))
|
||||
for i in range(var.shape[1]):
|
||||
tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0]
|
||||
var[:, i] = (Kxx - np.square(tmp).sum(0))
|
||||
var = var
|
||||
return mu, var
|
||||
|
|
|
|||
|
|
@ -37,16 +37,14 @@ class Metropolis_Hastings(object):
|
|||
|
||||
def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
|
||||
current = self.model.optimizer_array
|
||||
fcurrent = self.model.log_likelihood() + self.model.log_prior() + \
|
||||
self.model._log_det_jacobian()
|
||||
fcurrent = self.model.log_likelihood() + self.model.log_prior()
|
||||
accepted = np.zeros(Ntotal,dtype=np.bool)
|
||||
for it in range(Ntotal):
|
||||
print("sample %d of %d\r"%(it,Ntotal),end="\t")
|
||||
print("sample %d of %d\r"%(it+1,Ntotal),end="")
|
||||
sys.stdout.flush()
|
||||
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
|
||||
self.model.optimizer_array = prop
|
||||
fprop = self.model.log_likelihood() + self.model.log_prior() + \
|
||||
self.model._log_det_jacobian()
|
||||
fprop = self.model.log_likelihood() + self.model.log_prior()
|
||||
|
||||
if fprop>fcurrent:#sample accepted, going 'uphill'
|
||||
accepted[it] = True
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ The module for psi-statistics for RBF kernel
|
|||
import numpy as np
|
||||
from paramz.caching import Cache_this
|
||||
from . import PSICOMP_RBF
|
||||
from ....util import gpu_init
|
||||
|
||||
gpu_code = """
|
||||
// define THREADNUM
|
||||
|
|
@ -238,8 +237,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
|||
self.fall_back = PSICOMP_RBF()
|
||||
|
||||
from pycuda.compiler import SourceModule
|
||||
from ....util.gpu_init import initGPU
|
||||
initGPU()
|
||||
import GPy.util.gpu_init
|
||||
|
||||
self.GPU_direct = GPU_direct
|
||||
self.gpuCache = None
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import numpy as np
|
|||
from paramz.caching import Cache_this
|
||||
from . import PSICOMP_RBF
|
||||
|
||||
|
||||
gpu_code = """
|
||||
// define THREADNUM
|
||||
|
||||
|
|
@ -287,8 +286,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
|
|||
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
|
||||
|
||||
from pycuda.compiler import SourceModule
|
||||
from ....util.gpu_init import initGPU
|
||||
initGPU()
|
||||
import GPy.util.gpu_init
|
||||
|
||||
self.GPU_direct = GPU_direct
|
||||
self.gpuCache = None
|
||||
|
|
|
|||
|
|
@ -94,6 +94,86 @@ class IBPPrior(VariationalPrior):
|
|||
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common)
|
||||
variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common)
|
||||
|
||||
class SLVMPosterior(SpikeAndSlabPosterior):
|
||||
'''
|
||||
The SpikeAndSlab distribution for variational approximations.
|
||||
'''
|
||||
def __init__(self, means, variances, binary_prob, tau=None, name='latent space'):
|
||||
"""
|
||||
binary_prob : the probability of the distribution on the slab part.
|
||||
"""
|
||||
from paramz.transformations import Logexp
|
||||
super(SLVMPosterior, self).__init__(means, variances, binary_prob, group_spike=False, name=name)
|
||||
self.tau = Param("tau_", np.ones((self.gamma.shape[1],2)), Logexp())
|
||||
self.link_parameter(self.tau)
|
||||
|
||||
def set_gradients(self, grad):
|
||||
self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad
|
||||
|
||||
def __getitem__(self, s):
|
||||
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
|
||||
import copy
|
||||
n = self.__new__(self.__class__, self.name)
|
||||
dc = self.__dict__.copy()
|
||||
dc['mean'] = self.mean[s]
|
||||
dc['variance'] = self.variance[s]
|
||||
dc['binary_prob'] = self.binary_prob[s]
|
||||
dc['tau'] = self.tau
|
||||
dc['parameters'] = copy.copy(self.parameters)
|
||||
n.__dict__.update(dc)
|
||||
n.parameters[dc['mean']._parent_index_] = dc['mean']
|
||||
n.parameters[dc['variance']._parent_index_] = dc['variance']
|
||||
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
||||
n.parameters[dc['tau']._parent_index_] = dc['tau']
|
||||
n._gradient_array_ = None
|
||||
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size
|
||||
n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize
|
||||
n.ndim = n.mean.ndim
|
||||
n.shape = n.mean.shape
|
||||
n.num_data = n.mean.shape[0]
|
||||
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
|
||||
return n
|
||||
else:
|
||||
return super(IBPPosterior, self).__getitem__(s)
|
||||
|
||||
class SLVMPrior(VariationalPrior):
|
||||
def __init__(self, input_dim, alpha =1., beta=1., Z=None, name='SLVMPrior', **kw):
|
||||
super(SLVMPrior, self).__init__(name=name, **kw)
|
||||
self.input_dim = input_dim
|
||||
self.variance = 1.
|
||||
self.alpha = alpha
|
||||
self.beta = beta
|
||||
self.Z = Z
|
||||
if Z is not None:
|
||||
assert np.all(np.unique(Z)==np.array([0,1]))
|
||||
|
||||
def KL_divergence(self, variational_posterior):
|
||||
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values
|
||||
|
||||
var_mean = np.square(mu)/self.variance
|
||||
var_S = (S/self.variance - np.log(S))
|
||||
part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
|
||||
|
||||
from scipy.special import betaln,digamma
|
||||
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(self.alpha,self.beta)*self.input_dim \
|
||||
-betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*digamma(tau[:,0])).sum() + \
|
||||
((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*digamma(tau[:,1])).sum() + ((self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum()
|
||||
|
||||
return part1+part2
|
||||
|
||||
def update_gradients_KL(self, variational_posterior):
|
||||
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values
|
||||
|
||||
variational_posterior.mean.gradient -= gamma*mu/self.variance
|
||||
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
|
||||
from scipy.special import digamma,polygamma
|
||||
dgamma = np.log(gamma/(1.-gamma))+ (digamma(tau[:,1])-digamma(tau[:,0]))*self.Z
|
||||
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
common = (self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
|
||||
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*polygamma(1,tau[:,0])+common)
|
||||
variational_posterior.tau.gradient[:,1] = -((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*polygamma(1,tau[:,1])+common)
|
||||
|
||||
|
||||
class SSGPLVM(SparseGP_MPI):
|
||||
"""
|
||||
Spike-and-Slab Gaussian Process Latent Variable Model
|
||||
|
|
@ -107,7 +187,7 @@ class SSGPLVM(SparseGP_MPI):
|
|||
|
||||
"""
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False,SLVM=False, alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
|
||||
|
||||
self.group_spike = group_spike
|
||||
self.init = init
|
||||
|
|
@ -152,6 +232,9 @@ class SSGPLVM(SparseGP_MPI):
|
|||
if IBP:
|
||||
self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
|
||||
X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
|
||||
elif SLVM:
|
||||
self.variational_prior = SLVMPrior(input_dim=input_dim, alpha=alpha, beta=beta, Z=connM) if variational_prior is None else variational_prior
|
||||
X = SLVMPosterior(X, X_variance, gamma, tau=tau)
|
||||
else:
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
|
||||
X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ class InferenceXTestCase(unittest.TestCase):
|
|||
x, mi = m.infer_newX(m.Y, optimize=True)
|
||||
np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)
|
||||
|
||||
|
||||
class HMCSamplerTest(unittest.TestCase):
|
||||
|
||||
def test_sampling(self):
|
||||
|
|
@ -65,6 +64,21 @@ class HMCSamplerTest(unittest.TestCase):
|
|||
|
||||
hmc = GPy.inference.mcmc.HMC(m,stepsize=1e-2)
|
||||
s = hmc.sample(num_samples=3)
|
||||
|
||||
class MCMCSamplerTest(unittest.TestCase):
|
||||
|
||||
def test_sampling(self):
|
||||
np.random.seed(1)
|
||||
x = np.linspace(0.,2*np.pi,100)[:,None]
|
||||
y = -np.cos(x)+np.random.randn(*x.shape)*0.3+1
|
||||
|
||||
m = GPy.models.GPRegression(x,y)
|
||||
m.kern.lengthscale.set_prior(GPy.priors.Gamma.from_EV(1.,10.))
|
||||
m.kern.variance.set_prior(GPy.priors.Gamma.from_EV(1.,10.))
|
||||
m.likelihood.variance.set_prior(GPy.priors.Gamma.from_EV(1.,10.))
|
||||
|
||||
mcmc = GPy.inference.mcmc.Metropolis_Hastings(m)
|
||||
mcmc.sample(Ntotal=100, Nburn=10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -22,6 +22,44 @@ class MiscTests(unittest.TestCase):
|
|||
self.assertTrue(m.checkgrad())
|
||||
m.predict(m.X)
|
||||
|
||||
def test_raw_predict_numerical_stability(self):
|
||||
"""
|
||||
Test whether the predicted variance of normal GP goes negative under numerical unstable situation.
|
||||
Thanks simbartonels@github for reporting the bug and providing the following example.
|
||||
"""
|
||||
|
||||
# set seed for reproducability
|
||||
np.random.seed(3)
|
||||
# Definition of the Branin test function
|
||||
def branin(X):
|
||||
y = (X[:,1]-5.1/(4*np.pi**2)*X[:,0]**2+5*X[:,0]/np.pi-6)**2
|
||||
y += 10*(1-1/(8*np.pi))*np.cos(X[:,0])+10
|
||||
return(y)
|
||||
# Training set defined as a 5*5 grid:
|
||||
xg1 = np.linspace(-5,10,5)
|
||||
xg2 = np.linspace(0,15,5)
|
||||
X = np.zeros((xg1.size * xg2.size,2))
|
||||
for i,x1 in enumerate(xg1):
|
||||
for j,x2 in enumerate(xg2):
|
||||
X[i+xg1.size*j,:] = [x1,x2]
|
||||
Y = branin(X)[:,None]
|
||||
# Fit a GP
|
||||
# Create an exponentiated quadratic plus bias covariance function
|
||||
k = GPy.kern.RBF(input_dim=2, ARD = True)
|
||||
# Build a GP model
|
||||
m = GPy.models.GPRegression(X,Y,k)
|
||||
# fix the noise variance
|
||||
m.likelihood.variance.fix(1e-5)
|
||||
# Randomize the model and optimize
|
||||
m.randomize()
|
||||
m.optimize()
|
||||
# Compute the mean of model prediction on 1e5 Monte Carlo samples
|
||||
Xp = np.random.uniform(size=(1e5,2))
|
||||
Xp[:,0] = Xp[:,0]*15-5
|
||||
Xp[:,1] = Xp[:,1]*15
|
||||
_, var = m.predict(Xp)
|
||||
self.assertTrue(np.all(var>=0.))
|
||||
|
||||
def test_raw_predict(self):
|
||||
k = GPy.kern.RBF(1)
|
||||
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
|
||||
|
|
@ -119,6 +157,7 @@ class MiscTests(unittest.TestCase):
|
|||
# Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
|
||||
Kinv = m.posterior.woodbury_inv
|
||||
K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
|
||||
K_hat = np.clip(K_hat, 1e-15, np.inf)
|
||||
|
||||
mu, covar = m._raw_predict(self.X_new, full_cov=True)
|
||||
self.assertEquals(mu.shape, (self.N_new, self.D))
|
||||
|
|
@ -677,7 +716,19 @@ class GradientTests(np.testing.TestCase):
|
|||
lik = GPy.likelihoods.Gaussian()
|
||||
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
|
||||
def test_ssgplvm(self):
|
||||
from GPy import kern
|
||||
from GPy.models import SSGPLVM
|
||||
from GPy.examples.dimensionality_reduction import _simulate_matern
|
||||
|
||||
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
|
||||
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
|
||||
Y = Ylist[0]
|
||||
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
|
||||
m = SSGPLVM(Y, Q, init="rand", num_inducing=num_inducing, kernel=k, group_spike=True)
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Running unit tests, please be (very) patient...")
|
||||
|
|
|
|||
|
|
@ -11,31 +11,34 @@ gpu_context = None
|
|||
MPI_enabled = False
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
MPI_enabled = True
|
||||
import pycuda.autoinit
|
||||
gpu_initialized = True
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
def initGPU():
|
||||
try:
|
||||
if MPI_enabled and MPI.COMM_WORLD.size>1:
|
||||
from .parallel import get_id_within_node
|
||||
gpuid = get_id_within_node()
|
||||
import pycuda.driver
|
||||
pycuda.driver.init()
|
||||
if gpuid>=pycuda.driver.Device.count():
|
||||
print('['+MPI.Get_processor_name()+'] more processes than the GPU numbers!')
|
||||
raise
|
||||
gpu_device = pycuda.driver.Device(gpuid)
|
||||
gpu_context = gpu_device.make_context()
|
||||
gpu_initialized = True
|
||||
else:
|
||||
import pycuda.autoinit
|
||||
gpu_initialized = True
|
||||
except:
|
||||
pass
|
||||
# def initGPU():
|
||||
# try:
|
||||
# from mpi4py import MPI
|
||||
# MPI_enabled = True
|
||||
# except:
|
||||
# pass
|
||||
# try:
|
||||
# if MPI_enabled and MPI.COMM_WORLD.size>1:
|
||||
# from .parallel import get_id_within_node
|
||||
# gpuid = get_id_within_node()
|
||||
# import pycuda.driver
|
||||
# pycuda.driver.init()
|
||||
# if gpuid>=pycuda.driver.Device.count():
|
||||
# print('['+MPI.Get_processor_name()+'] more processes than the GPU numbers!')
|
||||
# raise
|
||||
# gpu_device = pycuda.driver.Device(gpuid)
|
||||
# gpu_context = gpu_device.make_context()
|
||||
# gpu_initialized = True
|
||||
# else:
|
||||
# import pycuda.autoinit
|
||||
# gpu_initialized = True
|
||||
# except:
|
||||
# pass
|
||||
|
||||
def closeGPU():
|
||||
if gpu_context is not None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue