From 080e4ebd60c8f0b9caaf163983c952f00bcc21f5 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Fri, 11 Dec 2015 10:46:46 +0000 Subject: [PATCH 01/11] add the import of transformation __fixed__ --- GPy/core/parameterization/transformations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py index 1799a06d..936c7a64 100644 --- a/GPy/core/parameterization/transformations.py +++ b/GPy/core/parameterization/transformations.py @@ -1,4 +1,5 @@ # Copyright (c) 2014, Max Zwiessele, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) -from paramz.transformations import * \ No newline at end of file +from paramz.transformations import * +from paramz.transformations import __fixed__ \ No newline at end of file From 60b2a57ea81fb72c7d8cfbe22693d0f6a14c1b5d Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 14 Jan 2016 09:41:59 +0000 Subject: [PATCH 02/11] implement slvm --- GPy/models/ss_gplvm.py | 85 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index 2c413ecd..c8ff1664 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -94,6 +94,86 @@ class IBPPrior(VariationalPrior): variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common) variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common) +class SLVMPosterior(SpikeAndSlabPosterior): + ''' + The SpikeAndSlab distribution for variational approximations. + ''' + def __init__(self, means, variances, binary_prob, tau=None, name='latent space'): + """ + binary_prob : the probability of the distribution on the slab part. + """ + from paramz.transformations import Logexp + super(SLVMPosterior, self).__init__(means, variances, binary_prob, group_spike=False, name=name) + self.tau = Param("tau_", np.ones((self.gamma.shape[1],2)), Logexp()) + self.link_parameter(self.tau) + + def set_gradients(self, grad): + self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad + + def __getitem__(self, s): + if isinstance(s, (int, slice, tuple, list, np.ndarray)): + import copy + n = self.__new__(self.__class__, self.name) + dc = self.__dict__.copy() + dc['mean'] = self.mean[s] + dc['variance'] = self.variance[s] + dc['binary_prob'] = self.binary_prob[s] + dc['tau'] = self.tau + dc['parameters'] = copy.copy(self.parameters) + n.__dict__.update(dc) + n.parameters[dc['mean']._parent_index_] = dc['mean'] + n.parameters[dc['variance']._parent_index_] = dc['variance'] + n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob'] + n.parameters[dc['tau']._parent_index_] = dc['tau'] + n._gradient_array_ = None + oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size + n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize + n.ndim = n.mean.ndim + n.shape = n.mean.shape + n.num_data = n.mean.shape[0] + n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1 + return n + else: + return super(IBPPosterior, self).__getitem__(s) + +class SLVMPrior(VariationalPrior): + def __init__(self, input_dim, alpha =1., beta=1., Z=None, name='SLVMPrior', **kw): + super(SLVMPrior, self).__init__(name=name, **kw) + self.input_dim = input_dim + self.variance = 1. + self.alpha = alpha + self.beta = beta + self.Z = Z + if Z is not None: + assert np.all(np.unique(Z)==np.array([0,1])) + + def KL_divergence(self, variational_posterior): + mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values + + var_mean = np.square(mu)/self.variance + var_S = (S/self.variance - np.log(S)) + part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2. + + from scipy.special import betaln,digamma + part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(self.alpha,self.beta)*self.input_dim \ + -betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*digamma(tau[:,0])).sum() + \ + ((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*digamma(tau[:,1])).sum() + ((self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum() + + return part1+part2 + + def update_gradients_KL(self, variational_posterior): + mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values + + variational_posterior.mean.gradient -= gamma*mu/self.variance + variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2. + from scipy.special import digamma,polygamma + dgamma = np.log(gamma/(1.-gamma))+ (digamma(tau[:,1])-digamma(tau[:,0]))*self.Z + variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. + common = (self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1)) + variational_posterior.tau.gradient[:,0] = -((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*polygamma(1,tau[:,0])+common) + variational_posterior.tau.gradient[:,1] = -((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*polygamma(1,tau[:,1])+common) + + class SSGPLVM(SparseGP_MPI): """ Spike-and-Slab Gaussian Process Latent Variable Model @@ -107,7 +187,7 @@ class SSGPLVM(SparseGP_MPI): """ def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10, - Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs): + Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False,SLVM=False, alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs): self.group_spike = group_spike self.init = init @@ -152,6 +232,9 @@ class SSGPLVM(SparseGP_MPI): if IBP: self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX) + elif SLVM: + self.variational_prior = SLVMPrior(input_dim=input_dim, alpha=alpha, beta=beta, Z=connM) if variational_prior is None else variational_prior + X = SLVMPosterior(X, X_variance, gamma, tau=tau) else: self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX) From 5f417565fb900176fe275eaefc81f0f91bf06e59 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 14 Jan 2016 15:56:48 +0000 Subject: [PATCH 03/11] slvm gamma mean-field --- GPy/models/ss_gplvm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index c8ff1664..4bc9a173 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -104,7 +104,7 @@ class SLVMPosterior(SpikeAndSlabPosterior): """ from paramz.transformations import Logexp super(SLVMPosterior, self).__init__(means, variances, binary_prob, group_spike=False, name=name) - self.tau = Param("tau_", np.ones((self.gamma.shape[1],2)), Logexp()) + self.tau = Param("tau_", np.ones((self.gamma.shape[1],2))*2, Logexp()) self.link_parameter(self.tau) def set_gradients(self, grad): @@ -152,7 +152,7 @@ class SLVMPrior(VariationalPrior): var_mean = np.square(mu)/self.variance var_S = (S/self.variance - np.log(S)) - part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2. + part1 = ((np.log(self.variance)-1. +var_mean + var_S)).sum()/2. from scipy.special import betaln,digamma part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(self.alpha,self.beta)*self.input_dim \ @@ -164,11 +164,11 @@ class SLVMPrior(VariationalPrior): def update_gradients_KL(self, variational_posterior): mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values - variational_posterior.mean.gradient -= gamma*mu/self.variance - variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2. + variational_posterior.mean.gradient -= mu/self.variance + variational_posterior.variance.gradient -= (1./self.variance - 1./S) /2. from scipy.special import digamma,polygamma dgamma = np.log(gamma/(1.-gamma))+ (digamma(tau[:,1])-digamma(tau[:,0]))*self.Z - variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. + variational_posterior.binary_prob.gradient -= dgamma common = (self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1)) variational_posterior.tau.gradient[:,0] = -((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*polygamma(1,tau[:,0])+common) variational_posterior.tau.gradient[:,1] = -((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*polygamma(1,tau[:,1])+common) From 8b279175c5894b02c11f694a35e57490ae69acfa Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 21 Jan 2016 11:22:57 +0000 Subject: [PATCH 04/11] fix the issue of negative prediction variance of normal GP --- GPy/core/gp.py | 6 +++ GPy/core/sparse_gp.py | 7 +++- .../exact_gaussian_inference.py | 3 +- .../latent_function_inference/posterior.py | 34 ++++++++++++++++- GPy/testing/inference_tests.py | 1 - GPy/testing/model_tests.py | 38 +++++++++++++++++++ 6 files changed, 84 insertions(+), 5 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index ae710355..ac08677d 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -212,6 +212,12 @@ class GP(Model): = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*} \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} """ + if hasattr(self.posterior, '_raw_predict'): + mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov) + if self.mean_function is not None: + mu += self.mean_function.f(Xnew) + return mu, var + if kern is None: kern = self.kern diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index d71eecc3..a0f50ce9 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -126,7 +126,12 @@ class SparseGP(GP): The implementation of that will follow. However, for each dimension the covariance changes, so if full_cov is False (standard), we return the variance for each dimension [NxD]. - """ + """ + if hasattr(self.posterior, '_raw_predict'): + mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov) + if self.mean_function is not None: + mu += self.mean_function.f(Xnew) + return mu, var if kern is None: kern = self.kern diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 0ab85586..74f66fe6 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -1,14 +1,13 @@ # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from .posterior import Posterior +from .posterior import PosteriorExact as Posterior from ...util.linalg import pdinv, dpotrs, tdot from ...util import diag import numpy as np from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) - class ExactGaussianInference(LatentFunctionInference): """ An object for inference when the likelihood is Gaussian. diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index fbd72f57..c9b8f492 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol +from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot class Posterior(object): """ @@ -187,3 +187,35 @@ class Posterior(object): if self._K_chol is None: self._K_chol = jitchol(self._K) return self._K_chol + +class PosteriorExact(Posterior): + + def _raw_predict(self, kern, Xnew, pred_var, full_cov=False): + + Kx = kern.K(pred_var, Xnew) + mu = np.dot(Kx.T, self.woodbury_vector) + if len(mu.shape)==1: + mu = mu.reshape(-1,1) + if full_cov: + Kxx = kern.K(Xnew) + if self._woodbury_chol.ndim == 2: + tmp = dtrtrs(self._woodbury_chol, Kx)[0] + var = Kxx - tdot(tmp.T) + elif self._woodbury_chol.ndim == 3: # Missing data + var = np.empty((Kxx.shape[0],Kxx.shape[1],self._woodbury_chol.shape[2])) + for i in range(var.shape[2]): + tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0] + var[:, :, i] = (Kxx - tdot(tmp.T)) + var = var + else: + Kxx = kern.Kdiag(Xnew) + if self._woodbury_chol.ndim == 2: + tmp = dtrtrs(self._woodbury_chol, Kx)[0] + var = (Kxx - np.square(tmp).sum(0))[:,None] + elif self._woodbury_chol.ndim == 3: # Missing data + var = np.empty((Kxx.shape[0],self._woodbury_chol.shape[2])) + for i in range(var.shape[1]): + tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0] + var[:, i] = (Kxx - np.square(tmp).sum(0)) + var = var + return mu, var diff --git a/GPy/testing/inference_tests.py b/GPy/testing/inference_tests.py index 7a091589..ebda08c6 100644 --- a/GPy/testing/inference_tests.py +++ b/GPy/testing/inference_tests.py @@ -50,6 +50,5 @@ class InferenceXTestCase(unittest.TestCase): x, mi = m.infer_newX(m.Y, optimize=True) np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2) - if __name__ == "__main__": unittest.main() diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 1212d746..009b4848 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -22,6 +22,44 @@ class MiscTests(unittest.TestCase): self.assertTrue(m.checkgrad()) m.predict(m.X) + def test_raw_predict_numerical_stability(self): + """ + Test whether the predicted variance of normal GP goes negative under numerical unstable situation. + Thanks simbartonels@github for reporting the bug and providing the following example. + """ + + # set seed for reproducability + np.random.seed(3) + # Definition of the Branin test function + def branin(X): + y = (X[:,1]-5.1/(4*np.pi**2)*X[:,0]**2+5*X[:,0]/np.pi-6)**2 + y += 10*(1-1/(8*np.pi))*np.cos(X[:,0])+10 + return(y) + # Training set defined as a 5*5 grid: + xg1 = np.linspace(-5,10,5) + xg2 = np.linspace(0,15,5) + X = np.zeros((xg1.size * xg2.size,2)) + for i,x1 in enumerate(xg1): + for j,x2 in enumerate(xg2): + X[i+xg1.size*j,:] = [x1,x2] + Y = branin(X)[:,None] + # Fit a GP + # Create an exponentiated quadratic plus bias covariance function + k = GPy.kern.RBF(input_dim=2, ARD = True) + # Build a GP model + m = GPy.models.GPRegression(X,Y,k) + # fix the noise variance + m.likelihood.variance.fix(1e-5) + # Randomize the model and optimize + m.randomize() + m.optimize() + # Compute the mean of model prediction on 1e5 Monte Carlo samples + Xp = np.random.uniform(size=(1e5,2)) + Xp[:,0] = Xp[:,0]*15-5 + Xp[:,1] = Xp[:,1]*15 + _, var = m.predict(Xp) + self.assertTrue(np.all(var>=0.)) + def test_raw_predict(self): k = GPy.kern.RBF(1) m = GPy.models.GPRegression(self.X, self.Y, kernel=k) From 0dd52981d0398a4e325b733611510e54818698ca Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 9 Feb 2016 16:48:23 +0000 Subject: [PATCH 05/11] get rid of mpi4py import --- GPy/util/gpu_init.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/GPy/util/gpu_init.py b/GPy/util/gpu_init.py index 0c496db3..94763d8b 100644 --- a/GPy/util/gpu_init.py +++ b/GPy/util/gpu_init.py @@ -10,15 +10,12 @@ gpu_device = None gpu_context = None MPI_enabled = False -try: - from mpi4py import MPI - MPI_enabled = True -except: - pass - - - def initGPU(): + try: + from mpi4py import MPI + MPI_enabled = True + except: + pass try: if MPI_enabled and MPI.COMM_WORLD.size>1: from .parallel import get_id_within_node From a77a6755495fe3871ca9aa53dd107554f26cc01b Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 9 Feb 2016 17:22:38 +0000 Subject: [PATCH 06/11] fix gpu initialziation --- GPy/kern/src/psi_comp/rbf_psi_gpucomp.py | 3 -- GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py | 2 - GPy/util/gpu_init.py | 52 ++++++++++++---------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py b/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py index baab83ec..e9268a3e 100644 --- a/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py +++ b/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py @@ -5,7 +5,6 @@ The module for psi-statistics for RBF kernel import numpy as np from paramz.caching import Cache_this from . import PSICOMP_RBF -from ....util import gpu_init gpu_code = """ // define THREADNUM @@ -238,8 +237,6 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF): self.fall_back = PSICOMP_RBF() from pycuda.compiler import SourceModule - from ....util.gpu_init import initGPU - initGPU() self.GPU_direct = GPU_direct self.gpuCache = None diff --git a/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py b/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py index 844f944e..62876785 100644 --- a/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py +++ b/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py @@ -287,8 +287,6 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF): def __init__(self, threadnum=128, blocknum=15, GPU_direct=False): from pycuda.compiler import SourceModule - from ....util.gpu_init import initGPU - initGPU() self.GPU_direct = GPU_direct self.gpuCache = None diff --git a/GPy/util/gpu_init.py b/GPy/util/gpu_init.py index 94763d8b..19339b91 100644 --- a/GPy/util/gpu_init.py +++ b/GPy/util/gpu_init.py @@ -10,29 +10,35 @@ gpu_device = None gpu_context = None MPI_enabled = False -def initGPU(): - try: - from mpi4py import MPI - MPI_enabled = True - except: - pass - try: - if MPI_enabled and MPI.COMM_WORLD.size>1: - from .parallel import get_id_within_node - gpuid = get_id_within_node() - import pycuda.driver - pycuda.driver.init() - if gpuid>=pycuda.driver.Device.count(): - print('['+MPI.Get_processor_name()+'] more processes than the GPU numbers!') - raise - gpu_device = pycuda.driver.Device(gpuid) - gpu_context = gpu_device.make_context() - gpu_initialized = True - else: - import pycuda.autoinit - gpu_initialized = True - except: - pass +try: + import pycuda.autoinit + gpu_initialized = True +except: + pass + +# def initGPU(): +# try: +# from mpi4py import MPI +# MPI_enabled = True +# except: +# pass +# try: +# if MPI_enabled and MPI.COMM_WORLD.size>1: +# from .parallel import get_id_within_node +# gpuid = get_id_within_node() +# import pycuda.driver +# pycuda.driver.init() +# if gpuid>=pycuda.driver.Device.count(): +# print('['+MPI.Get_processor_name()+'] more processes than the GPU numbers!') +# raise +# gpu_device = pycuda.driver.Device(gpuid) +# gpu_context = gpu_device.make_context() +# gpu_initialized = True +# else: +# import pycuda.autoinit +# gpu_initialized = True +# except: +# pass def closeGPU(): if gpu_context is not None: From 3917d9cdbafb4c3a793a27709fb6dab94e6ed2e4 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 9 Feb 2016 17:25:25 +0000 Subject: [PATCH 07/11] fix gpu initialziation --- GPy/kern/src/psi_comp/rbf_psi_gpucomp.py | 1 + GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py b/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py index e9268a3e..4eda1b32 100644 --- a/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py +++ b/GPy/kern/src/psi_comp/rbf_psi_gpucomp.py @@ -237,6 +237,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF): self.fall_back = PSICOMP_RBF() from pycuda.compiler import SourceModule + import GPy.util.gpu_init self.GPU_direct = GPU_direct self.gpuCache = None diff --git a/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py b/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py index 62876785..04063dcc 100644 --- a/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py +++ b/GPy/kern/src/psi_comp/ssrbf_psi_gpucomp.py @@ -7,7 +7,6 @@ import numpy as np from paramz.caching import Cache_this from . import PSICOMP_RBF - gpu_code = """ // define THREADNUM @@ -287,6 +286,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF): def __init__(self, threadnum=128, blocknum=15, GPU_direct=False): from pycuda.compiler import SourceModule + import GPy.util.gpu_init self.GPU_direct = GPU_direct self.gpuCache = None From 23b929c7e46a95c2e7faaad82ed7b1921ae4edd5 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 22 Feb 2016 20:56:39 +0000 Subject: [PATCH 08/11] fallback original slvm kl divergence --- GPy/models/ss_gplvm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index 4bc9a173..c8ff1664 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -104,7 +104,7 @@ class SLVMPosterior(SpikeAndSlabPosterior): """ from paramz.transformations import Logexp super(SLVMPosterior, self).__init__(means, variances, binary_prob, group_spike=False, name=name) - self.tau = Param("tau_", np.ones((self.gamma.shape[1],2))*2, Logexp()) + self.tau = Param("tau_", np.ones((self.gamma.shape[1],2)), Logexp()) self.link_parameter(self.tau) def set_gradients(self, grad): @@ -152,7 +152,7 @@ class SLVMPrior(VariationalPrior): var_mean = np.square(mu)/self.variance var_S = (S/self.variance - np.log(S)) - part1 = ((np.log(self.variance)-1. +var_mean + var_S)).sum()/2. + part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2. from scipy.special import betaln,digamma part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(self.alpha,self.beta)*self.input_dim \ @@ -164,11 +164,11 @@ class SLVMPrior(VariationalPrior): def update_gradients_KL(self, variational_posterior): mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values - variational_posterior.mean.gradient -= mu/self.variance - variational_posterior.variance.gradient -= (1./self.variance - 1./S) /2. + variational_posterior.mean.gradient -= gamma*mu/self.variance + variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2. from scipy.special import digamma,polygamma dgamma = np.log(gamma/(1.-gamma))+ (digamma(tau[:,1])-digamma(tau[:,0]))*self.Z - variational_posterior.binary_prob.gradient -= dgamma + variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. common = (self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1)) variational_posterior.tau.gradient[:,0] = -((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*polygamma(1,tau[:,0])+common) variational_posterior.tau.gradient[:,1] = -((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*polygamma(1,tau[:,1])+common) From e9cc56e8e8720e895797b9a20f5ea3a30177da9f Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 10 Mar 2016 18:14:25 +0000 Subject: [PATCH 09/11] add ssgplvm model test --- GPy/testing/model_tests.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 009b4848..8ff06e65 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -704,7 +704,19 @@ class GradientTests(np.testing.TestCase): lik = GPy.likelihoods.Gaussian() m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik) self.assertTrue(m.checkgrad()) - + + def test_ssgplvm(self): + from GPy import kern + from GPy.models import SSGPLVM + from GPy.examples.dimensionality_reduction import _simulate_matern + + D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9 + _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False) + Y = Ylist[0] + k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + # k = kern.RBF(Q, ARD=True, lengthscale=10.) + m = SSGPLVM(Y, Q, init="rand", num_inducing=num_inducing, kernel=k, group_spike=True) + self.assertTrue(m.checkgrad()) if __name__ == "__main__": print("Running unit tests, please be (very) patient...") From f2b813551a7dcde7b522d3486d9bef4219039015 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 10 Mar 2016 18:37:53 +0000 Subject: [PATCH 10/11] bug fix for mcmc sampler and add test case --- GPy/inference/mcmc/samplers.py | 8 +++----- GPy/testing/inference_tests.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/GPy/inference/mcmc/samplers.py b/GPy/inference/mcmc/samplers.py index 5ec684cc..d2f11a49 100644 --- a/GPy/inference/mcmc/samplers.py +++ b/GPy/inference/mcmc/samplers.py @@ -37,16 +37,14 @@ class Metropolis_Hastings(object): def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400): current = self.model.optimizer_array - fcurrent = self.model.log_likelihood() + self.model.log_prior() + \ - self.model._log_det_jacobian() + fcurrent = self.model.log_likelihood() + self.model.log_prior() accepted = np.zeros(Ntotal,dtype=np.bool) for it in range(Ntotal): - print("sample %d of %d\r"%(it,Ntotal),end="\t") + print("sample %d of %d\r"%(it+1,Ntotal),end="") sys.stdout.flush() prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale) self.model.optimizer_array = prop - fprop = self.model.log_likelihood() + self.model.log_prior() + \ - self.model._log_det_jacobian() + fprop = self.model.log_likelihood() + self.model.log_prior() if fprop>fcurrent:#sample accepted, going 'uphill' accepted[it] = True diff --git a/GPy/testing/inference_tests.py b/GPy/testing/inference_tests.py index 33ec3258..4bd2bc4f 100644 --- a/GPy/testing/inference_tests.py +++ b/GPy/testing/inference_tests.py @@ -64,6 +64,21 @@ class HMCSamplerTest(unittest.TestCase): hmc = GPy.inference.mcmc.HMC(m,stepsize=1e-2) s = hmc.sample(num_samples=3) + +class MCMCSamplerTest(unittest.TestCase): + + def test_sampling(self): + np.random.seed(1) + x = np.linspace(0.,2*np.pi,100)[:,None] + y = -np.cos(x)+np.random.randn(*x.shape)*0.3+1 + + m = GPy.models.GPRegression(x,y) + m.kern.lengthscale.set_prior(GPy.priors.Gamma.from_EV(1.,10.)) + m.kern.variance.set_prior(GPy.priors.Gamma.from_EV(1.,10.)) + m.likelihood.variance.set_prior(GPy.priors.Gamma.from_EV(1.,10.)) + + mcmc = GPy.inference.mcmc.Metropolis_Hastings(m) + mcmc.sample(Ntotal=100, Nburn=10) if __name__ == "__main__": unittest.main() From 67ba9b60c668bb2018672bb590f5509719b6eba1 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 17 Mar 2016 11:29:33 +0000 Subject: [PATCH 11/11] move _raw_predict into posterior object --- GPy/core/gp.py | 35 +------- GPy/core/sparse_gp.py | 87 ------------------- .../latent_function_inference/posterior.py | 51 +++++++++++ GPy/testing/model_tests.py | 1 + 4 files changed, 53 insertions(+), 121 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 0b87b1d5..d9db66ef 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -212,42 +212,9 @@ class GP(Model): = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*} \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} """ - if hasattr(self.posterior, '_raw_predict'): - mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov) - if self.mean_function is not None: - mu += self.mean_function.f(Xnew) - return mu, var - - if kern is None: - kern = self.kern - - Kx = kern.K(self._predictive_variable, Xnew) - mu = np.dot(Kx.T, self.posterior.woodbury_vector) - if len(mu.shape)==1: - mu = mu.reshape(-1,1) - if full_cov: - Kxx = kern.K(Xnew) - if self.posterior.woodbury_inv.ndim == 2: - var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) - elif self.posterior.woodbury_inv.ndim == 3: # Missing data - var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2])) - from ..util.linalg import mdot - for i in range(var.shape[2]): - var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx)) - var = var - else: - Kxx = kern.Kdiag(Xnew) - if self.posterior.woodbury_inv.ndim == 2: - var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None] - elif self.posterior.woodbury_inv.ndim == 3: # Missing data - var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2])) - for i in range(var.shape[1]): - var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0))) - var = var - #add in the mean function + mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov) if self.mean_function is not None: mu += self.mean_function.f(Xnew) - return mu, var def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None): diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 488b7026..7c0c1d18 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -113,90 +113,3 @@ class SparseGP(GP): self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) self._Zgrad = self.Z.gradient.copy() - - def _raw_predict(self, Xnew, full_cov=False, kern=None): - """ - Make a prediction for the latent function values. - - For certain inputs we give back a full_cov of shape NxN, - if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, - we take only the diagonal elements across N. - - For uncertain inputs, the SparseGP bound produces cannot predict the full covariance matrix full_cov for now. - The implementation of that will follow. However, for each dimension the - covariance changes, so if full_cov is False (standard), we return the variance - for each dimension [NxD]. - """ - if hasattr(self.posterior, '_raw_predict'): - mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov) - if self.mean_function is not None: - mu += self.mean_function.f(Xnew) - return mu, var - - if kern is None: kern = self.kern - - if not isinstance(Xnew, VariationalPosterior): - # Kx = kern.K(self._predictive_variable, Xnew) - # mu = np.dot(Kx.T, self.posterior.woodbury_vector) - # if full_cov: - # Kxx = kern.K(Xnew) - # if self.posterior.woodbury_inv.ndim == 2: - # var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) - # elif self.posterior.woodbury_inv.ndim == 3: - # var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2])) - # for i in range(var.shape[2]): - # var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx)) - # var = var - # else: - # Kxx = kern.Kdiag(Xnew) - # if self.posterior.woodbury_inv.ndim == 2: - # var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None] - # elif self.posterior.woodbury_inv.ndim == 3: - # var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2])) - # for i in range(var.shape[1]): - # var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0))) - # var = var - # #add in the mean function - # if self.mean_function is not None: - # mu += self.mean_function.f(Xnew) - mu, var = super(SparseGP, self)._raw_predict(Xnew, full_cov, kern) - else: - psi0_star = kern.psi0(self._predictive_variable, Xnew) - psi1_star = kern.psi1(self._predictive_variable, Xnew) - psi2_star = kern.psi2n(self._predictive_variable, Xnew) - la = self.posterior.woodbury_vector - mu = np.dot(psi1_star, la) # TODO: dimensions? - N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1] - - if full_cov: - raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.") - var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1])) - di = np.diag_indices(la.shape[1]) - else: - tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:] - var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None] - if self.posterior.woodbury_inv.ndim==2: - var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.flat)[:,None] - else: - var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.reshape(-1,D)) - assert np.all(var>=-1e-5), "The predicted variance goes negative!: "+str(var) - var = np.clip(var,1e-15,np.inf) - -# for i in range(Xnew.shape[0]): -# _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]] -# psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var)) -# tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]])) -# -# var_ = mdot(la.T, tmp, la) -# p0 = psi0_star[i] -# t = np.atleast_3d(self.posterior.woodbury_inv) -# t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2) -# -# if full_cov: -# var_[di] += p0 -# var_[di] += -t2 -# var[i] = var_ -# else: -# var[i] = np.diag(var_)+p0-t2 - - return mu, var diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index c9b8f492..73b9dff0 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -3,6 +3,7 @@ import numpy as np from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot +from GPy.core.parameterization.variational import VariationalPosterior class Posterior(object): """ @@ -187,6 +188,56 @@ class Posterior(object): if self._K_chol is None: self._K_chol = jitchol(self._K) return self._K_chol + + def _raw_predict(self, kern, Xnew, pred_var, full_cov=False): + woodbury_vector = self.woodbury_vector + woodbury_inv = self.woodbury_inv + + if not isinstance(Xnew, VariationalPosterior): + Kx = kern.K(pred_var, Xnew) + mu = np.dot(Kx.T, woodbury_vector) + if len(mu.shape)==1: + mu = mu.reshape(-1,1) + if full_cov: + Kxx = kern.K(Xnew) + if woodbury_inv.ndim == 2: + var = Kxx - np.dot(Kx.T, np.dot(woodbury_inv, Kx)) + elif woodbury_inv.ndim == 3: # Missing data + var = np.empty((Kxx.shape[0],Kxx.shape[1],woodbury_inv.shape[2])) + from ...util.linalg import mdot + for i in range(var.shape[2]): + var[:, :, i] = (Kxx - mdot(Kx.T, woodbury_inv[:, :, i], Kx)) + var = var + else: + Kxx = kern.Kdiag(Xnew) + if woodbury_inv.ndim == 2: + var = (Kxx - np.sum(np.dot(woodbury_inv.T, Kx) * Kx, 0))[:,None] + elif woodbury_inv.ndim == 3: # Missing data + var = np.empty((Kxx.shape[0],woodbury_inv.shape[2])) + for i in range(var.shape[1]): + var[:, i] = (Kxx - (np.sum(np.dot(woodbury_inv[:, :, i].T, Kx) * Kx, 0))) + var = var + else: + psi0_star = kern.psi0(pred_var, Xnew) + psi1_star = kern.psi1(pred_var, Xnew) + psi2_star = kern.psi2n(pred_var, Xnew) + la = woodbury_vector + mu = np.dot(psi1_star, la) # TODO: dimensions? + N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1] + + if full_cov: + raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.") + var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1])) + di = np.diag_indices(la.shape[1]) + else: + tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:] + var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None] + if woodbury_inv.ndim==2: + var += -psi2_star.reshape(N,-1).dot(woodbury_inv.flat)[:,None] + else: + var += -psi2_star.reshape(N,-1).dot(woodbury_inv.reshape(-1,D)) + var = np.clip(var,1e-15,np.inf) + return mu, var class PosteriorExact(Posterior): diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 55e5309c..e83fb993 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -157,6 +157,7 @@ class MiscTests(unittest.TestCase): # Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression Kinv = m.posterior.woodbury_inv K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new)) + K_hat = np.clip(K_hat, 1e-15, np.inf) mu, covar = m._raw_predict(self.X_new, full_cov=True) self.assertEquals(mu.shape, (self.N_new, self.D))