From f4ecb47464714fccbb89dfe9246bb7575a568944 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 8 Nov 2013 14:08:19 +0000 Subject: [PATCH 1/9] added getstate/setstate for product kernel --- GPy/kern/parts/prod.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/GPy/kern/parts/prod.py b/GPy/kern/parts/prod.py index 0549ea22..e386a292 100644 --- a/GPy/kern/parts/prod.py +++ b/GPy/kern/parts/prod.py @@ -130,3 +130,14 @@ class Prod(Kernpart): self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1) self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2) + def getstate(self): + return [self._get_params(), self.k1, self.k2, self.slice1, self.slice2, self.name] + + def setstate(self, state): + params, self.k1, self.k2, self.slice1, self.slice2, self.name = state + self._X, self._X2, self._params = np.empty(shape=(3,1)) + self._set_params(params) + + + + From c3d84f1d9d0b9cf6a99b4f9fdbaf99ae656f24a0 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 8 Nov 2013 17:39:52 +0000 Subject: [PATCH 2/9] predictive_mean and predictive_variance now use gp_var as a parameter, rather than gp_std --- GPy/likelihoods/noise_models/bernoulli_noise.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/GPy/likelihoods/noise_models/bernoulli_noise.py b/GPy/likelihoods/noise_models/bernoulli_noise.py index 2c4116da..17390e55 100644 --- a/GPy/likelihoods/noise_models/bernoulli_noise.py +++ b/GPy/likelihoods/noise_models/bernoulli_noise.py @@ -71,15 +71,19 @@ class Bernoulli(NoiseDistribution): return Z_hat, mu_hat, sigma2_hat - def _predictive_mean_analytical(self,mu,sigma): + def _predictive_mean_analytical(self,mu,variance): + if isinstance(self.gp_link,gp_transformations.Probit): - return stats.norm.cdf(mu/np.sqrt(1+sigma**2)) + return stats.norm.cdf(mu/np.sqrt(1+variance)) + elif isinstance(self.gp_link,gp_transformations.Heaviside): - return stats.norm.cdf(mu/sigma) + return stats.norm.cdf(mu/np.sqrt(variance)) + else: raise NotImplementedError - def _predictive_variance_analytical(self,mu,sigma, pred_mean): + def _predictive_variance_analytical(self,mu,variance, pred_mean): + if isinstance(self.gp_link,gp_transformations.Heaviside): return 0. else: From e3173c4ff43380d9a8f50585ad8d34ae58029c60 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 8 Nov 2013 17:40:27 +0000 Subject: [PATCH 3/9] numerical predictions fixed, sampling predictions are not working --- .../noise_models/noise_distributions.py | 120 +++++++++--------- 1 file changed, 61 insertions(+), 59 deletions(-) diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index 77cc82a4..79d9ffeb 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -11,6 +11,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf import gp_transformations from GPy.util.misc import chain_1, chain_2, chain_3 from scipy.integrate import quad +import warnings class NoiseDistribution(object): """ @@ -103,23 +104,27 @@ class NoiseDistribution(object): def int_1(f): return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) z, accuracy = quad(int_1, -np.inf, np.inf) - z /= np.sqrt(2*np.pi/tau) + #z /= np.sqrt(2*np.pi/tau) #Compute second integral for first moment def int_2(f): return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) mean, accuracy = quad(int_2, -np.inf, np.inf) - mean /= np.sqrt(2*np.pi/tau) + #mean /= np.sqrt(2*np.pi/tau) mean /= z #Compute integral for variance def int_3(f): return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) Ef2, accuracy = quad(int_3, -np.inf, np.inf) - Ef2 /= np.sqrt(2*np.pi/tau) + #Ef2 /= np.sqrt(2*np.pi/tau) Ef2 /= z variance = Ef2 - mean**2 + #Add constant to the zeroth moment + #NOTE: this constant is not needed in the other moments because it cancells out. + z /= np.sqrt(2*np.pi/tau) + return z, mean, variance def _predictive_mean_analytical(self,mu,sigma): @@ -142,7 +147,7 @@ class NoiseDistribution(object): """ raise NotImplementedError - def _predictive_mean_numerical(self,mu,sigma): + def _predictive_mean_numerical(self,mu,variance): """ Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) ) @@ -150,49 +155,51 @@ class NoiseDistribution(object): :param sigma: standard deviation of posterior """ - #FIXME: Quadrature does not work! - raise NotImplementedError - sigma2 = sigma**2 - #Compute first moment - def int_mean(f): - return self._mean(f)*np.exp(-(0.5/sigma2)*np.square(f - mu)) - scaled_mean, accuracy = quad(int_mean, -np.inf, np.inf) - mean = scaled_mean / np.sqrt(2*np.pi*(sigma2)) + def int_mean(f,m,v): + return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m)) + scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] + mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance)) return mean - def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None): + def _predictive_variance_numerical(self,mu,variance,predictive_mean=None): """ - Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + Numerical approximation to the predictive variance: V(Y_star) + + The following variance decomposition is used: + V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) :param mu: mean of posterior :param sigma: standard deviation of posterior :predictive_mean: output's predictive mean, if None _predictive_mean function will be called. """ - sigma2 = sigma**2 - normalizer = np.sqrt(2*np.pi*sigma2) + #sigma2 = sigma**2 + normalizer = np.sqrt(2*np.pi*variance) # E( V(Y_star|f_star) ) - #Compute expected value of variance - def int_var(f): - return self._variance(f)*np.exp(-(0.5/sigma2)*np.square(f - mu)) - scaled_exp_variance, accuracy = quad(int_var, -np.inf, np.inf) - exp_var = scaled_exp_variance / normalizer + def int_var(f,m,v): + return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m)) + scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] + exp_var = np.array(scaled_exp_variance)[:,None] / normalizer #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2 + + #E( E(Y_star|f_star)**2 ) if predictive_mean is None: - predictive_mean = self.predictive_mean(mu,sigma) - + predictive_mean = self.predictive_mean(mu,variance) predictive_mean_sq = predictive_mean**2 - def int_pred_mean_sq(f): - return predictive_mean_sq*np.exp(-(0.5/(sigma2))*np.square(f - mu)) - scaled_exp_exp2, accuracy = quad(int_pred_mean_sq, -np.inf, np.inf) - exp_exp2 = scaled_exp_exp2 / normalizer + def int_pred_mean_sq(f,m,v,predictive_mean_sq): + return predictive_mean_sq*np.exp(-(0.5/v)*np.square(f - m)) - var_exp = exp_exp2 - predictive_mean**2 - # V(Y_star | f_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) + scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)] + exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer + + #E( E(Y_star|f_star) )**2 + var_exp = exp_exp2 - predictive_mean_sq + + # V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) return exp_var + var_exp def pdf_link(self, link_f, y, extra_data=None): @@ -375,8 +382,7 @@ class NoiseDistribution(object): assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names()) return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta - def predictive_values(self, mu, var, full_cov=False, num_samples=30000, - sampling=False): + def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction. @@ -392,37 +398,33 @@ class NoiseDistribution(object): """ - #Get gp_samples f* using posterior mean and variance - if not full_cov: - gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()), - size=num_samples).T + if sampling: + #Get gp_samples f* using posterior mean and variance + if not full_cov: + gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()), + size=num_samples).T + else: + gp_samples = np.random.multivariate_normal(mu.flatten(), var, + size=num_samples).T + #Push gp samples (f*) through likelihood to give p(y*|f*) + samples = self.samples(gp_samples) + axis=-1 + + #Calculate mean, variance and precentiles from samples + print "WARNING: Using sampling to calculate mean, variance and predictive quantiles." + pred_mean = np.mean(samples, axis=axis)[:,None] + pred_var = np.var(samples, axis=axis)[:,None] + q1 = np.percentile(samples, 2.5, axis=axis)[:,None] + q3 = np.percentile(samples, 97.5, axis=axis)[:,None] + else: - gp_samples = np.random.multivariate_normal(mu.flatten(), var, - size=num_samples).T - #Push gp samples (f*) through likelihood to give p(y*|f*) - samples = self.samples(gp_samples) - axis=-1 + pred_mean = self.predictive_mean(mu, var) + pred_var = self.predictive_variance(mu, var, pred_mean) + print "WARNING: Predictive quantiles are only computed when sampling." + q1 = np.repeat(np.nan,pred_mean.size)[:,None] + q3 = q1.copy() - if self.analytical_mean and not sampling: - pred_mean = self.predictive_mean(mu, np.sqrt(var)) - else: - pred_mean = np.mean(samples, axis=axis) - - if self.analytical_variance and not sampling: - pred_var = self.predictive_variance(mu, np.sqrt(var), pred_mean) - else: - pred_var = np.var(samples, axis=axis) - - #Calculate quantiles from samples - q1 = np.percentile(samples, 2.5, axis=axis) - q3 = np.percentile(samples, 97.5, axis=axis) - print "WARNING: Using sampling to calculate predictive quantiles" - - pred_mean = np.vstack(pred_mean) - pred_var = np.vstack(pred_var) - q1 = np.vstack(q1) - q3 = np.vstack(q3) return pred_mean, pred_var, q1, q3 def samples(self, gp): From 604e60d5cfaeaa126cb549e88e7e9685f00a1d04 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 11 Nov 2013 08:39:58 +0000 Subject: [PATCH 4/9] Bug fixed in numerical approx. to the predictive variance. --- .../noise_models/gp_transformations.py | 2 ++ .../noise_models/noise_distributions.py | 21 ++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/GPy/likelihoods/noise_models/gp_transformations.py b/GPy/likelihoods/noise_models/gp_transformations.py index 65730418..5155a69d 100644 --- a/GPy/likelihoods/noise_models/gp_transformations.py +++ b/GPy/likelihoods/noise_models/gp_transformations.py @@ -78,9 +78,11 @@ class Probit(GPTransformation): return std_norm_pdf(f) def d2transf_df2(self,f): + #FIXME return -f * std_norm_pdf(f) def d3transf_df3(self,f): + #FIXME f2 = f**2 return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) diff --git a/GPy/likelihoods/noise_models/noise_distributions.py b/GPy/likelihoods/noise_models/noise_distributions.py index 79d9ffeb..8ee7a2cd 100644 --- a/GPy/likelihoods/noise_models/noise_distributions.py +++ b/GPy/likelihoods/noise_models/noise_distributions.py @@ -99,31 +99,29 @@ class NoiseDistribution(object): :param tau: cavity distribution 1st natural parameter (precision) :param v: cavity distribution 2nd natural paramenter (mu*precision) """ - #Compute first integral for zeroth moment + #Compute first integral for zeroth moment. + #NOTE constant np.sqrt(2*pi/tau) added at the end of the function mu = v/tau def int_1(f): return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) - z, accuracy = quad(int_1, -np.inf, np.inf) - #z /= np.sqrt(2*np.pi/tau) + z_scaled, accuracy = quad(int_1, -np.inf, np.inf) #Compute second integral for first moment def int_2(f): return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) mean, accuracy = quad(int_2, -np.inf, np.inf) - #mean /= np.sqrt(2*np.pi/tau) - mean /= z + mean /= z_scaled #Compute integral for variance def int_3(f): return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f)) Ef2, accuracy = quad(int_3, -np.inf, np.inf) - #Ef2 /= np.sqrt(2*np.pi/tau) - Ef2 /= z + Ef2 /= z_scaled variance = Ef2 - mean**2 #Add constant to the zeroth moment #NOTE: this constant is not needed in the other moments because it cancells out. - z /= np.sqrt(2*np.pi/tau) + z = z_scaled/np.sqrt(2*np.pi/tau) return z, mean, variance @@ -185,18 +183,17 @@ class NoiseDistribution(object): #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2 - #E( E(Y_star|f_star)**2 ) + #E( E(Y_star|f_star) )**2 if predictive_mean is None: predictive_mean = self.predictive_mean(mu,variance) predictive_mean_sq = predictive_mean**2 + #E( E(Y_star|f_star)**2 ) def int_pred_mean_sq(f,m,v,predictive_mean_sq): - return predictive_mean_sq*np.exp(-(0.5/v)*np.square(f - m)) - + return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m)) scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)] exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer - #E( E(Y_star|f_star) )**2 var_exp = exp_exp2 - predictive_mean_sq # V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) From d7a4e34b3d6f0ea5590b57a4960b33971d678f62 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 11 Nov 2013 09:26:22 +0000 Subject: [PATCH 5/9] fixed product kern get and set state --- GPy/kern/parts/prod.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/GPy/kern/parts/prod.py b/GPy/kern/parts/prod.py index e386a292..7441ae9f 100644 --- a/GPy/kern/parts/prod.py +++ b/GPy/kern/parts/prod.py @@ -19,7 +19,10 @@ class Prod(Kernpart): """ def __init__(self,k1,k2,tensor=False): self.num_params = k1.num_params + k2.num_params - self.name = '['+k1.name + '**' + k2.name +']' + if tensor: + self.name = '['+k1.name + '**' + k2.name +']' + else: + self.name = '['+k1.name + '*' + k2.name +']' self.k1 = k1 self.k2 = k2 if tensor: @@ -130,13 +133,12 @@ class Prod(Kernpart): self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1) self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2) - def getstate(self): - return [self._get_params(), self.k1, self.k2, self.slice1, self.slice2, self.name] + def __getstate__(self): + return [self.k1, self.k2, self.slice1, self.slice2, self.name, self.input_dim, self.num_params] - def setstate(self, state): - params, self.k1, self.k2, self.slice1, self.slice2, self.name = state + def __setstate__(self, state): + self.k1, self.k2, self.slice1, self.slice2, self.name, self.input_dim, self.num_params = state self._X, self._X2, self._params = np.empty(shape=(3,1)) - self._set_params(params) From 4be40da23a3086b004de75da1652c4f633bb715c Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 11 Nov 2013 14:23:10 +0000 Subject: [PATCH 6/9] Changes in plot function: sampling vs numerical approximation --- GPy/core/gp_base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index b6e4ebc0..cb968520 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -173,7 +173,8 @@ class GPBase(Model): upper = m + 2*np.sqrt(v) Y = self.likelihood.Y else: - m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts) + m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts,sampling=False) #Compute the exact mean + m_, v_, lower, upper = self.predict(Xgrid, which_parts=which_parts,sampling=True,num_samples=15000) #Apporximate the percentiles Y = self.likelihood.data for d in which_data_ycols: gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) @@ -210,7 +211,7 @@ class GPBase(Model): m, _ = self._raw_predict(Xgrid, which_parts=which_parts) Y = self.likelihood.Y else: - m, _, _, _ = self.predict(Xgrid, which_parts=which_parts,num_samples=100) #FIXME we need a balance between accuracy and speed to define num_samples + m, _, _, _ = self.predict(Xgrid, which_parts=which_parts,sampling=False) Y = self.likelihood.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T From 7184cee6afb4a8d1a1909e45f7814348b024e4d2 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 11 Nov 2013 14:23:55 +0000 Subject: [PATCH 7/9] Added **likelihood_params to predictive_values --- GPy/likelihoods/gaussian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 85c028b4..c12d8e6d 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -69,7 +69,7 @@ class Gaussian(likelihood): self.covariance_matrix = np.eye(self.N) * x self._variance = x - def predictive_values(self, mu, var, full_cov): + def predictive_values(self, mu, var, full_cov, **likelihood_args): """ Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval """ From e7c7ae8ff41af329c1fc5dc76d98bf4f4e7fb6d9 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 12 Nov 2013 12:06:38 +0000 Subject: [PATCH 8/9] adding docstring for symmetric kern --- GPy/kern/constructors.py | 12 ++++++++++++ GPy/kern/parts/prod.py | 10 +++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 392f43ba..b60c7479 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -450,6 +450,18 @@ def prod(k1,k2,tensor=False): def symmetric(k): """ Construct a symmetric kernel from an existing kernel + + The symmetric kernel works by adding two GP functions together, and computing the overall covariance. + + Let f ~ GP(x | 0, k(x, x')). Now let g = f(x) + f(-x). + + It's easy to see that g is a symmetric function: g(x) = g(-x). + + by construction, g, is a gaussian Process with mean 0 and covariance + + k(x, x') + k(-x, x') + k(x, -x') + k(-x, -x') + + This constructor builds a covariance function of this form from the initial kernel """ k_ = k.copy() k_.parts = [symmetric.Symmetric(p) for p in k.parts] diff --git a/GPy/kern/parts/prod.py b/GPy/kern/parts/prod.py index 7441ae9f..f517262c 100644 --- a/GPy/kern/parts/prod.py +++ b/GPy/kern/parts/prod.py @@ -133,12 +133,12 @@ class Prod(Kernpart): self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1) self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2) - def __getstate__(self): - return [self.k1, self.k2, self.slice1, self.slice2, self.name, self.input_dim, self.num_params] + #def __getstate__(self): + #return [self.k1, self.k2, self.slice1, self.slice2, self.name, self.input_dim, self.num_params] - def __setstate__(self, state): - self.k1, self.k2, self.slice1, self.slice2, self.name, self.input_dim, self.num_params = state - self._X, self._X2, self._params = np.empty(shape=(3,1)) + #def __setstate__(self, state): + #self.k1, self.k2, self.slice1, self.slice2, self.name, self.input_dim, self.num_params = state + #self._X, self._X2, self._params = np.empty(shape=(3,1)) From 5fd031fd6351c7c202fa36a500d5129505f722e2 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 12 Nov 2013 12:17:55 +0000 Subject: [PATCH 9/9] added block matrix utility --- GPy/util/block_matrices.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 GPy/util/block_matrices.py diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py new file mode 100644 index 00000000..8fd5f89d --- /dev/null +++ b/GPy/util/block_matrices.py @@ -0,0 +1,24 @@ +import numpy as np + +def get_blocks(A, blocksizes): + assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can;t blockify this non-square matrix" + N = np.sum(blocksizes) + assert A.shape[0] == N, "bad blocksizes" + num_blocks = len(blocksizes) + B = np.empty(shape=(num_blocks, num_blocks), dtype=np.object) + count_i = 0 + for Bi, i in enumerate(blocksizes): + count_j = 0 + for Bj, j in enumerate(blocksizes): + B[Bi, Bj] = A[count_i:count_i + i, count_j : count_j + j] + count_j += j + count_i += i + return B + + + +if __name__=='__main__': + A = np.zeros((5,5)) + B = get_blocks(A,[2,3]) + B[0,0] += 7 + print B