From 393b9e94ba1847c6d81ad8ebdbb6cfb4528b8c82 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 10 Feb 2015 16:15:37 +0000 Subject: [PATCH 01/13] added more stable expectations for Bernoulli --- GPy/likelihoods/bernoulli.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index ff2ab30a..26de274b 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -77,6 +77,32 @@ class Bernoulli(Likelihood): return Z_hat, mu_hat, sigma2_hat + def variational_expectations(self, Y, m, v, gh_points=None): + if isinstance(self.gp_link, link_functions.Probit): + + if gh_points is None: + gh_x, gh_w = np.polynomial.hermite.hermgauss(20) + else: + gh_x, gh_w = gh_points + + from scipy import stats + + shape = m.shape + m,v,Y = m.flatten(), v.flatten(), Y.flatten() + Ysign = np.where(Y==1,1,-1) + X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None] + p = stats.norm.cdf(X) + p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability + N = stats.norm.pdf(X) + F = np.log(p).dot(gh_w) + NoverP = N/p + dF_dm = (NoverP*Ysign[:,None]).dot(gh_w) + dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(gh_w) + return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), None + else: + raise NotImplementedError + + def predictive_mean(self, mu, variance, Y_metadata=None): if isinstance(self.gp_link, link_functions.Probit): From 482bd1472c7c3bbd3f08cf964c841d922a3e1421 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 11 Feb 2015 11:49:45 +0000 Subject: [PATCH 02/13] reconfigured svgp inference a little --- GPy/core/svgp.py | 6 +-- .../latent_function_inference/svgp.py | 8 +--- GPy/kern/_src/prod.py | 40 +++++++++++++------ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py index 603a64a5..42044b1b 100644 --- a/GPy/core/svgp.py +++ b/GPy/core/svgp.py @@ -36,12 +36,12 @@ class SVGP(SparseGP): KL_scale = 1.0 import climin.util - #Make a climin slicer to make drawing minibatches much quicker + #Make a climin slicer to make drawing minibatches much quicker. Annoyingly, this doesn;t pickle. self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize) X_batch, Y_batch = self.new_batch() #create the SVI inference method - inf_method = svgp_inf(KL_scale=KL_scale, batch_scale=batch_scale) + inf_method = svgp_inf() SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method, name=name, Y_metadata=Y_metadata, normalizer=False) @@ -53,7 +53,7 @@ class SVGP(SparseGP): self.link_parameter(self.m) def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0])) #update the kernel gradients self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z) diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py index ba36b74b..52db242c 100644 --- a/GPy/inference/latent_function_inference/svgp.py +++ b/GPy/inference/latent_function_inference/svgp.py @@ -5,11 +5,8 @@ import numpy as np from posterior import Posterior class SVGP(LatentFunctionInference): - def __init__(self, KL_scale=1., batch_scale=1.): - self.KL_scale = KL_scale - self.batch_scale = batch_scale - def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None): + def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0): num_inducing = Z.shape[0] num_data, num_outputs = Y.shape @@ -44,9 +41,6 @@ class SVGP(LatentFunctionInference): dKL_dS = 0.5*(Kmmi[:,:,None] - Si) dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T) - KL_scale = self.KL_scale - batch_scale = self.batch_scale - KL, dKL_dKmm, dKL_dS, dKL_dm = KL_scale*KL, KL_scale*dKL_dKmm, KL_scale*dKL_dS, KL_scale*dKL_dm #quadrature for the likelihood F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v) diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index dd9a5fe4..bff6d841 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -42,25 +42,41 @@ class Prod(CombinationKernel): return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) def update_gradients_full(self, dL_dK, X, X2=None): - k = self.K(X,X2)*dL_dK - for p in self.parts: - p.update_gradients_full(k/p.K(X,X2),X,X2) + if len(self.parts)==2: + self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2) + self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2) + else: + k = self.K(X,X2)*dL_dK + for p in self.parts: + p.update_gradients_full(k/p.K(X,X2),X,X2) def update_gradients_diag(self, dL_dKdiag, X): - k = self.Kdiag(X)*dL_dKdiag - for p in self.parts: - p.update_gradients_diag(k/p.Kdiag(X),X) + if len(self.parts)==2: + self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X) + self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X) + else: + k = self.Kdiag(X)*dL_dKdiag + for p in self.parts: + p.update_gradients_diag(k/p.Kdiag(X),X) def gradients_X(self, dL_dK, X, X2=None): target = np.zeros(X.shape) - k = self.K(X,X2)*dL_dK - for p in self.parts: - target += p.gradients_X(k/p.K(X,X2),X,X2) + if len(self.parts)==2: + target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2) + target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2) + else: + k = self.K(X,X2)*dL_dK + for p in self.parts: + target += p.gradients_X(k/p.K(X,X2),X,X2) return target def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) - k = self.Kdiag(X)*dL_dKdiag - for p in self.parts: - target += p.gradients_X_diag(k/p.Kdiag(X),X) + if len(self.parts)==2: + target += self.parts[0].gradients_X_diag(dL_dKdiag*self.parts[1].Kdiag(X), X) + target += self.parts[1].gradients_X_diag(dL_dKdiag*self.parts[0].Kdiag(X), X) + else: + k = self.Kdiag(X)*dL_dKdiag + for p in self.parts: + target += p.gradients_X_diag(k/p.Kdiag(X),X) return target From 9f7ae611edea307ed185f45c835f08e582f41951 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Tue, 17 Feb 2015 10:49:06 +0000 Subject: [PATCH 03/13] force set_XY to update the model --- GPy/core/gp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 25066381..3252ac08 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -124,6 +124,7 @@ class GP(Model): else: self.X = ObsAr(X) self.update_model(True) + self._trigger_params_changed() def set_X(self,X): """ From 6435b83c7af412972214245065af3fd115d2cd6d Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Sat, 21 Feb 2015 14:43:10 +0100 Subject: [PATCH 04/13] [sparse gp] prediction with uncertain inputs --- GPy/core/sparse_gp.py | 52 +++++++++++++++++++++++-------- GPy/models/sparse_gp_minibatch.py | 41 ++---------------------- 2 files changed, 42 insertions(+), 51 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 51dbd5db..9004c9c7 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -6,7 +6,8 @@ from gp import GP from parameterization.param import Param from ..inference.latent_function_inference import var_dtc from .. import likelihoods -from parameterization.variational import VariationalPosterior +from parameterization.variational import VariationalPosterior, NormalPosterior +from ..util.linalg import mdot import logging from GPy.inference.latent_function_inference.posterior import Posterior @@ -102,7 +103,15 @@ class SparseGP(GP): def _raw_predict(self, Xnew, full_cov=False, kern=None): """ - Make a prediction for the latent function values + Make a prediction for the latent function values. + + For certain inputs we give back a full_cov of shape NxN, + if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, + we take only the diagonal elements across N. + + For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we + return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD). + This is for both with and without missing data. """ if kern is None: kern = self.kern @@ -121,15 +130,32 @@ class SparseGP(GP): Kxx = kern.Kdiag(Xnew) var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T else: - Kx = kern.psi1(self.Z, Xnew).T - mu = np.dot(Kx.T, self.posterior.woodbury_vector) - if full_cov: - Kxx = kern.K(Xnew.mean) - if self.posterior.woodbury_inv.ndim == 2: - var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) - elif self.posterior.woodbury_inv.ndim == 3: - var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) - else: - Kxx = kern.psi0(self.Z, Xnew) - var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T + psi0_star = self.kern.psi0(self.Z, Xnew) + psi1_star = self.kern.psi1(self.Z, Xnew) + #psi2_star = self.kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code. + la = self.posterior.woodbury_vector + mu = np.dot(psi1_star, la) # TODO: dimensions? + + if full_cov: + var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1])) + di = np.diag_indices(la.shape[1]) + else: + var = np.empty((Xnew.shape[0], la.shape[1])) + + for i in range(Xnew.shape[0]): + _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]] + psi2_star = self.kern.psi2(self.Z, NormalPosterior(_mu, _var)) + tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]])) + + var_ = mdot(la.T, tmp, la) + p0 = psi0_star[i] + t = self.posterior.woodbury_inv + t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2) + + if full_cov: + var_[di] += p0 + var_[di] += -t2 + var[i] = var_ + else: + var[i] = np.diag(var_)+p0-t2 return mu, var diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 8925d4d7..e827bb70 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -3,6 +3,7 @@ import numpy as np from ..core.parameterization.param import Param +from ..core.sparse_gp import SparseGP from ..core.gp import GP from ..inference.latent_function_inference import var_dtc from .. import likelihoods @@ -16,14 +17,9 @@ from GPy.inference.optimization.stochastics import SparseGPStochastics,\ #SparseGPMissing logger = logging.getLogger("sparse gp") -class SparseGPMiniBatch(GP): +class SparseGPMiniBatch(SparseGP): """ - A general purpose Sparse GP model -''' -Created on 3 Nov 2014 - -@author: maxz -''' + A general purpose Sparse GP model, allowing missing data and stochastics across dimensions. This model allows (approximate) inference using variational DTC or FITC (Gaussian likelihoods) as well as non-conjugate sparse methods based on @@ -315,34 +311,3 @@ Created on 3 Nov 2014 else: self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) self._outer_values_update(self.full_values) - - def _raw_predict(self, Xnew, full_cov=False, kern=None): - """ - Make a prediction for the latent function values - """ - - if kern is None: kern = self.kern - - if not isinstance(Xnew, VariationalPosterior): - Kx = kern.K(self.Z, Xnew) - mu = np.dot(Kx.T, self.posterior.woodbury_vector) - if full_cov: - Kxx = kern.K(Xnew) - if self.posterior.woodbury_inv.ndim == 2: - var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) - elif self.posterior.woodbury_inv.ndim == 3: - var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) - var = var - else: - Kxx = kern.Kdiag(Xnew) - var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T - else: - Kx = kern.psi1(self.Z, Xnew) - mu = np.dot(Kx, self.posterior.woodbury_vector) - if full_cov: - raise NotImplementedError, "TODO" - else: - Kxx = kern.psi0(self.Z, Xnew) - psi2 = kern.psi2(self.Z, Xnew) - var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) - return mu, var From b49228d1d0cc84b8abb4c13dd720409072e2f369 Mon Sep 17 00:00:00 2001 From: javiergonzalezh Date: Mon, 23 Feb 2015 15:02:50 +0000 Subject: [PATCH 05/13] minor error in corregionalization corrected --- GPy/kern/_src/coregionalize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index cbfe1ccb..291402ec 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -154,9 +154,9 @@ class Coregionalize(Kern): def _gradient_reduce_numpy(self, dL_dK, index, index2): index, index2 = index[:,0], index2[:,0] dL_dK_small = np.zeros_like(self.B) - for i in range(k.output_dim): + for i in range(self.output_dim): tmp1 = dL_dK[index==i] - for j in range(k.output_dim): + for j in range(self.output_dim): dL_dK_small[j,i] = tmp1[:,index2==j].sum() return dL_dK_small From 11cc5d16f40b2ae7001b643aa94d65579f3e41ab Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 23 Feb 2015 16:19:14 +0000 Subject: [PATCH 06/13] add save param_array --- GPy/core/parameterization/parameter_core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 9a903079..bee160b2 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -1042,6 +1042,9 @@ class Parameterizable(OptimizationHandlable): p = param_to_array(p) d = f.create_dataset(n,p.shape,dtype=p.dtype) d[:] = p + if hasattr(self, 'param_array'): + d = f.create_dataset('param_array',self.param_array.shape, dtype=self.param_array.dtype) + d[:] = self.param_array f.close() except: raise 'Fails to write the parameters into a HDF5 file!' From 85ae035e32bee114b7230cc18665fd9ef864cccd Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 24 Feb 2015 11:55:22 +0000 Subject: [PATCH 07/13] [var_dtc] constant jitter 1e-10 --- GPy/inference/latent_function_inference/var_dtc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index d61e7f0f..eaa02009 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -21,7 +21,7 @@ class VarDTC(LatentFunctionInference): For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it. """ - const_jitter = 1e-6 + const_jitter = 1e-10 def __init__(self, limit=1): #self._YYTfactor_cache = caching.cache() from ...util.caching import Cacher From 72c6b6698376ba5b3772647309d88401caba45be Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 24 Feb 2015 11:56:06 +0000 Subject: [PATCH 08/13] [updateable] update field in observable --- GPy/core/parameterization/updateable.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/core/parameterization/updateable.py b/GPy/core/parameterization/updateable.py index 278ba8cd..379e92e1 100644 --- a/GPy/core/parameterization/updateable.py +++ b/GPy/core/parameterization/updateable.py @@ -11,7 +11,6 @@ class Updateable(Observable): A model can be updated or not. Make sure updates can be switched on and off. """ - _updates = True def __init__(self, *args, **kwargs): super(Updateable, self).__init__(*args, **kwargs) From 924899069e583819d634198627eca68a30d44c4a Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 24 Feb 2015 11:56:36 +0000 Subject: [PATCH 09/13] [optimization] experimental auto detect of ipython notebook --- GPy/core/model.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index c63a29e5..35b046fd 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -255,7 +255,16 @@ class Model(Parameterized): else: optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model=self, max_iters=max_iters, **kwargs) - + + try: + from IPython.display import display + from IPython.html import widgets + display(widgets.TextWidget()) + ipython_notebook = True + except: + # Not in Ipython notebook + ipython_notebook = False + with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook) as vo: opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) vo.finish(opt) @@ -402,7 +411,7 @@ class Model(Parameterized): model_details = [['Model', self.name + '
'], ['Log-likelihood', '{}
'.format(float(self.log_likelihood()))], ["Number of Parameters", '{}
'.format(self.size)], - ["Updates", '{}
'.format(self._updates)], + ["Updates", '{}
'.format(self._update_on)], ] from operator import itemgetter to_print = ["""