From 2f5d5dd3bfd5338f1707f921882682bcafedec74 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 5 Mar 2014 14:16:53 +0000 Subject: [PATCH 1/5] Made sampling default for non-gaussian likelihoods as a quick fix to allow plotting again for likelihoods without predictive values --- GPy/examples/regression.py | 2 +- GPy/kern/_src/sympykern.py | 29 ++++++++++++++--------------- GPy/likelihoods/likelihood.py | 2 +- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index aa6bbbf9..cc23410a 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -284,7 +284,7 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True): kern = GPy.kern.RBF(1) poisson_lik = GPy.likelihoods.Poisson() - laplace_inf = GPy.inference.latent_function_inference.LaplaceInference() + laplace_inf = GPy.inference.latent_function_inference.Laplace() # create simple GP Model m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf) diff --git a/GPy/kern/_src/sympykern.py b/GPy/kern/_src/sympykern.py index 920f47f3..9878ec68 100644 --- a/GPy/kern/_src/sympykern.py +++ b/GPy/kern/_src/sympykern.py @@ -1,11 +1,10 @@ # Check Matthew Rocklin's blog post. -try: +try: import sympy as sp sympy_available=True from sympy.utilities.lambdify import lambdify except ImportError: sympy_available=False - exit() import numpy as np from kern import Kern @@ -36,7 +35,7 @@ class Sympykern(Kern): super(Sympykern, self).__init__(input_dim, name) self._sp_k = k - + # pull the variable names out of the symbolic covariance function. sp_vars = [e for e in k.atoms() if e.is_Symbol] self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) @@ -51,7 +50,7 @@ class Sympykern(Kern): self._sp_kdiag = k for x, z in zip(self._sp_x, self._sp_z): self._sp_kdiag = self._sp_kdiag.subs(z, x) - + # If it is a multi-output covariance, add an input for indexing the outputs. self._real_input_dim = x_dim # Check input dim is number of xs + 1 if output_dim is >1 @@ -73,7 +72,7 @@ class Sympykern(Kern): # Extract names of shared parameters (those without a subscript) self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] - + self.num_split_params = len(self._sp_theta_i) self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] # Add split parameters to the model. @@ -82,11 +81,11 @@ class Sympykern(Kern): setattr(self, theta, Param(theta, np.ones(self.output_dim), None)) self.add_parameter(getattr(self, theta)) - + self.num_shared_params = len(self._sp_theta) for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j): self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i) - + else: self.num_split_params = 0 self._split_theta_names = [] @@ -107,10 +106,10 @@ class Sympykern(Kern): derivative_arguments = self._sp_x + self._sp_theta if self.output_dim > 1: derivative_arguments += self._sp_theta_i - + self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments} self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments} - + # This gives the parameters for the arg list. 
self.arg_list = self._sp_x + self._sp_z + self._sp_theta self.diag_arg_list = self._sp_x + self._sp_theta @@ -137,7 +136,7 @@ class Sympykern(Kern): for key in self.derivatives.keys(): setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy')) - def K(self,X,X2=None): + def K(self,X,X2=None): self._K_computations(X, X2) return self._K_function(**self._arguments) @@ -145,11 +144,11 @@ class Sympykern(Kern): def Kdiag(self,X): self._K_computations(X) return self._Kdiag_function(**self._diag_arguments) - + def _param_grad_helper(self,partial,X,Z,target): pass - + def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) @@ -168,7 +167,7 @@ class Sympykern(Kern): gf = getattr(self, '_Kdiag_diff_' + x.name) dX[:, i] = gf(**self._diag_arguments)*dL_dK return dX - + def update_gradients_full(self, dL_dK, X, X2=None): # Need to extract parameters to local variables first self._K_computations(X, X2) @@ -193,7 +192,7 @@ class Sympykern(Kern): gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum() for i in np.arange(self.output_dim)]) setattr(parameter, 'gradient', gradient) - + def update_gradients_diag(self, dL_dKdiag, X): self._K_computations(X) @@ -209,7 +208,7 @@ class Sympykern(Kern): setattr(parameter, 'gradient', np.asarray([a[np.where(self._output_ind==i)].sum() for i in np.arange(self.output_dim)])) - + def _K_computations(self, X, X2=None): """Set up argument lists for the derivatives.""" # Could check if this needs doing or not, there could diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 701a5a2f..aff55533 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -358,7 +358,7 @@ class Likelihood(Parameterized): return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta - def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000): + def predictive_values(self, mu, var, full_cov=False, sampling=True, num_samples=10000): """ Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction. 
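
Note on PATCH 1/5: flipping the predictive_values default to sampling=True restores plotting for likelihoods that have no closed-form predictive mean and variance; the 5 and 95 percentiles are instead estimated from num_samples (10000 by default) draws through the likelihood. A minimal sketch of the affected path, reusing the Poisson/Laplace setup from toy_poisson_rbf_1d_laplace above (the synthetic count data here is illustrative, not taken from the example file):

import numpy as np
import GPy

X = np.linspace(0, 10, 50)[:, None]
Y = np.random.poisson(np.exp(np.sin(X)))  # toy counts with a smooth latent rate

kern = GPy.kern.RBF(1)
poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.Laplace()

# create simple GP model, exactly as in the example
m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf)
m.optimize()
m.plot()  # works again: predictive quantiles are now sampled by default
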
From 99d6b8220c07fc5863b6ff33e97f9d3050d679bb Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Wed, 5 Mar 2014 18:45:14 +0000 Subject: [PATCH 2/5] [SSGPLVM] implemented linear kernel --- GPy/kern/_src/linear.py | 126 +++++++++++++----- .../{rbf_psi_comp => psi_comp}/__init__.py | 0 GPy/kern/_src/psi_comp/linear_psi_comp.py | 51 +++++++ .../ssrbf_psi_comp.py | 0 GPy/kern/_src/rbf.py | 2 +- GPy/kern/_src/ssrbf.py | 2 +- 6 files changed, 147 insertions(+), 34 deletions(-) rename GPy/kern/_src/{rbf_psi_comp => psi_comp}/__init__.py (100%) create mode 100644 GPy/kern/_src/psi_comp/linear_psi_comp.py rename GPy/kern/_src/{rbf_psi_comp => psi_comp}/ssrbf_psi_comp.py (100%) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 1521026d..8ed7733f 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -6,10 +6,12 @@ import numpy as np from scipy import weave from kern import Kern from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array +from ...util.misc import param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp from ...util.caching import Cache_this +from ...core.parameterization import variational +from psi_comp import linear_psi_comp class Linear(Kern): """ @@ -104,49 +106,109 @@ class Linear(Kern): #---------------------------------------# def psi0(self, Z, variational_posterior): - return np.sum(self.variances * self._mu2S(variational_posterior), 1) + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + gamma = variational_posterior.binary_prob + mu = variational_posterior.mean + S = variational_posterior.variance + return np.einsum('q,nq,nq->n',self.variances,gamma,np.square(mu)+S) +# return (self.variances*gamma*(np.square(mu)+S)).sum(axis=1) + else: + return np.sum(self.variances * self._mu2S(variational_posterior), 1) def psi1(self, Z, variational_posterior): - return self.K(variational_posterior.mean, Z) #the variance, it does nothing + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + gamma = variational_posterior.binary_prob + mu = variational_posterior.mean + return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu) +# return (self.variances*gamma*mu).sum(axis=1) + else: + return self.K(variational_posterior.mean, Z) #the variance, it does nothing @Cache_this(limit=1) def psi2(self, Z, variational_posterior): - ZA = Z * self.variances - ZAinner = self._ZAinner(variational_posterior, Z) - return np.dot(ZAinner, ZA.T) + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + gamma = variational_posterior.binary_prob + mu = variational_posterior.mean + S = variational_posterior.variance + mu2 = np.square(mu) + variances2 = np.square(self.variances) + tmp = np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu) + return np.einsum('nq,q,mq,oq,nq->nmo',gamma,variances2,Z,Z,mu2+S)+\ + np.einsum('nm,no->nmo',tmp,tmp) - np.einsum('nq,q,mq,oq,nq->nmo',np.square(gamma),variances2,Z,Z,mu2) + else: + ZA = Z * self.variances + ZAinner = self._ZAinner(variational_posterior, Z) + return np.dot(ZAinner, ZA.T) def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - #psi1 - self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z) - # psi0: - tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior) - if self.ARD: self.variances.gradient += tmp.sum(0) - else: self.variances.gradient += tmp.sum() - #psi2 - if self.ARD: - tmp = dL_dpsi2[:, :, :, None] * 
(self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :]) - self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0) + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + gamma = variational_posterior.binary_prob + mu = variational_posterior.mean + S = variational_posterior.variance + mu2S = np.square(mu)+S + + _dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma) + grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\ + np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance) + self.variances.gradient = grad else: - self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances + #psi1 + self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z) + # psi0: + tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior) + if self.ARD: self.variances.gradient += tmp.sum(0) + else: self.variances.gradient += tmp.sum() + #psi2 + if self.ARD: + tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :]) + self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0) + else: + self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - #psi1 - grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean) - #psi2 - self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad) - return grad + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + gamma = variational_posterior.binary_prob + mu = variational_posterior.mean + S = variational_posterior.variance + _, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma) + + grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\ + np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ) + + return grad + else: + #psi1 + grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean) + #psi2 + self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad) + return grad def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape) - # psi0 - grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances) - grad_S += dL_dpsi0[:, None] * self.variances - # psi1 - grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) - # psi2 - self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S) - - return grad_mu, grad_S + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + gamma = variational_posterior.binary_prob + mu = variational_posterior.mean + S = variational_posterior.variance + mu2S = np.square(mu)+S + _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma) + + grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\ + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma) + grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\ + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu) + grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS) + + return grad_mu, grad_S, grad_gamma + else: + grad_mu, grad_S = 
np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape) + # psi0 + grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances) + grad_S += dL_dpsi0[:, None] * self.variances + # psi1 + grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) + # psi2 + self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S) + + return grad_mu, grad_S #--------------------------------------------------# # Helpers for psi statistics # diff --git a/GPy/kern/_src/rbf_psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py similarity index 100% rename from GPy/kern/_src/rbf_psi_comp/__init__.py rename to GPy/kern/_src/psi_comp/__init__.py diff --git a/GPy/kern/_src/psi_comp/linear_psi_comp.py b/GPy/kern/_src/psi_comp/linear_psi_comp.py new file mode 100644 index 00000000..22147366 --- /dev/null +++ b/GPy/kern/_src/psi_comp/linear_psi_comp.py @@ -0,0 +1,51 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +""" +The package for the Psi statistics computation of the linear kernel for SSGPLVM +""" + +import numpy as np +from GPy.util.caching import Cache_this + +#@Cache_this(limit=1) +def _psi2computations(variance, Z, mu, S, gamma): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 and psi2 + # Produced intermediate results: + # _psi2 NxMxM + # _psi2_dvariance NxMxMxQ + # _psi2_dZ NxMxQ + # _psi2_dgamma NxMxMxQ + # _psi2_dmu NxMxMxQ + # _psi2_dS NxMxMxQ + + mu2 = np.square(mu) + gamma2 = np.square(gamma) + variance2 = np.square(variance) + mu2S = mu2+S # NxQ + common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM + + _dpsi2_dvariance = np.einsum('nq,q,mq,oq->nmoq',2.*(gamma*mu2S-gamma2*mu2),variance,Z,Z)+\ + np.einsum('nq,mq,nq,no->nmoq',gamma,Z,mu,common_sum)+\ + np.einsum('nq,oq,nq,nm->nmoq',gamma,Z,mu,common_sum) + + _dpsi2_dgamma = np.einsum('q,mq,oq,nq->nmoq',variance2,Z,Z,(mu2S-2.*gamma*mu2))+\ + np.einsum('q,mq,nq,no->nmoq',variance,Z,mu,common_sum)+\ + np.einsum('q,oq,nq,nm->nmoq',variance,Z,mu,common_sum) + + _dpsi2_dmu = np.einsum('q,mq,oq,nq,nq->nmoq',variance2,Z,Z,mu,2.*(gamma-gamma2))+\ + np.einsum('nq,q,mq,no->nmoq',gamma,variance,Z,common_sum)+\ + np.einsum('nq,q,oq,nm->nmoq',gamma,variance,Z,common_sum) + + _dpsi2_dS = np.einsum('nq,q,mq,oq->nmoq',gamma,variance2,Z,Z) + + _dpsi2_dZ = 2.*(np.einsum('nq,q,mq,nq->nmq',gamma,variance2,Z,mu2S)+np.einsum('nq,q,nq,nm->nmq',gamma,variance,mu,common_sum) + -np.einsum('nq,q,mq,nq->nmq',gamma2,variance2,Z,mu2)) + + return _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ \ No newline at end of file diff --git a/GPy/kern/_src/rbf_psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py similarity index 100% rename from GPy/kern/_src/rbf_psi_comp/ssrbf_psi_comp.py rename to GPy/kern/_src/psi_comp/ssrbf_psi_comp.py diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 498ab0ac..cd6c41e9 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -8,7 +8,7 @@ from ...util.misc import param_to_array from stationary import Stationary from GPy.util.caching import Cache_this from ...core.parameterization import variational -from rbf_psi_comp import ssrbf_psi_comp +from psi_comp import ssrbf_psi_comp class RBF(Stationary): """ diff --git a/GPy/kern/_src/ssrbf.py b/GPy/kern/_src/ssrbf.py index 391ef1c7..c566c414 100644 --- a/GPy/kern/_src/ssrbf.py +++ b/GPy/kern/_src/ssrbf.py @@ -7,7 +7,7 @@ import numpy as np from 
...util.linalg import tdot from ...util.config import * from stationary import Stationary -from rbf_psi_comp import ssrbf_psi_comp +from psi_comp import ssrbf_psi_comp class SSRBF(Stationary): """ From a7b1f30c467badccb0f5b93b112de08eec32b933 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 6 Mar 2014 10:24:19 +0000 Subject: [PATCH 3/5] [SSGPLVM] support linear kernel with ARD off --- GPy/kern/_src/linear.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 8ed7733f..60645d11 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -110,6 +110,7 @@ class Linear(Kern): gamma = variational_posterior.binary_prob mu = variational_posterior.mean S = variational_posterior.variance + return np.einsum('q,nq,nq->n',self.variances,gamma,np.square(mu)+S) # return (self.variances*gamma*(np.square(mu)+S)).sum(axis=1) else: @@ -150,7 +151,10 @@ class Linear(Kern): _dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma) grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\ np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance) - self.variances.gradient = grad + if self.ARD: + self.variances.gradient = grad + else: + self.variances.gradient = grad.sum() else: #psi1 self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z) From 8177d63309b154d76c33dafa3fea893e5a8792c6 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Thu, 6 Mar 2014 16:32:19 +0000 Subject: [PATCH 4/5] [SSGPLVM] new plot variational posterior --- GPy/core/parameterization/variational.py | 2 +- GPy/examples/dimensionality_reduction.py | 25 +++++++++++ GPy/plotting/matplot_dep/variational_plots.py | 45 +++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 71921ab1..8bc7ca59 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -117,4 +117,4 @@ class SpikeAndSlabPosterior(VariationalPosterior): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
from ...plotting.matplot_dep import variational_plots - return variational_plots.plot(self,*args) + return variational_plots.plot_SpikeSlab(self,*args) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 9ebb54a2..818dff69 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -515,3 +515,28 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose lvm_visualizer.close() return m + +def ssgplvm_simulation_linear(): + import numpy as np + import GPy + N, D, Q = 1000, 20, 5 + pi = 0.2 + + def sample_X(Q, pi): + x = np.empty(Q) + dies = np.random.rand(Q) + for q in xrange(Q): + if dies[q] Date: Thu, 6 Mar 2014 16:52:39 +0000 Subject: [PATCH 5/5] add const_jitter back to varDTC --- GPy/inference/latent_function_inference/var_dtc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 1d998fcb..d35080f6 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -79,7 +79,7 @@ class VarDTC(object): # kernel computations, using BGPLVM notation Kmm = kern.K(Z) - Lm = jitchol(Kmm) + Lm = jitchol(Kmm+np.eye(Z.shape[0])*self.const_jitter) # The rather complex computations of A if uncertain_inputs:
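
Note on PATCH 2/5 and 3/5: the einsum expressions implement the psi statistics of the linear kernel under a spike-and-slab posterior, e.g. psi0_n = sum_q variance_q * gamma_nq * (mu_nq^2 + S_nq) and psi1_nm = sum_q gamma_nq * variance_q * Z_mq * mu_nq; with ARD off, PATCH 3/5 simply sums the per-dimension gradient into the single shared variance. A quick Monte Carlo spot-check of psi1 under these assumptions (illustrative test code, not part of the patches; it assumes the spike-and-slab model x_nq = b_nq * w_nq with b_nq ~ Bernoulli(gamma_nq) and w_nq ~ N(mu_nq, S_nq)):

import numpy as np

N, M, Q, T = 4, 3, 5, 100000
rng = np.random.RandomState(0)
variances = rng.rand(Q) + 0.5
Z = rng.randn(M, Q)
mu = rng.randn(N, Q)
S = rng.rand(N, Q) + 0.1
gamma = rng.rand(N, Q)

# draw T samples of X from the spike-and-slab posterior
b = rng.rand(T, N, Q) < gamma                 # spikes
w = mu + np.sqrt(S) * rng.randn(T, N, Q)      # slabs
X = b * w

psi1_mc = np.einsum('tnq,q,mq->nm', X, variances, Z) / T
psi1 = np.einsum('nq,q,mq,nq->nm', gamma, variances, Z, mu)  # closed form from the patch
print(np.abs(psi1_mc - psi1).max())           # shrinks as T grows

Note on PATCH 5/5: adding const_jitter to the diagonal of Kmm guards against rounding pushing its smallest eigenvalues slightly negative before the Cholesky factorisation, which is common when inducing inputs drift close together. A minimal sketch of the idea (stabilised_cholesky is an illustrative helper, not GPy's jitchol, which additionally retries with increasing jitter on failure):

import numpy as np

def stabilised_cholesky(Kmm, const_jitter=1e-6):
    # Kmm is PSD in exact arithmetic; a small constant on the diagonal
    # restores strict positive definiteness under floating point.
    return np.linalg.cholesky(Kmm + np.eye(Kmm.shape[0]) * const_jitter)
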