From d90d67a8c18fec52590e29f68cb411c00dbc1677 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 11:45:18 +0000 Subject: [PATCH 1/2] Revert "changed to 'update_gradients_q_variational'" This reverts commit f311bfdf17c78bc4f56f03514d4e28b26e2e5057. --- GPy/core/parameterization/variational.py | 4 ++-- GPy/kern/_src/rbf.py | 7 +++---- GPy/models/bayesian_gplvm.py | 4 +++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 05ce2109..d1c0faf8 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -63,7 +63,7 @@ class NormalPosterior(VariationalPosterior): from ...plotting.matplot_dep import variational_plots return variational_plots.plot(self,*args) -class SpikeAndSlabPosterior(VariationalPosterior): +class SpikeAndSlab(VariationalPosterior): ''' The SpikeAndSlab distribution for variational approximations. ''' @@ -71,7 +71,7 @@ class SpikeAndSlabPosterior(VariationalPosterior): """ binary_prob : the probability of the distribution on the slab part. """ - super(SpikeAndSlabPosterior, self).__init__(means, variances, name) + super(SpikeAndSlab, self).__init__(means, variances, name) self.gamma = Param("binary_prob",binary_prob,) self.add_parameter(self.gamma) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index e23e9e2c..0c8588a2 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -182,7 +182,7 @@ class RBF(Kern): return grad - def update_gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): mu = posterior_variational.mean S = posterior_variational.variance self._psi_computations(Z, mu, S) @@ -194,9 +194,8 @@ class RBF(Kern): tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - posterior_variational.mean.gradient = grad_mu - posterior_variational.variance.gradient = grad_S + + return grad_mu, grad_S def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index a8d643b9..7b09e0b1 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -63,7 +63,9 @@ class BayesianGPLVM(SparseGP, GPLVM): super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q) - self.kern.update_gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + # TODO: This has to go into kern + # maybe a update_gradients_q_variational? 
+ self.q.mean.gradient, self.q.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) # update for the KL divergence self.variational_prior.update_gradients_KL(self.q) From b200b9fa903f0d35dde61c68467dec0e4b8838af Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 14:47:43 +0000 Subject: [PATCH 2/2] input_sensitivity and ard plotting --- GPy/core/parameterization/parameter_core.py | 4 + GPy/examples/dimensionality_reduction.py | 5 +- GPy/kern/_src/add.py | 24 ++++-- GPy/kern/_src/bias.py | 6 +- GPy/kern/_src/kern.py | 20 +++-- GPy/kern/_src/linear.py | 3 + GPy/kern/_src/rbf.py | 4 + GPy/models/bayesian_gplvm.py | 5 +- GPy/models/gplvm.py | 23 ++--- GPy/plotting/matplot_dep/kernel_plots.py | 96 ++++++++++++--------- GPy/util/__init__.py | 1 + 11 files changed, 108 insertions(+), 83 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index c2c8a05a..28d63b02 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -340,6 +340,10 @@ class Parameterizable(Constrainable): if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) return names + @property + def num_params(self): + return len(self._parameters_) + def _add_parameter_name(self, param): pname = adjust_name_for_printing(param.name) # and makes sure to not delete programmatically added parameters diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index b6030eb7..a2686d73 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -164,12 +164,11 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, _np.random.seed(0) data = GPy.util.datasets.oil() - kernel = GPy.kern.RBF(Q, 1., [.1] * Q, ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, 1., _np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) Y = data['X'][:N] m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) - m['.*noise.var'] = Y.var() / 100. - + if optimize: m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index d5515d98..45800dbf 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -83,7 +83,7 @@ class Add(Kern): from white import White from rbf import RBF #from rbf_inv import RBFInv - #from bias import Bias + from bias import Bias from linear import Linear #ffrom fixed import Fixed @@ -131,11 +131,11 @@ class Add(Kern): def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - from white import white - from rbf import rbf + from white import White + from rbf import RBF #from rbf_inv import rbfinv - #from bias import bias - from linear import linear + from bias import Bias + from linear import Linear #ffrom fixed import fixed target = np.zeros(Z.shape) @@ -146,15 +146,15 @@ class Add(Kern): for p2, is2 in zip(self._parameters_, self.input_slices): if p2 is p1: continue - if isinstance(p2, white): + if isinstance(p2, White): continue - elif isinstance(p2, bias): + elif isinstance(p2, Bias): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. 
- target += p1.gradients_z_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) + target += p1.gradients_z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) return target def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): @@ -195,6 +195,12 @@ class Add(Kern): from ..plotting.matplot_dep import kernel_plots kernel_plots.plot(self,*args) + def input_sensitivity(self): + in_sen = np.zeros((self.input_dim, self.num_params)) + for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)): + in_sen[i_s, i] = p.input_sensitivity() + return in_sen + def _getstate(self): """ Get the current state of the class, diff --git a/GPy/kern/_src/bias.py b/GPy/kern/_src/bias.py index e1938c95..4eaa9b5c 100644 --- a/GPy/kern/_src/bias.py +++ b/GPy/kern/_src/bias.py @@ -28,12 +28,12 @@ class Bias(Kern): self.variance.gradient = dL_dK.sum() def update_gradients_diag(self, dL_dKdiag, X): - self.variance.gradient = dL_dK.sum() + self.variance.gradient = dL_dKdiag.sum() - def gradients_X(self, dL_dK,X, X2, target): + def gradients_X(self, dL_dK,X, X2): return np.zeros(X.shape) - def gradients_X_diag(self,dL_dKdiag,X,target): + def gradients_X_diag(self,dL_dKdiag,X): return np.zeros(X.shape) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 8bd9b6d1..172dbdd1 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -61,16 +61,20 @@ class Kern(Parameterized): def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): raise NotImplementedError - def plot_ARD(self, *args): - """If an ARD kernel is present, plot a bar representation using matplotlib - - See GPy.plotting.matplot_dep.plot_ARD - """ + def plot_ARD(self, *args, **kw): + if "matplotlib" in sys.modules: + from ...plotting.matplot_dep import kernel_plots + self.plot_ARD.__doc__ += kernel_plots.plot_ARD.__doc__ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ...plotting.matplot_dep import kernel_plots - return kernel_plots.plot_ARD(self,*args) - - + return kernel_plots.plot_ARD(self,*args,**kw) + + def input_sensitivity(self): + """ + Returns the sensitivity for each dimension of this kernel. + """ + return np.zeros(self.input_dim) + def __add__(self, other): """ Overloading of the '+' operator. for more control, see self.add """ return self.add(other) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index a66b3705..2c4e9fa9 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -252,3 +252,6 @@ class Linear(Kern): return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! 
+ def input_sensitivity(self): + if self.ARD: return self.variances + else: return self.variances.repeat(self.input_dim) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 0c8588a2..a0e23b2b 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -382,3 +382,7 @@ class RBF(Kern): type_converters=weave.converters.blitz, **self.weave_options) return mudist, mudist_sq, psi2_exponent, psi2 + + def input_sensitivity(self): + if self.ARD: return 1./self.lengthscale + else: return (1./self.lengthscale).repeat(self.input_dim) \ No newline at end of file diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 7b09e0b1..74b8abe0 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -10,7 +10,7 @@ from ..inference.optimization import SCG from ..util import linalg from ..core.parameterization.variational import NormalPosterior, NormalPrior -class BayesianGPLVM(SparseGP, GPLVM): +class BayesianGPLVM(SparseGP): """ Bayesian Gaussian Process Latent Variable Model @@ -25,7 +25,8 @@ class BayesianGPLVM(SparseGP, GPLVM): def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): if X == None: - X = self.initialise_latent(init, input_dim, Y) + from ..util.initialization import initialize_latent + X = initialize_latent(init, input_dim, Y) self.init = init if X_variance is None: diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index 630b1e8c..c2fd46fe 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -28,28 +28,20 @@ class GPLVM(GP): :type init: 'PCA'|'random' """ if X is None: - X = self.initialise_latent(init, input_dim, Y) + from ..util.initialization import initialize_latent + X = initialize_latent(init, input_dim, Y) if kernel is None: - kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) + kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) likelihood = Gaussian() super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM') - self.X = Param('X', X) + self.X = Param('latent_mean', X) self.add_parameter(self.X, index=0) - def initialise_latent(self, init, input_dim, Y): - Xr = np.random.randn(Y.shape[0], input_dim) - if init == 'PCA': - PC = PCA(Y, input_dim)[0] - Xr[:PC.shape[0], :PC.shape[1]] = PC - else: - pass - return Xr - def parameters_changed(self): - GP.parameters_changed(self) - self.X.gradient = self.kern.gradients_X(self.posterior.dL_dK, self.X) + super(GPLVM, self).parameters_changed() + self.X.gradient = self.kern.gradients_X(self._dL_dK, self.X, None) def _getstate(self): return GP._getstate(self) @@ -79,7 +71,8 @@ class GPLVM(GP): pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5) def plot_latent(self, *args, **kwargs): - return util.plot_latent.plot_latent(self, *args, **kwargs) + from ..plotting.matplot_dep import dim_reduction_plots + return dim_reduction_plots.plot_latent(self, *args, **kwargs) def plot_magnification(self, *args, **kwargs): return util.plot_latent.plot_magnification(self, *args, **kwargs) diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 3436c4ff..6d4a7f0f 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -9,8 +9,41 @@ from matplotlib.transforms import offset_copy from ...kern import Linear + +def add_bar_labels(fig, ax, bars, bottom=0): + transOffset = offset_copy(ax.transData, fig=fig, + 
x=0., y= -2., units='points') + transOffsetUp = offset_copy(ax.transData, fig=fig, + x=0., y=1., units='points') + for bar in bars: + for i, [patch, num] in enumerate(zip(bar.patches, np.arange(len(bar.patches)))): + if len(bottom) == len(bar): b = bottom[i] + else: b = bottom + height = patch.get_height() + b + xi = patch.get_x() + patch.get_width() / 2. + va = 'top' + c = 'w' + t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center') + transform = transOffset + if patch.get_extents().height <= t.get_extents().height + 3: + va = 'bottom' + c = 'k' + transform = transOffsetUp + ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform) + + ax.set_xticks([]) + + +def plot_bars(fig, ax, x, ard_params, color, name, bottom=0): + from ...util.misc import param_to_array + return ax.bar(left=x, height=param_to_array(ard_params), width=.8, + bottom=bottom, align='center', + color=color, edgecolor='k', linewidth=1.2, + label=name.replace("_"," ")) + def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): - """If an ARD kernel is present, plot a bar representation using matplotlib + """ + If an ARD kernel is present, plot a bar representation using matplotlib :param fignum: figure number of the plot :param ax: matplotlib axis to plot on @@ -24,50 +57,27 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): ax = fig.add_subplot(111) else: fig = ax.figure + + if title is None: + ax.set_title('ARD parameters, %s kernel' % kernel.name) + else: + ax.set_title(title) + Tango.reset() - xticklabels = [] bars = [] - x0 = 0 - #for p in kernel._parameters_: - p = kernel - c = Tango.nextMedium() - if hasattr(p, 'ARD') and p.ARD: - if title is None: - ax.set_title('ARD parameters, %s kernel' % p.name) - else: - ax.set_title(title) - if isinstance(p, Linear): - ard_params = p.variances - else: - ard_params = 1. / p.lengthscale - x = np.arange(x0, x0 + len(ard_params)) - from ...util.misc import param_to_array - bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," "))) - xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))]) - x0 += len(ard_params) - x = np.arange(x0) - transOffset = offset_copy(ax.transData, fig=fig, - x=0., y= -2., units='points') - transOffsetUp = offset_copy(ax.transData, fig=fig, - x=0., y=1., units='points') - for bar in bars: - for patch, num in zip(bar.patches, np.arange(len(bar.patches))): - height = patch.get_height() - xi = patch.get_x() + patch.get_width() / 2. 
- va = 'top' - c = 'w' - t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center') - transform = transOffset - if patch.get_extents().height <= t.get_extents().height + 3: - va = 'bottom' - c = 'k' - transform = transOffsetUp - ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform) - # for xi, t in zip(x, xticklabels): - # ax.text(xi, maxi / 2, t, rotation=90, ha='center', va='center') - # ax.set_xticklabels(xticklabels, rotation=17) - ax.set_xticks([]) - ax.set_xlim(-.5, x0 - .5) + + ard_params = np.atleast_2d(kernel.input_sensitivity()) + bottom = 0 + x = np.arange(kernel.input_dim) + + for i in range(ard_params.shape[-1]): + c = Tango.nextMedium() + bars.append(plot_bars(fig, ax, x, ard_params[:,i], c, kernel._parameters_[i].name, bottom=bottom)) + bottom += ard_params[:,i] + + ax.set_xlim(-.5, kernel.input_dim - .5) + add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[:,i]) + if legend: if title is '': mode = 'expand' diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index f93bb0ec..1666fa35 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -13,6 +13,7 @@ import classification import subarray_and_sorting import caching import diag +import initialization try: import sympy
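
The revert in PATCH 1/2 restores the convention that the kernel returns the gradients with respect to the variational parameters of q(X) and the model assigns them to the posterior, rather than the kernel writing into the posterior object itself (the TODO left in bayesian_gplvm.py marks this as provisional). Below is a minimal self-contained sketch of that contract, not part of either patch: ToyParam, ToyPosterior and ToyKern are hypothetical stand-ins for GPy's Param, NormalPosterior and RBF, and the dL_dKmm/dL_dpsi inputs are omitted for brevity.

    import numpy as np

    class ToyParam(np.ndarray):
        # minimal ndarray subclass that can carry a .gradient, like GPy's Param
        def __new__(cls, arr):
            obj = np.asarray(arr, dtype=float).view(cls)
            obj.gradient = np.zeros(obj.shape)
            return obj

    class ToyPosterior(object):
        def __init__(self, mean, variance):
            self.mean = ToyParam(mean)
            self.variance = ToyParam(variance)

    class ToyKern(object):
        def gradients_q_variational(self, posterior_variational):
            mu = posterior_variational.mean
            S = posterior_variational.variance
            # placeholder gradients with the right shapes
            return np.ones(mu.shape), 0.5 * np.ones(S.shape)

    q = ToyPosterior(np.zeros((4, 2)), np.ones((4, 2)))
    kern = ToyKern()
    # the model side, as in BayesianGPLVM.parameters_changed after the revert:
    q.mean.gradient, q.variance.gradient = \
        kern.gradients_q_variational(posterior_variational=q)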
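
And a usage sketch for the input_sensitivity/ARD plotting added in PATCH 2/2. Again an illustration rather than part of the patch: it assumes a GPy checkout with both patches applied, numpy and matplotlib importable, and random toy data standing in for the oil dataset used in bgplvm_oil.

    import numpy as np
    import matplotlib.pyplot as pb   # plot_ARD asserts matplotlib is imported
    import GPy

    np.random.seed(0)
    Q = 3                            # latent dimensionality
    Y = np.random.randn(100, 8)      # toy data in place of the oil dataset

    # ARD RBF plus Bias, the combination sketched in bgplvm_oil: one
    # lengthscale per latent dimension, so irrelevant dimensions can be
    # switched off during optimization
    kernel = GPy.kern.RBF(Q, 1., np.random.uniform(0, 1, (Q,)), ARD=True) \
           + GPy.kern.Bias(Q, np.exp(-2))
    m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=10)
    m.optimize('scg', messages=0, max_iters=200)

    # new in this patch: per-dimension relevance -- 1./lengthscale for RBF,
    # the variances for Linear, zeros for kernels with no ARD notion, and an
    # [input_dim x num_params] matrix for Add
    print m.kern.input_sensitivity()

    # one stacked bar per input dimension, one colour per summand
    m.kern.plot_ARD()
    pb.show()

The design point: Kern.input_sensitivity gives every kernel a uniform way to report per-dimension relevance (zero by default), so plot_ARD no longer needs isinstance checks against Linear or RBF and can simply stack the columns Add collects from its parts.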