From d90d67a8c18fec52590e29f68cb411c00dbc1677 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 11:45:18 +0000 Subject: [PATCH 1/9] Revert "changed to 'update_gradients_q_variational'" This reverts commit f311bfdf17c78bc4f56f03514d4e28b26e2e5057. --- GPy/core/parameterization/variational.py | 4 ++-- GPy/kern/_src/rbf.py | 7 +++---- GPy/models/bayesian_gplvm.py | 4 +++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 05ce2109..d1c0faf8 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -63,7 +63,7 @@ class NormalPosterior(VariationalPosterior): from ...plotting.matplot_dep import variational_plots return variational_plots.plot(self,*args) -class SpikeAndSlabPosterior(VariationalPosterior): +class SpikeAndSlab(VariationalPosterior): ''' The SpikeAndSlab distribution for variational approximations. ''' @@ -71,7 +71,7 @@ class SpikeAndSlabPosterior(VariationalPosterior): """ binary_prob : the probability of the distribution on the slab part. """ - super(SpikeAndSlabPosterior, self).__init__(means, variances, name) + super(SpikeAndSlab, self).__init__(means, variances, name) self.gamma = Param("binary_prob",binary_prob,) self.add_parameter(self.gamma) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index e23e9e2c..0c8588a2 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -182,7 +182,7 @@ class RBF(Kern): return grad - def update_gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): mu = posterior_variational.mean S = posterior_variational.variance self._psi_computations(Z, mu, S) @@ -194,9 +194,8 @@ class RBF(Kern): tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - posterior_variational.mean.gradient = grad_mu - posterior_variational.variance.gradient = grad_S + + return grad_mu, grad_S def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index a8d643b9..7b09e0b1 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -63,7 +63,9 @@ class BayesianGPLVM(SparseGP, GPLVM): super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q) - self.kern.update_gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + # TODO: This has to go into kern + # maybe a update_gradients_q_variational? + self.q.mean.gradient, self.q.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) # update for the KL divergence self.variational_prior.update_gradients_KL(self.q) From b200b9fa903f0d35dde61c68467dec0e4b8838af Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 14:47:43 +0000 Subject: [PATCH 2/9] input_sensitivity and ard plotting --- GPy/core/parameterization/parameter_core.py | 4 + GPy/examples/dimensionality_reduction.py | 5 +- GPy/kern/_src/add.py | 24 ++++-- GPy/kern/_src/bias.py | 6 +- GPy/kern/_src/kern.py | 20 +++-- GPy/kern/_src/linear.py | 3 + GPy/kern/_src/rbf.py | 4 + GPy/models/bayesian_gplvm.py | 5 +- GPy/models/gplvm.py | 23 ++--- GPy/plotting/matplot_dep/kernel_plots.py | 96 ++++++++++++--------- GPy/util/__init__.py | 1 + 11 files changed, 108 insertions(+), 83 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index c2c8a05a..28d63b02 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -340,6 +340,10 @@ class Parameterizable(Constrainable): if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) return names + @property + def num_params(self): + return len(self._parameters_) + def _add_parameter_name(self, param): pname = adjust_name_for_printing(param.name) # and makes sure to not delete programmatically added parameters diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index b6030eb7..a2686d73 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -164,12 +164,11 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, _np.random.seed(0) data = GPy.util.datasets.oil() - kernel = GPy.kern.RBF(Q, 1., [.1] * Q, ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, 1., _np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) Y = data['X'][:N] m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) - m['.*noise.var'] = Y.var() / 100. - + if optimize: m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index d5515d98..45800dbf 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -83,7 +83,7 @@ class Add(Kern): from white import White from rbf import RBF #from rbf_inv import RBFInv - #from bias import Bias + from bias import Bias from linear import Linear #ffrom fixed import Fixed @@ -131,11 +131,11 @@ class Add(Kern): def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - from white import white - from rbf import rbf + from white import White + from rbf import RBF #from rbf_inv import rbfinv - #from bias import bias - from linear import linear + from bias import Bias + from linear import Linear #ffrom fixed import fixed target = np.zeros(Z.shape) @@ -146,15 +146,15 @@ class Add(Kern): for p2, is2 in zip(self._parameters_, self.input_slices): if p2 is p1: continue - if isinstance(p2, white): + if isinstance(p2, White): continue - elif isinstance(p2, bias): + elif isinstance(p2, Bias): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: - eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. - target += p1.gradients_z_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) + target += p1.gradients_z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) return target def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): @@ -195,6 +195,12 @@ class Add(Kern): from ..plotting.matplot_dep import kernel_plots kernel_plots.plot(self,*args) + def input_sensitivity(self): + in_sen = np.zeros((self.input_dim, self.num_params)) + for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)): + in_sen[i_s, i] = p.input_sensitivity() + return in_sen + def _getstate(self): """ Get the current state of the class, diff --git a/GPy/kern/_src/bias.py b/GPy/kern/_src/bias.py index e1938c95..4eaa9b5c 100644 --- a/GPy/kern/_src/bias.py +++ b/GPy/kern/_src/bias.py @@ -28,12 +28,12 @@ class Bias(Kern): self.variance.gradient = dL_dK.sum() def update_gradients_diag(self, dL_dKdiag, X): - self.variance.gradient = dL_dK.sum() + self.variance.gradient = dL_dKdiag.sum() - def gradients_X(self, dL_dK,X, X2, target): + def gradients_X(self, dL_dK,X, X2): return np.zeros(X.shape) - def gradients_X_diag(self,dL_dKdiag,X,target): + def gradients_X_diag(self,dL_dKdiag,X): return np.zeros(X.shape) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 8bd9b6d1..172dbdd1 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -61,16 +61,20 @@ class Kern(Parameterized): def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): raise NotImplementedError - def plot_ARD(self, *args): - """If an ARD kernel is present, plot a bar representation using matplotlib - - See GPy.plotting.matplot_dep.plot_ARD - """ + def plot_ARD(self, *args, **kw): + if "matplotlib" in sys.modules: + from ...plotting.matplot_dep import kernel_plots + self.plot_ARD.__doc__ += kernel_plots.plot_ARD.__doc__ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ...plotting.matplot_dep import kernel_plots - return kernel_plots.plot_ARD(self,*args) - - + return kernel_plots.plot_ARD(self,*args,**kw) + + def input_sensitivity(self): + """ + Returns the sensitivity for each dimension of this kernel. + """ + return np.zeros(self.input_dim) + def __add__(self, other): """ Overloading of the '+' operator. for more control, see self.add """ return self.add(other) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index a66b3705..2c4e9fa9 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -252,3 +252,6 @@ class Linear(Kern): return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! + def input_sensitivity(self): + if self.ARD: return self.variances + else: return self.variances.repeat(self.input_dim) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 0c8588a2..a0e23b2b 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -382,3 +382,7 @@ class RBF(Kern): type_converters=weave.converters.blitz, **self.weave_options) return mudist, mudist_sq, psi2_exponent, psi2 + + def input_sensitivity(self): + if self.ARD: return 1./self.lengthscale + else: return (1./self.lengthscale).repeat(self.input_dim) \ No newline at end of file diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 7b09e0b1..74b8abe0 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -10,7 +10,7 @@ from ..inference.optimization import SCG from ..util import linalg from ..core.parameterization.variational import NormalPosterior, NormalPrior -class BayesianGPLVM(SparseGP, GPLVM): +class BayesianGPLVM(SparseGP): """ Bayesian Gaussian Process Latent Variable Model @@ -25,7 +25,8 @@ class BayesianGPLVM(SparseGP, GPLVM): def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): if X == None: - X = self.initialise_latent(init, input_dim, Y) + from ..util.initialization import initialize_latent + X = initialize_latent(init, input_dim, Y) self.init = init if X_variance is None: diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index 630b1e8c..c2fd46fe 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -28,28 +28,20 @@ class GPLVM(GP): :type init: 'PCA'|'random' """ if X is None: - X = self.initialise_latent(init, input_dim, Y) + from ..util.initialization import initialize_latent + X = initialize_latent(init, input_dim, Y) if kernel is None: - kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) + kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) likelihood = Gaussian() super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM') - self.X = Param('X', X) + self.X = Param('latent_mean', X) self.add_parameter(self.X, index=0) - def initialise_latent(self, init, input_dim, Y): - Xr = np.random.randn(Y.shape[0], input_dim) - if init == 'PCA': - PC = PCA(Y, input_dim)[0] - Xr[:PC.shape[0], :PC.shape[1]] = PC - else: - pass - return Xr - def parameters_changed(self): - GP.parameters_changed(self) - self.X.gradient = self.kern.gradients_X(self.posterior.dL_dK, self.X) + super(GPLVM, self).parameters_changed() + self.X.gradient = self.kern.gradients_X(self._dL_dK, self.X, None) def _getstate(self): return GP._getstate(self) @@ -79,7 +71,8 @@ class GPLVM(GP): pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5) def plot_latent(self, *args, **kwargs): - return util.plot_latent.plot_latent(self, *args, **kwargs) + from ..plotting.matplot_dep import dim_reduction_plots + return dim_reduction_plots.plot_latent(self, *args, **kwargs) def plot_magnification(self, *args, **kwargs): return util.plot_latent.plot_magnification(self, *args, **kwargs) diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 3436c4ff..6d4a7f0f 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -9,8 +9,41 @@ from matplotlib.transforms import offset_copy from ...kern import Linear + +def add_bar_labels(fig, ax, bars, bottom=0): + transOffset = offset_copy(ax.transData, fig=fig, + x=0., y= -2., units='points') + transOffsetUp = offset_copy(ax.transData, fig=fig, + x=0., y=1., units='points') + for bar in bars: + for i, [patch, num] in enumerate(zip(bar.patches, np.arange(len(bar.patches)))): + if len(bottom) == len(bar): b = bottom[i] + else: b = bottom + height = patch.get_height() + b + xi = patch.get_x() + patch.get_width() / 2. + va = 'top' + c = 'w' + t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center') + transform = transOffset + if patch.get_extents().height <= t.get_extents().height + 3: + va = 'bottom' + c = 'k' + transform = transOffsetUp + ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform) + + ax.set_xticks([]) + + +def plot_bars(fig, ax, x, ard_params, color, name, bottom=0): + from ...util.misc import param_to_array + return ax.bar(left=x, height=param_to_array(ard_params), width=.8, + bottom=bottom, align='center', + color=color, edgecolor='k', linewidth=1.2, + label=name.replace("_"," ")) + def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): - """If an ARD kernel is present, plot a bar representation using matplotlib + """ + If an ARD kernel is present, plot a bar representation using matplotlib :param fignum: figure number of the plot :param ax: matplotlib axis to plot on @@ -24,50 +57,27 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): ax = fig.add_subplot(111) else: fig = ax.figure + + if title is None: + ax.set_title('ARD parameters, %s kernel' % kernel.name) + else: + ax.set_title(title) + Tango.reset() - xticklabels = [] bars = [] - x0 = 0 - #for p in kernel._parameters_: - p = kernel - c = Tango.nextMedium() - if hasattr(p, 'ARD') and p.ARD: - if title is None: - ax.set_title('ARD parameters, %s kernel' % p.name) - else: - ax.set_title(title) - if isinstance(p, Linear): - ard_params = p.variances - else: - ard_params = 1. / p.lengthscale - x = np.arange(x0, x0 + len(ard_params)) - from ...util.misc import param_to_array - bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," "))) - xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))]) - x0 += len(ard_params) - x = np.arange(x0) - transOffset = offset_copy(ax.transData, fig=fig, - x=0., y= -2., units='points') - transOffsetUp = offset_copy(ax.transData, fig=fig, - x=0., y=1., units='points') - for bar in bars: - for patch, num in zip(bar.patches, np.arange(len(bar.patches))): - height = patch.get_height() - xi = patch.get_x() + patch.get_width() / 2. - va = 'top' - c = 'w' - t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center') - transform = transOffset - if patch.get_extents().height <= t.get_extents().height + 3: - va = 'bottom' - c = 'k' - transform = transOffsetUp - ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform) - # for xi, t in zip(x, xticklabels): - # ax.text(xi, maxi / 2, t, rotation=90, ha='center', va='center') - # ax.set_xticklabels(xticklabels, rotation=17) - ax.set_xticks([]) - ax.set_xlim(-.5, x0 - .5) + + ard_params = np.atleast_2d(kernel.input_sensitivity()) + bottom = 0 + x = np.arange(kernel.input_dim) + + for i in range(ard_params.shape[-1]): + c = Tango.nextMedium() + bars.append(plot_bars(fig, ax, x, ard_params[:,i], c, kernel._parameters_[i].name, bottom=bottom)) + bottom += ard_params[:,i] + + ax.set_xlim(-.5, kernel.input_dim - .5) + add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[:,i]) + if legend: if title is '': mode = 'expand' diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index f93bb0ec..1666fa35 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -13,6 +13,7 @@ import classification import subarray_and_sorting import caching import diag +import initialization try: import sympy From 06dd27c634a87fe86596bf09790061dbf6255fb9 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 24 Feb 2014 14:49:20 +0000 Subject: [PATCH 3/9] input senitivity in stationary --- GPy/kern/_src/stationary.py | 54 ++++--------------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index dde26e63..e8586d07 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -35,6 +35,9 @@ class Stationary(Kern): def K_of_r(self, r): raise NotImplementedError, "implement the covaraiance functino and a fn of r to use this class" + def dK_dr(self, r): + raise NotImplementedError, "implement the covaraiance functino and a fn of r to use this class" + def K(self, X, X2=None): r = self._scaled_dist(X, X2) return self.K_of_r(r) @@ -92,55 +95,8 @@ class Stationary(Kern): def gradients_X_diag(self, dL_dKdiag, X): return np.zeros(X.shape) - def add(self, other, tensor=False): - if not tensor: - return StatAdd(self, other) - else: - return super(Stationary, self).add(other, tensor) - - def prod(self, other, tensor=False): - if not tensor: - return StatProd(self, other) - else: - return super(Stationary, self).prod(other, tensor) - - - -class StatAdd(Stationary): - """ - Addition of two Stationary kernels on the same space is still stationary. - - If you need to add two (stationary) kernels on separate spaces, use the generic add class. - """ - def __init__(self, k1, k2): - assert isinstance(k1, Stationary) - assert isinstance(k2, Stationary) - self.k1, self.k2 = k1, k2 - self.add_parameters(k1, k2) - - def K_of_r(self, r): - return self.k1.K(r) + self.k2.K(r) - - def dK_dr(self, r): - return self.k1.dK_dr + self.k2.dK_dr(r) - -class StatProd(Stationary): - """ - Product of two Stationary kernels on the same space is still stationary. - - If you need to multiply two (stationary) kernels on separate spaces, use the generic Prod class. - """ - def __init__(self, k1, k2): - assert isinstance(k1, Stationary) - assert isinstance(k2, Stationary) - self.k1, self.k2 = k1, k2 - self.add_parameters(k1, k2) - - def K_of_r(self, r): - return self.k1.K(r) * self.k2.K(r) - - def dK_dr(self, r): - return self.k1.dK_dr(r) * self.k2.K_of_r(r) + self.k2.dK_dr(r) * self.k1.K_of_r(r) + def input_sensitivity(self): + return np.ones(self.input_dim)/self.lengthscale class Exponential(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'): From b32929a8a5eb68a438a706097b5b68a84a634aec Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 24 Feb 2014 14:53:20 +0000 Subject: [PATCH 4/9] fixed stationary --- GPy/kern/_src/stationary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index e8586d07..19f531f2 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -102,8 +102,8 @@ class Exponential(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'): super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name) - def K(self, X, X2=None): - return self.variance * np.exp(-0.5 * dist) + def K_of_r(self, r): + return self.variance * np.exp(-0.5 * r) def dK_dr(self, r): return -0.5*self.K_of_r(r) From 0e01877586647a650c57e125a7e66b71242fb25d Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 24 Feb 2014 14:55:16 +0000 Subject: [PATCH 5/9] stuf in rbf might be broken --- GPy/kern/_src/rbf.py | 182 +++++++++---------------------------------- 1 file changed, 38 insertions(+), 144 deletions(-) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 28115fae..356160ac 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -9,81 +9,39 @@ from ...util.linalg import tdot from ...util.misc import fast_array_equal, param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp +from stationary import Stationary -class RBF(Kern): +class RBF(Stationary): """ Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: .. math:: - k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2} + k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) - where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance of the kernel - :type variance: float - :param lengthscale: the vector of lengthscale of the kernel - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. - :type ARD: Boolean - :rtype: kernel object - - .. Note: this object implements both the ARD and 'spherical' version of the function """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'): - super(RBF, self).__init__(input_dim, name) - self.input_dim = input_dim - self.ARD = ARD - - if not ARD: - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - lengthscale = np.ones(1) - else: - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - - self.variance = Param('variance', variance, Logexp()) - - self.lengthscale = Param('lengthscale', lengthscale, Logexp()) - self.lengthscale.add_observer(self, self.update_lengthscale) - self.update_lengthscale(self.lengthscale) - - self.add_parameters(self.variance, self.lengthscale) - self.parameters_changed() # initializes cache - + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='RBF'): + super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name) self.weave_options = {} - def update_lengthscale(self, l): - self.lengthscale2 = np.square(self.lengthscale) + def K_of_r(self, r): + return self.variance * np.exp(-0.5 * r**2) + + def dK_dr(self, r): + return -r*self.K_of_r(r) + + #---------------------------------------# + # PSI statistics # + #---------------------------------------# def parameters_changed(self): # reset cached results - self._X, self._X2 = np.empty(shape=(2, 1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - def K(self, X, X2=None): - self._K_computations(X, X2) - return self.variance * self._K_dvar - - def Kdiag(self, X): - ret = np.ones(X.shape[0]) - ret[:] = self.variance - return ret def psi0(self, Z, posterior_variational): - mu = posterior_variational.mean - ret = np.empty(mu.shape[0], dtype=np.float64) - ret[:] = self.variance - return ret + return self.Kdiag(posterior_variational.mean) def psi1(self, Z, posterior_variational): mu = posterior_variational.mean @@ -97,55 +55,30 @@ class RBF(Kern): self._psi_computations(Z, mu, S) return self._psi2 - def update_gradients_full(self, dL_dK, X): - self._K_computations(X, None) - self.variance.gradient = np.sum(self._K_dvar * dL_dK) - if self.ARD: - self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None) - else: - self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - #contributions from Kdiag - self.variance.gradient = np.sum(dL_dKdiag) - - #from Knm - self._K_computations(X, Z) - self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) - if self.ARD: - self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) - - else: - self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm) - - #from Kmm - self._K_computations(Z, None) - self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) - if self.ARD: - self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) - else: - self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + #contributions from Kmm + sself.update_gradients_full(dL_dKmm, Z) + mu = posterior_variational.mean S = posterior_variational.variance self._psi_computations(Z, mu, S) + l2 = self.lengthscale **2 #contributions from psi0: - self.variance.gradient = np.sum(dL_dpsi0) + self.variance.gradient += np.sum(dL_dpsi0) #from psi1 self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance) d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale) dpsi1_dlength = d_length * dL_dpsi1[:, :, None] if not self.ARD: - self.lengthscale.gradient = dpsi1_dlength.sum() + self.lengthscale.gradient += dpsi1_dlength.sum() else: - self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0) + self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0) #from psi2 d_var = 2.*self._psi2 / self.variance - d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom) + d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * self._psi2_denom) self.variance.gradient += np.sum(dL_dpsi2 * d_var) dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None] @@ -154,27 +87,20 @@ class RBF(Kern): else: self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0) - #from Kmm - self._K_computations(Z, None) - self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) - if self.ARD: - self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) - else: - self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): mu = posterior_variational.mean S = posterior_variational.variance self._psi_computations(Z, mu, S) + l2 = self.lengthscale **2 #psi1 - denominator = (self.lengthscale2 * (self._psi1_denom)) + denominator = (l2 * (self._psi1_denom)) dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) #psi2 - term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim - term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim + term1 = self._psi2_Zdist / l2 # num_inducing, num_inducing, input_dim + term2 = self._psi2_mudist / self._psi2_denom / l2 # N, num_inducing, num_inducing, input_dim dZ = self._psi2[:, :, :, None] * (term1[None] + term2) grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) @@ -186,55 +112,22 @@ class RBF(Kern): mu = posterior_variational.mean S = posterior_variational.variance self._psi_computations(Z, mu, S) + l2 = self.lengthscale **2 #psi1 - tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom + tmp = self._psi1[:, :, None] / l2 / self._psi1_denom grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) #psi2 - tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom + tmp = self._psi2[:, :, :, None] / l2 / self._psi2_denom grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) return grad_mu, grad_S - def gradients_X(self, dL_dK, X, X2=None): - #if self._X is None or X.base is not self._X.base or X2 is not None: - self._K_computations(X, X2) - if X2 is None: - _K_dist = 2*(X[:, None, :] - X[None, :, :]) - else: - _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. - gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - return np.sum(gradients_X * dL_dK.T[:, :, None], 0) - - def dKdiag_dX(self, dL_dKdiag, X): - return np.zeros(X.shape[0]) - - #---------------------------------------# - # PSI statistics # - #---------------------------------------# - #---------------------------------------# # Precomputations # #---------------------------------------# - def _K_computations(self, X, X2): - #params = self._get_params() - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)): - #self._X = X.copy() - #self._params_save = params.copy() - if X2 is None: - self._X2 = None - X = X / self.lengthscale - Xsquare = np.sum(np.square(X), 1) - self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :]) - else: - self._X2 = X2.copy() - X = X / self.lengthscale - X2 = X2 / self.lengthscale - self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :]) - self._K_dvar = np.exp(-0.5 * self._K_dist2) - def _dL_dlengthscales_via_K(self, dL_dK, X, X2): """ A helper function for update_gradients_* methods @@ -301,19 +194,20 @@ class RBF(Kern): if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S): # something's changed. recompute EVERYTHING + l2 = self.lengthscale **2 # psi1 - self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1. + self._psi1_denom = S[:, None, :] / l2 + 1. self._psi1_dist = Z[None, :, :] - mu[:, None, :] - self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom + self._psi1_dist_sq = np.square(self._psi1_dist) / l2 / self._psi1_denom self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1) self._psi1 = self.variance * np.exp(self._psi1_exponent) # psi2 - self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q + self._psi2_denom = 2.*S[:, None, None, :] / l2 + 1. # N,M,M,Q self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat) # self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q - # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom) + # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(l2*self._psi2_denom) # self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q @@ -332,11 +226,11 @@ class RBF(Kern): psi2_Zdist_sq = self._psi2_Zdist_sq _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim) half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim) - variance_sq = float(np.square(self.variance)) + variance_sq = np.float64(np.square(self.variance)) if self.ARD: - lengthscale2 = self.lengthscale2 + lengthscale2 = self.lengthscale **2 else: - lengthscale2 = np.ones(input_dim) * self.lengthscale2 + lengthscale2 = np.ones(input_dim) * self.lengthscale2**2 code = """ double tmp; From 2f7ebb96ba185c76a3d6873846df565b888dbe48 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 14:56:28 +0000 Subject: [PATCH 6/9] added initialization --- GPy/util/initialization.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 GPy/util/initialization.py diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py new file mode 100644 index 00000000..24194b41 --- /dev/null +++ b/GPy/util/initialization.py @@ -0,0 +1,17 @@ +''' +Created on 24 Feb 2014 + +@author: maxz +''' + +import numpy as np +from linalg import PCA + +def initialize_latent(init, input_dim, Y): + Xr = np.random.randn(Y.shape[0], input_dim) + if init == 'PCA': + PC = PCA(Y, input_dim)[0] + Xr[:PC.shape[0], :PC.shape[1]] = PC + else: + pass + return Xr \ No newline at end of file From 2f3e0611f84b26c2cb38a427941d3d6da2d75a9f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 24 Feb 2014 14:57:05 +0000 Subject: [PATCH 7/9] fixed stationary again --- GPy/kern/_src/stationary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 19f531f2..3b8e391b 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -215,7 +215,7 @@ class Cosine(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Cosine'): super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, name) - def K_of_r(self, r) + def K_of_r(self, r): return self.variance * np.cos(r) def dK_dr(self, r): @@ -238,7 +238,7 @@ class RatQuad(Stationary): self.power = Param('power', power, Logexp()) self.add_parameters(self.power) - def K_of_r(self, r) + def K_of_r(self, r): return self.variance*(1. + r**2/2.)**(-self.power) def dK_dr(self, r): From d7f217d8afae7b8b2b12d92e998ef1a7bbfbdae8 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 24 Feb 2014 14:57:23 +0000 Subject: [PATCH 8/9] bugfixin in bernioulli --- GPy/likelihoods/bernoulli.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 388ce173..9460007b 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -2,9 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from scipy import stats, special -import scipy as sp -from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +from ...util.univariate_Gaussian import std_norm_pdf, std_norm_cdf import link_functions from likelihood import Likelihood From 66577a8fb07beb9caa3b9a9612706e9a91cbde76 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 24 Feb 2014 14:58:28 +0000 Subject: [PATCH 9/9] more bugfixin --- GPy/likelihoods/bernoulli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 9460007b..10df906d 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ...util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf import link_functions from likelihood import Likelihood