From f820e3b0bff71e41f0b3fdf3f887de538a7330d0 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Thu, 15 May 2014 18:47:20 +0100 Subject: [PATCH 01/15] [examples] stick demo --- GPy/examples/dimensionality_reduction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a15c2a93..a783cfaf 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -496,7 +496,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect']) GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) plt.draw() - #raw_input('Press enter to finish') + plt.show() + raw_input('Press enter to finish') return m From efc1f4413c11ce9f2e2880721f5e73a7e53f8641 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 16 May 2014 11:20:50 +0100 Subject: [PATCH 02/15] [ploting] dim reduction --- GPy/plotting/matplot_dep/dim_reduction_plots.py | 6 +++--- GPy/plotting/matplot_dep/models_plots.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index f8413671..0ba082df 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -31,7 +31,7 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True, plot_limits=None, - aspect='auto', updates=False, **kwargs): + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive 
variance @@ -60,7 +60,7 @@ def plot_latent(model, labels=None, which_indices=None, def plot_function(x): Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) Xtest_full[:, [input_1, input_2]] = x - _, var = model.predict(Xtest_full) + _, var = model.predict(Xtest_full, **predict_kwargs) var = var[:, :1] return np.log(var) @@ -81,7 +81,7 @@ def plot_latent(model, labels=None, which_indices=None, view = ImshowController(ax, plot_function, (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', - cmap=pb.cm.binary, **kwargs) + cmap=pb.cm.binary, **imshow_kwargs) # make sure labels are in order of input: ulabels = [] diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 84747d05..8f3e55b0 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) - plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) + if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) #optionally plot some samples if samples: #NOTE not tested with fixed_inputs @@ -151,7 +151,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) 
+ if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) From 94c84a23a3d5c9e1ed93b40e5848a83cf791b83f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 16 May 2014 11:21:08 +0100 Subject: [PATCH 03/15] [mrd] plotting, init, inference etc. --- GPy/core/gp.py | 82 ++++++++--- GPy/core/parameterization/parameterized.py | 19 ++- GPy/core/sparse_gp.py | 6 +- GPy/examples/dimensionality_reduction.py | 43 +++++- .../latent_function_inference/__init__.py | 19 +++ GPy/models/bayesian_gplvm.py | 4 +- GPy/models/mrd.py | 132 +++++++++++++----- GPy/testing/pickle_tests.py | 3 +- 8 files changed, 236 insertions(+), 72 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 29d9032a..ca2583e0 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -180,40 +180,80 @@ class GP(Model): return Ysim - def plot_f(self, *args, **kwargs): + def plot_f(self, plot_limits=None, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], + levels=20, samples=0, fignum=None, ax=None, resolution=None, + plot_raw=True, + linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'): """ - - Plot the GP's view of the world, where the data is normalized and - before applying a likelihood. - - This is a convenience function: arguments are passed to - GPy.plotting.matplot_dep.models_plots.plot_f_fit - + Plot the GP's view of the world, where the data is normalized and before applying a likelihood. + This is a call to plot with plot_raw=True. + Data will not be plotted in this, as the GP's view of the world + may live in another space, or units then the data. """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
from ..plotting.matplot_dep import models_plots - return models_plots.plot_fit_f(self,*args,**kwargs) + kw = {} + if linecol is not None: + kw['linecol'] = linecol + if fillcol is not None: + kw['fillcol'] = fillcol + return models_plots.plot_fit(self, plot_limits, which_data_rows, + which_data_ycols, fixed_inputs, + levels, samples, fignum, ax, resolution, + plot_raw=plot_raw, Y_metadata=Y_metadata, + data_symbol=data_symbol, **kw) - def plot(self, *args, **kwargs): + def plot(self, plot_limits=None, which_data_rows='all', + which_data_ycols='all', fixed_inputs=[], + levels=20, samples=0, fignum=None, ax=None, resolution=None, + plot_raw=False, + linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'): """ Plot the posterior of the GP. - - In one dimension, the function is plotted with a shaded region - identifying two standard deviations. - - In two dimsensions, a contour-plot shows the mean predicted - function - - In higher dimensions, use fixed_inputs to plot the GP with some of - the inputs fixed. + - In one dimension, the function is plotted with a shaded region identifying two standard deviations. + - In two dimsensions, a contour-plot shows the mean predicted function + - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed. Can plot only part of the data and part of the posterior functions - using which_data_rows which_data_ycols and which_parts - - This is a convenience function: arguments are passed to - GPy.plotting.matplot_dep.models_plots.plot_fit + using which_data_rowsm which_data_ycols. + :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. 
Defaluts to data limits + :type plot_limits: np.array + :param which_data_rows: which of the training data to plot (default all) + :type which_data_rows: 'all' or a slice object to slice model.X, model.Y + :param which_data_ycols: when the data has several columns (independant outputs), only plot these + :type which_data_rows: 'all' or a list of integers + :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. + :type fixed_inputs: a list of tuples + :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D + :type resolution: int + :param levels: number of levels to plot in a contour plot. + :type levels: int + :param samples: the number of a posteriori samples to plot + :type samples: int + :param fignum: figure to plot on. + :type fignum: figure number + :param ax: axes to plot on. + :type ax: axes handle + :type output: integer (first output is 0) + :param linecol: color of line to plot [Tango.colorsHex['darkBlue']] + :type linecol: + :param fillcol: color of fill [Tango.colorsHex['lightBlue']] + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
from ..plotting.matplot_dep import models_plots - return models_plots.plot_fit(self,*args,**kwargs) + kw = {} + if linecol is not None: + kw['linecol'] = linecol + if fillcol is not None: + kw['fillcol'] = fillcol + return models_plots.plot_fit(self, plot_limits, which_data_rows, + which_data_ycols, fixed_inputs, + levels, samples, fignum, ax, resolution, + plot_raw=plot_raw, Y_metadata=Y_metadata, + data_symbol=data_symbol, **kw) def input_sensitivity(self): """ diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index cfb2171d..48eb2ddc 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -272,8 +272,11 @@ class Parameterized(Parameterizable): def __setattr__(self, name, val): # override the default behaviour, if setting a param, so broadcasting can by used if hasattr(self, "parameters"): - pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) - if name in pnames: self.parameters[pnames.index(name)][:] = val; return + try: + pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) + if name in pnames: self.parameters[pnames.index(name)][:] = val; return + except AttributeError: + pass object.__setattr__(self, name, val); #=========================================================================== @@ -281,11 +284,15 @@ class Parameterized(Parameterizable): #=========================================================================== def __setstate__(self, state): super(Parameterized, self).__setstate__(state) - self._connect_parameters() - self._connect_fixes() - self._notify_parent_change() + try: + self._connect_parameters() + self._connect_fixes() + self._notify_parent_change() + + self.parameters_changed() + except Exception as e: + print "WARNING: caught exception {!s}, trying to continue".format(e) - self.parameters_changed() def copy(self): c = super(Parameterized, self).copy() c._connect_parameters() diff 
--git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 388b682a..ac752c36 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -66,7 +66,11 @@ class SparseGP(GP): #gradients wrt Z self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient += self.kern.gradients_Z_expectations( - self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + self.grad_dict['dL_dpsi0'], + self.grad_dict['dL_dpsi1'], + self.grad_dict['dL_dpsi2'], + Z=self.Z, + variational_posterior=self.X) else: #gradients wrt kernel self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 8acf5489..2a319e8a 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -303,9 +303,11 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) - m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) - m.inference_method = VarDTCMissingData() - m.Y[inan] = _np.nan + Y[inan] = _np.nan + + m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, + inference_method=VarDTCMissingData(inan=inan), kernel=k) + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) m.likelihood.variance = .01 m.parameters_changed() @@ -338,7 +340,40 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): print "Optimizing Model:" m.optimize(messages=verbose, max_iters=8e3, gtol=.1) if plot: - m.plot_X_1d("MRD Latent Space 1D") + m.X.plot("MRD Latent Space 1D") + m.plot_scales("MRD Scales") + return m + +def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): + from GPy import kern + from GPy.models import MRD + from 
GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData + + D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 + _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) + + #Ylist = [Ylist[0]] + k = kern.Linear(Q, ARD=True) + inanlist = [] + + for Y in Ylist: + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) + inanlist.append(inan) + Y[inan] = _np.nan + + imlist = [] + for inan in inanlist: + imlist.append(VarDTCMissingData(limit=1, inan=inan)) + + m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, + kernel=k, inference_method=imlist, + initx="random", initz='permute', **kw) + + if optimize: + print "Optimizing Model:" + m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1) + if plot: + m.X.plot("MRD Latent Space 1D") m.plot_scales("MRD Scales") return m diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 878c1e4f..8589a60e 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -38,6 +38,25 @@ class LatentFunctionInference(object): """ pass +class InferenceMethodList(LatentFunctionInference, list): + + def on_optimization_start(self): + for inf in self: + inf.on_optimization_start() + + def on_optimization_end(self): + for inf in self: + inf.on_optimization_end() + + def __getstate__(self): + state = [] + for inf in self: + state.append(inf) + return state + + def __setstate__(self, state): + for inf in state: + self.append(inf) from exact_gaussian_inference import ExactGaussianInference from laplace import Laplace diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 2bcbe0b2..26f4cd53 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -83,7 +83,7 @@ class BayesianGPLVM(SparseGP): resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=True, legend=True, plot_limits=None, - aspect='auto', 
updates=False, **kwargs): + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import dim_reduction_plots @@ -91,7 +91,7 @@ class BayesianGPLVM(SparseGP): return dim_reduction_plots.plot_latent(self, labels, which_indices, resolution, ax, marker, s, fignum, plot_inducing, legend, - plot_limits, aspect, updates, **kwargs) + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) def do_test_latents(self, Y): """ diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 458a70a1..73b267ba 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -11,9 +11,11 @@ from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization import Param, Parameterized from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC from ..likelihoods import Gaussian -from GPy.util.initialization import initialize_latent +from ..util.initialization import initialize_latent +from ..core.sparse_gp import SparseGP, GP +from ..inference.latent_function_inference import InferenceMethodList -class MRD(Model): +class MRD(SparseGP): """ Apply MRD to all given datasets Y in Ylist. 
@@ -39,17 +41,18 @@ class MRD(Model): :param num_inducing: number of inducing inputs to use :param Z: initial inducing inputs :param kernel: list of kernels or kernel to copy for each output - :type kernel: [GPy.kern.kern] | GPy.kern.kern | None (default) - :param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use - :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use + :type kernel: [GPy.kernels.kernels] | GPy.kernels.kernels | None (default) + :param :class:`~GPy.inference.latent_function_inference inference_method: + InferenceMethodList of inferences, or one inference method for all + :param :class:`~GPy.likelihoodss.likelihoods.likelihoods` likelihoods: the likelihoods to use :param str name: the name of this model :param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None """ def __init__(self, Ylist, input_dim, X=None, X_variance=None, initx = 'PCA', initz = 'permute', num_inducing=10, Z=None, kernel=None, - inference_method=None, likelihood=None, name='mrd', Ynames=None): - super(MRD, self).__init__(name) + inference_method=None, likelihoods=None, name='mrd', Ynames=None): + super(GP, self).__init__(name) self.input_dim = input_dim self.num_inducing = num_inducing @@ -63,16 +66,16 @@ class MRD(Model): # sort out the kernels if kernel is None: from ..kern import RBF - self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] + self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] elif isinstance(kernel, Kern): - self.kern = [] + self.kernels = [] for i in range(len(Ylist)): k = kernel.copy() - self.kern.append(k) + self.kernels.append(k) else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" 
- self.kern = kernel + self.kernels = kernel if X_variance is None: X_variance = np.random.uniform(0.1, 0.2, X.shape) @@ -80,32 +83,44 @@ class MRD(Model): self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) - if likelihood is None: - self.likelihood = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] - else: self.likelihood = likelihood + if likelihoods is None: + self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] + else: self.likelihoods = likelihoods if inference_method is None: - self.inference_method= [] + self.inference_method= InferenceMethodList() for y in Ylist: - if np.any(np.isnan(y)): - self.inference_method.append(VarDTCMissingData(limit=1)) + inan = np.isnan(y) + if np.any(inan): + self.inference_method.append(VarDTCMissingData(limit=1, inan=inan)) else: self.inference_method.append(VarDTC(limit=1)) else: + if not isinstance(inference_method, InferenceMethodList): + inference_method = InferenceMethodList(inference_method) self.inference_method = inference_method - self.inference_method.set_limit(len(Ylist)) self.add_parameters(self.X, self.Z) if Ynames is None: Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] + self.names = Ynames + + self.bgplvms = [] + self.num_data = Ylist[0].shape[0] + + for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist): + assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another" - for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood): p = Parameterized(name=n) p.add_parameter(k) + p.kern = k p.add_parameter(l) - setattr(self, 'Y{}'.format(i), p) + p.likelihood = l self.add_parameter(p) + self.bgplvms.append(p) + + self.posterior = None self._in_init_ = False def parameters_changed(self): @@ -114,13 +129,13 @@ class MRD(Model): self.Z.gradient[:] = 0. self.X.gradient[:] = 0. 
- for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): + for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method): posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) self.posteriors.append(posterior) self._log_marginal_likelihood += lml - # likelihood gradients + # likelihoods gradients l.update_gradients(grad_dict.pop('dL_dthetaL')) #gradients wrt kernel @@ -133,13 +148,20 @@ class MRD(Model): #gradients wrt Z self.Z.gradient += k.gradients_X(dL_dKmm, self.Z) self.Z.gradient += k.gradients_Z_expectations( - grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + grad_dict['dL_dpsi0'], + grad_dict['dL_dpsi1'], + grad_dict['dL_dpsi2'], + Z=self.Z, variational_posterior=self.X) dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict) self.X.mean.gradient += dL_dmean self.X.variance.gradient += dL_dS # update for the KL divergence + self.posterior = self.posteriors[0] + self.kern = self.kernels[0] + self.likelihood = self.likelihoods[0] + self.variational_prior.update_gradients_KL(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) @@ -207,16 +229,25 @@ class MRD(Model): else: return pylab.gcf() - def plot_X(self, fignum=None, ax=None): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) - return fig + def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0): + """ + Prediction for data set Yindex[default=0]. 
+ This predicts the output mean and variance for the dataset given in Ylist[Yindex] + """ + self.posterior = self.posteriors[Yindex] + self.kern = self.kernels[Yindex] + self.likelihood = self.likelihoods[Yindex] + return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern) - def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): - fig = self._handle_plotting(fignum, - ax, - lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs), - sharex=sharex, sharey=sharey) - return fig + #=============================================================================== + # TODO: Predict! Maybe even change to several bgplvms, which share an X? + #=============================================================================== + # def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): + # fig = self._handle_plotting(fignum, + # ax, + # lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs), + # sharex=sharex, sharey=sharey) + # return fig def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs): """ @@ -228,16 +259,28 @@ class MRD(Model): """ if titles is None: titles = [r'${}$'.format(name) for name in self.names] - ymax = reduce(max, [np.ceil(max(g.input_sensitivity())) for g in self.bgplvms]) + ymax = reduce(max, [np.ceil(max(g.kernels.input_sensitivity())) for g in self.bgplvms]) def plotf(i, g, ax): ax.set_ylim([0,ymax]) - g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) + g.kernels.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig - def plot_latent(self, fignum=None, ax=None, *args, **kwargs): - fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) - return fig + def plot_latent(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + 
fignum=None, plot_inducing=True, legend=True, + plot_limits=None, + aspect='auto', updates=False, predict_kwargs=dict(Yindex=0), imshow_kwargs={}): + """ + """ + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import dim_reduction_plots + + return dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) def _debug_plot(self): self.plot_X_1d() @@ -252,4 +295,19 @@ class MRD(Model): pylab.draw() fig.tight_layout() + def __getstate__(self): + # TODO: + import copy + state = copy.copy(self.__dict__) + del state['kernels'] + del state['kern'] + del state['likelihood'] + return state + def __setstate__(self, state): + # TODO: + super(MRD, self).__setstate__(state) + self.kernels = [p.kern for p in self.bgplvms] + self.kern = self.kernels[0] + self.likelihood = self.likelihoods[0] + self.parameters_changed() \ No newline at end of file diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index 94504645..13a67744 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -4,7 +4,8 @@ Created on 13 Mar 2014 @author: maxz ''' import unittest, itertools -import cPickle as pickle +#import cPickle as pickle +import pickle import numpy as np from GPy.core.parameterization.index_operations import ParameterIndexOperations,\ ParameterIndexOperationsView From 01d6b91f9079dfc4aab01c6531d5e9ff4a6b326e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 16 May 2014 15:12:19 +0100 Subject: [PATCH 04/15] [mrd] missing data implemented, and plotting better --- GPy/core/parameterization/parameterized.py | 1 - .../latent_function_inference/var_dtc.py | 11 ++++ GPy/models/mrd.py | 55 ++++++++++--------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/GPy/core/parameterization/parameterized.py 
b/GPy/core/parameterization/parameterized.py index 48eb2ddc..dd9a07c4 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -288,7 +288,6 @@ class Parameterized(Parameterizable): self._connect_parameters() self._connect_fixes() self._notify_parent_change() - self.parameters_changed() except Exception as e: print "WARNING: caught exception {!s}, trying to continue".format(e) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 3043a7e8..b5e6787b 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -202,6 +202,17 @@ class VarDTCMissingData(LatentFunctionInference): def set_limit(self, limit): self._Y.limit = limit + def __getstate__(self): + # has to be overridden, as Cacher objects cannot be pickled. + return self._Y.limit, self._inan + + def __setstate__(self, state): + # has to be overridden, as Cacher objects cannot be pickled. 
+ from ...util.caching import Cacher + self.limit = state[0] + self._inan = state[1] + self._Y = Cacher(self._subarray_computations, self.limit) + def _subarray_computations(self, Y): if self._inan is None: inan = np.isnan(Y) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 73b267ba..c8067c01 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -203,6 +203,7 @@ class MRD(SparseGP): fig = pylab.figure(num=fignum) sharex_ax = None sharey_ax = None + plots = [] for i, g in enumerate(self.bgplvms): try: if sharex: @@ -219,15 +220,16 @@ class MRD(SparseGP): ax = axes[i] else: raise ValueError("Need one axes per latent dimension input_dim") - plotf(i, g, ax) + plots.append(plotf(i, g, ax)) if sharey_ax is not None: pylab.setp(ax.get_yticklabels(), visible=False) pylab.draw() if axes is None: - fig.tight_layout() - return fig - else: - return pylab.gcf() + try: + fig.tight_layout() + except: + pass + return plots def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0): """ @@ -259,10 +261,10 @@ class MRD(SparseGP): """ if titles is None: titles = [r'${}$'.format(name) for name in self.names] - ymax = reduce(max, [np.ceil(max(g.kernels.input_sensitivity())) for g in self.bgplvms]) + ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms]) def plotf(i, g, ax): ax.set_ylim([0,ymax]) - g.kernels.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) + return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig @@ -270,30 +272,33 @@ class MRD(SparseGP): resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=True, legend=True, plot_limits=None, - aspect='auto', updates=False, predict_kwargs=dict(Yindex=0), imshow_kwargs={}): + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): """ + see plotting.matplot_dep.dim_reduction_plots.plot_latent + if predict_kwargs is None, will plot latent spaces for 0th 
dataset (and kernel), otherwise give + predict_kwargs=dict(Yindex='index') for plotting only the latent space of dataset with 'index'. """ import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import dim_reduction_plots + if "Yindex" not in predict_kwargs: + predict_kwargs['Yindex'] = 0 + if ax is None: + fig = pylab.figure(num=fignum) + ax = fig.add_subplot(111) + else: + fig = ax.figure + plot = dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) + ax.set_title(self.bgplvms[predict_kwargs['Yindex']].name) + try: + fig.tight_layout() + except: + pass - return dim_reduction_plots.plot_latent(self, labels, which_indices, - resolution, ax, marker, s, - fignum, plot_inducing, legend, - plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) - - def _debug_plot(self): - self.plot_X_1d() - fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9)) - fig.clf() - axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))] - self.plot_X(ax=axes) - axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))] - self.plot_latent(ax=axes) - axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))] - self.plot_scales(ax=axes) - pylab.draw() - fig.tight_layout() + return plot def __getstate__(self): # TODO: From dafad623637e3d0d8ef797bd5ab17f861b52fcd3 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:43:18 +0100 Subject: [PATCH 05/15] [lmv_dimselect] we need to keep a pointer to the lvm_dimselect object, as the updates are weak references: dim_select = ... 
--- GPy/examples/dimensionality_reduction.py | 14 +++++++------- GPy/plotting/matplot_dep/visualize.py | 5 +---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 2a319e8a..a216eec6 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -518,21 +518,21 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): Q = 6 kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) - + m.data = data m.likelihood.variance = 0.001 - + # optimize - if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300) + if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10) if plot: - plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) + fig, (latent_axes, sense_axes) = plt.subplots(1, 2) plt.sca(latent_axes) m.plot_latent(ax=latent_axes) y = m.Y[:1, :].copy() data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect']) - GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) - plt.draw() - plt.show() + dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) + fig.canvas.draw() + fig.canvas.show() raw_input('Press enter to finish') return m diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index d7884730..3ba120aa 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -88,7 +88,6 @@ class vector_show(matplotlib_show): class lvm(matplotlib_show): - def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]): """Visualize a latent variable model @@ -147,7 +146,6 @@ 
class lvm(matplotlib_show): pass def on_click(self, event): - print 'click!' if event.inaxes!=self.latent_axes: return self.move_on = not self.move_on self.called = True @@ -220,11 +218,10 @@ class lvm_dimselect(lvm): self.labels = labels lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index) self.show_sensitivities() - print "use left and right mouse butons to select dimensions" + print "use left and right mouse buttons to select dimensions" def on_click(self, event): - if event.inaxes==self.sense_axes: new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1)) if event.button == 1: From dfa23b77c51d6962ec2456f5b0b51da318f834d7 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:43:58 +0100 Subject: [PATCH 06/15] [posterior] adjusted for more than one covariance per output --- GPy/inference/latent_function_inference/posterior.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 309cb7e5..70b0e0ea 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -95,7 +95,7 @@ class Posterior(object): """ if self._covariance is None: #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) - self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze() + self._covariance = (np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze() #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) return self._covariance From 8c8d06c8aec83c44b86e08c95b641c8d80213660 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:44:59 +0100 Subject: [PATCH 07/15] [pca] colors now as iterator --- GPy/util/pca.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git
a/GPy/util/pca.py b/GPy/util/pca.py index 6c548b3d..967d0e1b 100644 --- a/GPy/util/pca.py +++ b/GPy/util/pca.py @@ -106,12 +106,14 @@ class pca(object): ulabels.append(lab) nlabels = len(ulabels) if colors is None: - colors = [cmap(float(i) / nlabels) for i in range(nlabels)] + colors = iter([cmap(float(i) / nlabels) for i in range(nlabels)]) + else: + colors = iter(colors) X_ = self.project(X, self.Q)[:,dimensions] kwargs.update(dict(s=s)) plots = list() for i, l in enumerate(ulabels): - kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)])) + kwargs.update(dict(color=colors.next(), marker=marker[i % len(marker)])) plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs)) ax.set_xlabel(r"PC$_1$") ax.set_ylabel(r"PC$_2$") From 99699e9e020959f18d17a0b250049da52e25aa42 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:45:34 +0100 Subject: [PATCH 08/15] [vardtc missing data] can handle non broadcastable selections --- GPy/inference/latent_function_inference/var_dtc.py | 6 +++++- GPy/util/subarray_and_sorting.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index b5e6787b..a9a137dc 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -283,7 +283,11 @@ class VarDTCMissingData(LatentFunctionInference): else: beta = beta_all VVT_factor = (beta*y) - VVT_factor_all[v, ind].flat = VVT_factor.flat + try: + VVT_factor_all[v, ind].flat = VVT_factor.flat + except ValueError: + mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape) + VVT_factor_all.flat[mult] = VVT_factor output_dim = y.shape[1] psi0 = psi0_all[v] diff --git a/GPy/util/subarray_and_sorting.py b/GPy/util/subarray_and_sorting.py index ab7b7672..f1707394 100644 --- a/GPy/util/subarray_and_sorting.py +++ b/GPy/util/subarray_and_sorting.py @@ 
-4,7 +4,7 @@ .. moduleauthor:: Max Zwiessele ''' -__updated__ = '2013-12-02' +__updated__ = '2014-05-20' import numpy as np From 533e5fb744fdbbc52f00f2768a485fc5e8b85ba8 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:46:29 +0100 Subject: [PATCH 09/15] [datasets] delete packed data in hapmap dataset --- GPy/util/datasets.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 4d89ece2..d140fe3a 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -687,14 +687,20 @@ def hapmap3(data_set='hapmap3'): import bz2 except ImportError as i: raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset" - if not data_available(data_set): - download_data(data_set) + dirpath = os.path.join(data_path,'hapmap3') hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly' + unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']] + unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files)) + + if not unpacked_files_exist and not data_available(data_set): + download_data(data_set) + preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \ ['.snps.pickle', '.info.pickle', '.nan.pickle']] + if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)): if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB " "of memory and can take a (very) long time, continue? 
[Y/n]"): @@ -708,8 +714,7 @@ def hapmap3(data_set='hapmap3'): perc="="*int(20.*progress/100.)) stdout.write(status); stdout.flush() return status - unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']] - if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)): + if not unpacked_files_exist: status=write_status('unpacking...', 0, '') curr = 0 for newfilepath in unpacked_files: @@ -726,6 +731,7 @@ def hapmap3(data_set='hapmap3'): status=write_status('unpacking...', curr+12.*file_processed/(file_size), status) curr += 12 status=write_status('unpacking...', curr, status) + os.remove(filepath) status=write_status('reading .ped...', 25, status) # Preprocess data: snpstrnp = np.loadtxt(unpacked_files[0], dtype=str) @@ -796,7 +802,7 @@ def hapmap3(data_set='hapmap3'): def singlecell(data_set='singlecell'): if not data_available(data_set): download_data(data_set) - + from pandas import read_csv dirpath = os.path.join(data_path, data_set) filename = os.path.join(dirpath, 'singlecell.csv') From 76718a8fcc14dd435c8f8d2a9b153fb55fccc5fd Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:47:28 +0100 Subject: [PATCH 10/15] [stationary] input_sensitiviy is now 1/(l**2) --- GPy/kern/_src/stationary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index f561baa4..ef8900f9 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -180,7 +180,7 @@ class Stationary(Kern): return np.zeros(X.shape) def input_sensitivity(self): - return np.ones(self.input_dim)/self.lengthscale + return np.ones(self.input_dim)/self.lengthscale**2 class Exponential(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'): From 1ebcee967a8df5073d7e868a96de073aac465fe1 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:47:57 +0100 Subject: [PATCH 
11/15] [psi-stats] add kernel was missing a psi zero call --- GPy/kern/_src/add.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 1c237a6c..12f5d444 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -134,7 +134,7 @@ class Add(CombinationKernel): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. - target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): From 995de897b3472fb6682a419f42bca377f88191f2 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:48:53 +0100 Subject: [PATCH 12/15] [core updates] first try to switch updates off and on. Use m.updates = False to switch updates off, and vice-versa --- GPy/core/parameterization/parameter_core.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index f0fbf4ea..9329651d 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, import numpy as np import re -__updated__ = '2014-05-15' +__updated__ = '2014-05-20' class HierarchyError(Exception): """ @@ -50,11 +50,24 @@ class Observable(object): self as only argument to all its observers. 
""" _updated = True + _updates = True def __init__(self, *args, **kwargs): super(Observable, self).__init__() from lists_and_dicts import ObserverList self.observers = ObserverList() + @property + def updates(self): + self._updates = self._highest_parent_._updates + return self._updates + + @updates.setter + def updates(self, ups): + assert isinstance(ups, bool), "updates are either on (True) or off (False)" + self._highest_parent_._updates = ups + if ups: + self._trigger_params_changed() + def add_observer(self, observer, callble, priority=0): """ Add an observer `observer` with the callback `callble` @@ -91,6 +104,8 @@ class Observable(object): :param min_priority: only notify observers with priority > min_priority if min_priority is None, notify all observers in order """ + if not self.updates: + return if which is None: which = self if min_priority is None: From ef256223d142b7752bad6a9e9dd4d52fbe3be1f3 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 14:49:20 +0100 Subject: [PATCH 13/15] [mrd] more control for init, some missing data adjustements, init greatly improved --- GPy/models/mrd.py | 66 ++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index c8067c01..0e6547bb 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -9,18 +9,25 @@ from ..core import Model from ..kern import Kern from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization import Param, Parameterized +from ..core.parameterization.observable_array import ObsAr from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC +from ..inference.latent_function_inference import InferenceMethodList from ..likelihoods import Gaussian from ..util.initialization import initialize_latent from ..core.sparse_gp import SparseGP, GP -from ..inference.latent_function_inference import InferenceMethodList class 
MRD(SparseGP): """ + !WARNING: This is bleeding edge code and still in development. + Functionality may change fundamentally during development! + Apply MRD to all given datasets Y in Ylist. Y_i in [n x p_i] + If Ylist is a dictionary, the keys of the dictionary are the names, and the + values are the different datasets to compare. + The samples n in the datasets need to match up, whereas the dimensionality p_d can differ. @@ -57,16 +64,46 @@ class MRD(SparseGP): self.input_dim = input_dim self.num_inducing = num_inducing - self.Ylist = Ylist + if isinstance(Ylist, dict): + Ynames, Ylist = zip(*Ylist.items()) + + self.Ylist = [ObsAr(Y) for Y in Ylist] + + if Ynames is None: + Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] + self.names = Ynames + assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict" + + if inference_method is None: + self.inference_method= InferenceMethodList() + warned = False + for y in Ylist: + inan = np.isnan(y) + if np.any(inan): + if not warned: + print "WARING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X" + warned = True + self.inference_method.append(VarDTCMissingData(limit=1, inan=inan)) + else: + self.inference_method.append(VarDTC(limit=1)) + else: + if not isinstance(inference_method, InferenceMethodList): + inference_method = InferenceMethodList(inference_method) + self.inference_method = inference_method + + self._in_init_ = True - X, fracs = self._init_X(initx, Ylist) + if X is None: + X, fracs = self._init_X(initx, Ylist) + else: + fracs = [X.var(0)]*len(Ylist) self.Z = Param('inducing inputs', self._init_Z(initz, X)) self.num_inducing = self.Z.shape[0] # ensure M==N if M>N # sort out the kernels if kernel is None: from ..kern import RBF - self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] + self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in 
range(len(Ylist))] elif isinstance(kernel, Kern): self.kernels = [] for i in range(len(Ylist)): @@ -87,25 +124,8 @@ class MRD(SparseGP): self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] else: self.likelihoods = likelihoods - if inference_method is None: - self.inference_method= InferenceMethodList() - for y in Ylist: - inan = np.isnan(y) - if np.any(inan): - self.inference_method.append(VarDTCMissingData(limit=1, inan=inan)) - else: - self.inference_method.append(VarDTC(limit=1)) - else: - if not isinstance(inference_method, InferenceMethodList): - inference_method = InferenceMethodList(inference_method) - self.inference_method = inference_method - self.add_parameters(self.X, self.Z) - if Ynames is None: - Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] - self.names = Ynames - self.bgplvms = [] self.num_data = Ylist[0].shape[0] @@ -173,7 +193,7 @@ class MRD(SparseGP): Ylist = self.Ylist if init in "PCA_concat": X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist)) - fracs = [fracs]*self.input_dim + fracs = [fracs]*len(Ylist) elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) fracs = [] @@ -184,7 +204,7 @@ class MRD(SparseGP): else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) fracs = X.var(0) - fracs = [fracs]*self.input_dim + fracs = [fracs]*len(Ylist) X -= X.mean() X /= X.std() return X, fracs From c507cfe4ab85fed86d102f16cbe483f4e19dc88f Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 16:06:56 +0100 Subject: [PATCH 14/15] [index operations] added lookup for properties for a given index as dict for given index --- GPy/core/parameterization/index_operations.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 1f3ac934..7fc76d07 100644 --- a/GPy/core/parameterization/index_operations.py +++ 
b/GPy/core/parameterization/index_operations.py @@ -7,6 +7,20 @@ import numpy from numpy.lib.function_base import vectorize from lists_and_dicts import IntArrayDict +def extract_properties_to_index(index, props): + prop_index = dict() + for i, cl in enumerate(props): + for c in cl: + ind = prop_index.get(c, list()) + ind.append(index[i]) + prop_index[c] = ind + + for c, i in prop_index.items(): + prop_index[c] = numpy.array(i, dtype=int) + + return prop_index + + class ParameterIndexOperations(object): ''' Index operations for storing param index _properties @@ -66,8 +80,34 @@ class ParameterIndexOperations(object): return self._properties.values() def properties_for(self, index): + """ + Returns a list of properties, such that each entry in the list corresponds + to the element of the index given. + + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_for([2,3,5]) + [['one'], ['one', 'two'], ['two']] + """ return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index) + def properties_to_index_dict(self, index): + """ + Return a dictionary, containing properties as keys and indices as index + Thus, the indices for each constraint, which is contained will be collected as + one dictionary + + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_to_index_dict([2,3,5]) + {'one':[2,3], 'two':[3,5]} + """ + props = self.properties_for(index) + prop_index = extract_properties_to_index(index, props) + return prop_index + def add(self, prop, indices): self._properties[prop] = combine_indices(self._properties[prop], indices) @@ -174,8 +214,32 @@ class ParameterIndexOperationsView(object): def properties_for(self, index): + """ + Returns a list of properties, such that each entry in the list corresponds + to the element of the index given. 
+ + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_for([2,3,5]) + [['one'], ['one', 'two'], ['two']] + """ return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index) + def properties_to_index_dict(self, index): + """ + Return a dictionary, containing properties as keys and indices as index + Thus, the indices for each constraint, which is contained will be collected as + one dictionary + + Example: + let properties: 'one':[1,2,3,4], 'two':[3,5,6] + + >>> properties_to_index_dict([2,3,5]) + {'one':[2,3], 'two':[3,5]} + """ + return extract_properties_to_index(index, self.properties_for(index)) + def add(self, prop, indices): self._param_index_ops.add(prop, indices+self._offset) From b520eb212c2a1171b8bea319dc32b91b8d07c070 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 20 May 2014 16:39:52 +0100 Subject: [PATCH 15/15] [fixing] fixing now saves the old constraint --- GPy/core/parameterization/parameter_core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 9329651d..9cedc46d 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -324,6 +324,7 @@ class Indexable(Nameable, Observable): self._default_constraint_ = default_constraint from index_operations import ParameterIndexOperations self.constraints = ParameterIndexOperations() + self._old_constraints = ParameterIndexOperations() self.priors = ParameterIndexOperations() if self._default_constraint_ is not None: self.constrain(self._default_constraint_) @@ -386,8 +387,10 @@ class Indexable(Nameable, Observable): """ if value is not None: self[:] = value - reconstrained = self.unconstrain() - index = self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning) + + index = self._raveled_index() + # reconstrained = self.unconstrain() + 
index = self._add_to_index_operations(self.constraints, index, __fixed__, warning) self._highest_parent_._set_fixed(self, index) self.notify_observers(self, None if trigger_parent else -np.inf) return index