# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import itertools
import logging
from functools import reduce

from ..kern import Kern
from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization import Param, Parameterized
from ..core.parameterization.observable_array import ObsAr
from ..inference.latent_function_inference.var_dtc import VarDTC
from ..inference.latent_function_inference import InferenceMethodList
from ..likelihoods import Gaussian
from ..util.initialization import initialize_latent
from ..core.sparse_gp import SparseGP, GP
from GPy.core.parameterization.variational import VariationalPosterior
from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch


class MRD(BayesianGPLVMMiniBatch):
    """
    !WARNING: This is bleeding edge code and still in development.
    Functionality may change fundamentally during development!

    Apply MRD to all given datasets Y in Ylist.

    Y_i in [n x p_i]

    If Ylist is a dictionary, the keys of the dictionary are the names, and
    the values are the different datasets to compare.

    The samples n in the datasets need to match up, whereas the
    dimensionality p_d can differ.

    :param [array-like] Ylist: List of datasets to apply MRD on
    :param input_dim: latent dimensionality
    :type input_dim: int
    :param array-like X: mean of starting latent space q in [n x q]
    :param array-like X_variance: variance of starting latent space q in [n x q]
    :param initx: initialisation method for the latent space:

        * 'concat' - PCA on concatenation of all datasets
        * 'single' - Concatenation of PCA on datasets, respectively
        * 'random' - Random draw from a Normal(0,1)

        The default 'PCA' is treated as 'concat'.
    :type initx: ['concat'|'single'|'random']
    :param initz: initialisation method for inducing inputs
    :type initz: 'permute'|'random'
    :param num_inducing: number of inducing inputs to use
    :param Z: initial inducing inputs
    :param kernel: list of kernels or kernel to copy for each output
    :type kernel: [GPy.kern.Kern] | GPy.kern.Kern | None (default)
    :param :class:`~GPy.inference.latent_function_inference.InferenceMethodList` inference_method:
        InferenceMethodList of inferences, or one inference method for all
    :param :class:`~GPy.likelihoods.Likelihood` likelihoods: the likelihoods to use
    :param str name: the name of this model
    :param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None
    :param bool|Norm normalizer: How to normalize the data?
    :param bool stochastic: Should this model use stochastic gradient descent over the dimensions?
    :param bool|[bool] batchsize: either one batchsize for all, or one batchsize per dataset.
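
    Example (a minimal sketch; array shapes and parameter values below are
    illustrative, not prescriptive)::

        import numpy as np
        import GPy

        Y1 = np.random.randn(100, 8)   # first view: 100 samples x 8 dims
        Y2 = np.random.randn(100, 5)   # second view: same 100 samples
        m = GPy.models.MRD([Y1, Y2], input_dim=3, num_inducing=10)
        m.optimize(messages=True, max_iters=100)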
""" def __init__(self, Ylist, input_dim, X=None, X_variance=None, initx = 'PCA', initz = 'permute', num_inducing=10, Z=None, kernel=None, inference_method=None, likelihoods=None, name='mrd', Ynames=None, normalizer=False, stochastic=False, batchsize=10): self.logger = logging.getLogger(self.__class__.__name__) self.input_dim = input_dim self.num_inducing = num_inducing if isinstance(Ylist, dict): Ynames, Ylist = zip(*Ylist.items()) self.logger.debug("creating observable arrays") self.Ylist = [ObsAr(Y) for Y in Ylist] if Ynames is None: self.logger.debug("creating Ynames") Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] self.names = Ynames assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict" if inference_method is None: self.inference_method = InferenceMethodList([VarDTC() for _ in xrange(len(self.Ylist))]) else: assert isinstance(inference_method, InferenceMethodList), "please provide one inference method per Y in the list and provide it as InferenceMethodList, inference_method given: {}".format(inference_method) self.inference_method = inference_method if X is None: X, fracs = self._init_X(initx, Ylist) else: fracs = [X.var(0)]*len(Ylist) Z = self._init_Z(initz, X) self.Z = Param('inducing inputs', Z) self.num_inducing = self.Z.shape[0] # ensure M==N if M>N # sort out the kernels self.logger.info("building kernels") if kernel is None: from ..kern import RBF kernels = [RBF(input_dim, ARD=1, lengthscale=1./fracs[i]) for i in range(len(Ylist))] elif isinstance(kernel, Kern): kernels = [] for i in range(len(Ylist)): k = kernel.copy() kernels.append(k) else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" kernels = kernel self.variational_prior = NormalPrior() #self.X = NormalPosterior(X, X_variance) if likelihoods is None: likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] else: likelihoods = likelihoods self.logger.info("adding X and Z") super(MRD, self).__init__(Y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing, Z=self.Z, kernel=None, inference_method=self.inference_method, likelihood=Gaussian(), name='manifold relevance determination', normalizer=None, missing_data=False, stochastic=False, batchsize=1) self._log_marginal_likelihood = 0 self.unlink_parameter(self.likelihood) self.unlink_parameter(self.kern) del self.kern del self.likelihood self.num_data = Ylist[0].shape[0] if isinstance(batchsize, int): batchsize = itertools.repeat(batchsize) self.bgplvms = [] for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize): assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another" md = np.isnan(Y).any() spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance, Z=Z, kernel=k, likelihood=l, inference_method=im, name=n, normalizer=normalizer, missing_data=md, stochastic=stochastic, batchsize=bs) spgp.kl_factr = 1./len(Ynames) spgp.unlink_parameter(spgp.Z) spgp.unlink_parameter(spgp.X) del spgp.Z del spgp.X spgp.Z = self.Z spgp.X = self.X self.link_parameter(spgp, i+2) self.bgplvms.append(spgp) self.posterior = None self.logger.info("init done") def parameters_changed(self): self._log_marginal_likelihood = 0 self.Z.gradient[:] = 0. self.X.gradient[:] = 0. 
    def parameters_changed(self):
        self._log_marginal_likelihood = 0
        self.Z.gradient[:] = 0.
        self.X.gradient[:] = 0.
        for b, i in zip(self.bgplvms, self.inference_method):
            self._log_marginal_likelihood += b._log_marginal_likelihood
            self.logger.info('working on im <{}>'.format(hex(id(i))))
            self.Z.gradient[:] += b.full_values['Zgrad']

            grad_dict = b.full_values

            if self.has_uncertain_inputs():
                self.X.mean.gradient += grad_dict['meangrad']
                self.X.variance.gradient += grad_dict['vargrad']
            else:
                self.X.gradient += grad_dict['Xgrad']

        if self.has_uncertain_inputs():
            # update for the KL divergence
            self.variational_prior.update_gradients_KL(self.X)
            self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

    def log_likelihood(self):
        return self._log_marginal_likelihood

    def _init_X(self, init='PCA', Ylist=None):
        if Ylist is None:
            Ylist = self.Ylist
        # note: substring checks, so both 'PCA' and 'concat' select "PCA_concat"
        if init in "PCA_concat":
            X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
            fracs = [fracs]*len(Ylist)
        elif init in "PCA_single":
            X = np.zeros((Ylist[0].shape[0], self.input_dim))
            fracs = []
            for qs, Y in zip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
                x, frcs = initialize_latent('PCA', len(qs), Y)
                X[:, qs] = x
                fracs.append(frcs)
        else:  # init == 'random':
            X = np.random.randn(Ylist[0].shape[0], self.input_dim)
            fracs = X.var(0)
            fracs = [fracs]*len(Ylist)

        X -= X.mean()
        X /= X.std()

        return X, fracs

    def _init_Z(self, init="permute", X=None):
        if X is None:
            X = self.X
        if init in "permute":
            Z = np.random.permutation(X.copy())[:self.num_inducing]
        elif init in "random":
            Z = np.random.randn(self.num_inducing, self.input_dim) * X.var()
        return Z

    def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
        import matplotlib.pyplot as plt
        if axes is None:
            fig = plt.figure(num=fignum)
        sharex_ax = None
        sharey_ax = None
        plots = []
        for i, g in enumerate(self.bgplvms):
            try:
                if sharex:
                    sharex_ax = ax  # @UndefinedVariable
                    sharex = False  # don't set twice
                if sharey:
                    sharey_ax = ax  # @UndefinedVariable
                    sharey = False  # don't set twice
            except NameError:
                # ax is not yet defined on the first iteration
                pass
            if axes is None:
                ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax)
            elif isinstance(axes, (tuple, list, np.ndarray)):
                ax = axes[i]
            else:
                raise ValueError("Need one axes per latent dimension input_dim")
            plots.append(plotf(i, g, ax))
            if sharey_ax is not None:
                plt.setp(ax.get_yticklabels(), visible=False)
        plt.draw()
        if axes is None:
            try:
                fig.tight_layout()
            except:
                pass
        return plots

    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0):
        """
        Prediction for dataset Yindex (default 0).
        This predicts the output mean and variance for the dataset given in Ylist[Yindex].
        """
        b = self.bgplvms[Yindex]
        self.posterior = b.posterior
        self.kern = b.kern
        self.likelihood = b.likelihood
        return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)
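
    # Usage sketch (illustrative names): given a trained model `m` and test
    # latent points `Xtest` of shape [n* x q],
    #
    #     mu, var = m.predict(Xtest, Yindex=1)
    #
    # returns the predictive mean and variance for the second dataset in
    # Ylist, using that view's kernel and likelihood.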
    #===============================================================================
    # TODO: Predict! Maybe even change to several bgplvms, which share an X?
    #===============================================================================
    # def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs):
    #     fig = self._handle_plotting(fignum,
    #                                 ax,
    #                                 lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs),
    #                                 sharex=sharex, sharey=sharey)
    #     return fig

    def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
        """
        Plot the ARD scales (input sensitivities) of all datasets, one
        subplot per dataset.

        :param titles: titles for axes of datasets
        """
        if titles is None:
            titles = [r'${}$'.format(name) for name in self.names]
        ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms])
        def plotf(i, g, ax):
            #ax.set_ylim([0,ymax])
            return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
        fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey)
        return fig

    def plot_latent(self, labels=None, which_indices=None,
                    resolution=50, ax=None, marker='o', s=40,
                    fignum=None, plot_inducing=True, legend=True,
                    plot_limits=None, aspect='auto', updates=False,
                    predict_kwargs={}, imshow_kwargs={}):
        """
        See plotting.matplot_dep.dim_reduction_plots.plot_latent.

        By default this plots the latent space for the 0th dataset (and its
        kernel); pass predict_kwargs=dict(Yindex=index) to plot the latent
        space of the dataset at the given index instead.
        """
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from matplotlib import pyplot as plt
        from ..plotting.matplot_dep import dim_reduction_plots

        if "Yindex" not in predict_kwargs:
            predict_kwargs['Yindex'] = 0
        Yindex = predict_kwargs['Yindex']
        if ax is None:
            fig = plt.figure(num=fignum)
            ax = fig.add_subplot(111)
        else:
            fig = ax.figure
        self.kern = self.bgplvms[Yindex].kern
        self.likelihood = self.bgplvms[Yindex].likelihood
        plot = dim_reduction_plots.plot_latent(self, labels, which_indices,
                                               resolution, ax, marker, s,
                                               fignum, plot_inducing, legend,
                                               plot_limits, aspect, updates,
                                               predict_kwargs, imshow_kwargs)
        ax.set_title(self.bgplvms[Yindex].name)
        try:
            fig.tight_layout()
        except:
            pass
        return plot

    def __getstate__(self):
        # kern and likelihood are only temporary references set by
        # predict/plot_latent; drop them before pickling.
        state = super(MRD, self).__getstate__()
        if 'kern' in state:
            del state['kern']
        if 'likelihood' in state:
            del state['likelihood']
        return state

    def __setstate__(self, state):
        # TODO:
        super(MRD, self).__setstate__(state)
        self.kern = self.bgplvms[0].kern
        self.likelihood = self.bgplvms[0].likelihood
        self.parameters_changed()
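

if __name__ == '__main__':
    # Minimal smoke test, a sketch only: dataset sizes, noise levels and the
    # iteration count are illustrative. Because this module uses relative
    # imports, run it as `python -m GPy.models.mrd`.
    np.random.seed(0)
    t = np.linspace(0, 2*np.pi, 100)[:, None]
    # two views of a shared underlying signal
    Y1 = np.hstack([np.sin(t), np.cos(t)]) + 0.05*np.random.randn(100, 2)
    Y2 = np.hstack([np.sin(t), -np.sin(t), np.cos(2*t)]) + 0.05*np.random.randn(100, 3)
    m = MRD([Y1, Y2], input_dim=3, num_inducing=15)
    m.optimize(messages=True, max_iters=50)
    print(m.log_likelihood())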