diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 29d9032a..ca2583e0 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -180,40 +180,80 @@ class GP(Model):
         return Ysim
 
-    def plot_f(self, *args, **kwargs):
+    def plot_f(self, plot_limits=None, which_data_rows='all',
+               which_data_ycols='all', fixed_inputs=[],
+               levels=20, samples=0, fignum=None, ax=None, resolution=None,
+               plot_raw=True,
+               linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
         """
-
-        Plot the GP's view of the world, where the data is normalized and
-        before applying a likelihood.
-
-        This is a convenience function: arguments are passed to
-        GPy.plotting.matplot_dep.models_plots.plot_f_fit
-
+        Plot the GP's view of the world, where the data is normalized, before applying a likelihood.
+        This is a call to plot with plot_raw=True.
+        Data will not be plotted here, as the GP's view of the world
+        may live in another space, or in other units, than the data.
         """
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import models_plots
-        return models_plots.plot_fit_f(self,*args,**kwargs)
+        kw = {}
+        if linecol is not None:
+            kw['linecol'] = linecol
+        if fillcol is not None:
+            kw['fillcol'] = fillcol
+        return models_plots.plot_fit(self, plot_limits, which_data_rows,
+                                     which_data_ycols, fixed_inputs,
+                                     levels, samples, fignum, ax, resolution,
+                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
+                                     data_symbol=data_symbol, **kw)
 
-    def plot(self, *args, **kwargs):
+    def plot(self, plot_limits=None, which_data_rows='all',
+             which_data_ycols='all', fixed_inputs=[],
+             levels=20, samples=0, fignum=None, ax=None, resolution=None,
+             plot_raw=False,
+             linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
         """
         Plot the posterior of the GP.
-          - In one dimension, the function is plotted with a shaded region
-            identifying two standard deviations.
-          - In two dimsensions, a contour-plot shows the mean predicted
-            function
-          - In higher dimensions, use fixed_inputs to plot the GP with some of
-            the inputs fixed.
+          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
+          - In two dimensions, a contour plot shows the mean predicted function.
+          - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
 
         Can plot only part of the data and part of the posterior functions
-        using which_data_rows which_data_ycols and which_parts
-
-        This is a convenience function: arguments are passed to
-        GPy.plotting.matplot_dep.models_plots.plot_fit
+        using which_data_rows, which_data_ycols.
+        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
+        :type plot_limits: np.array
+        :param which_data_rows: which of the training data to plot (default all)
+        :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
+        :param which_data_ycols: when the data has several columns (independent outputs), only plot these
+        :type which_data_ycols: 'all' or a list of integers
+        :param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v.
+        :type fixed_inputs: a list of tuples
+        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+        :type resolution: int
+        :param levels: number of levels to plot in a contour plot.
+        :type levels: int
+        :param samples: the number of a posteriori samples to plot
+        :type samples: int
+        :param fignum: figure to plot on.
+        :type fignum: figure number
+        :param ax: axes to plot on.
+        :type ax: axes handle
+        :param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
+        :type linecol: matplotlib color
+        :param fillcol: color of fill [Tango.colorsHex['lightBlue']]
+        :param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created.
         """
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import models_plots
-        return models_plots.plot_fit(self,*args,**kwargs)
+        kw = {}
+        if linecol is not None:
+            kw['linecol'] = linecol
+        if fillcol is not None:
+            kw['fillcol'] = fillcol
+        return models_plots.plot_fit(self, plot_limits, which_data_rows,
+                                     which_data_ycols, fixed_inputs,
+                                     levels, samples, fignum, ax, resolution,
+                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
+                                     data_symbol=data_symbol, **kw)
 
     def input_sensitivity(self):
         """
diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py
index 1f3ac934..7fc76d07 100644
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@@ -7,6 +7,20 @@
 import numpy
 from numpy.lib.function_base import vectorize
 from lists_and_dicts import IntArrayDict
 
+def extract_properties_to_index(index, props):
+    prop_index = dict()
+    for i, cl in enumerate(props):
+        for c in cl:
+            ind = prop_index.get(c, list())
+            ind.append(index[i])
+            prop_index[c] = ind
+
+    for c, i in prop_index.items():
+        prop_index[c] = numpy.array(i, dtype=int)
+
+    return prop_index
+
+
 class ParameterIndexOperations(object):
     '''
     Index operations for storing param index _properties
@@ -66,8 +80,34 @@ class ParameterIndexOperations(object):
         return self._properties.values()
 
     def properties_for(self, index):
+        """
+        Returns a list of properties, such that each entry in the list corresponds
+        to an element of the given index.
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_for([2,3,5])
+        [['one'], ['one', 'two'], ['two']]
+        """
         return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)
 
+    def properties_to_index_dict(self, index):
+        """
+        Return a dictionary with properties as keys and index arrays as values.
+        Thus, the indices covered by each property (e.g. each constraint) that
+        occurs in the given index are collected in one dictionary.
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_to_index_dict([2,3,5])
+        {'one':[2,3], 'two':[3,5]}
+        """
+        props = self.properties_for(index)
+        prop_index = extract_properties_to_index(index, props)
+        return prop_index
+
     def add(self, prop, indices):
         self._properties[prop] = combine_indices(self._properties[prop], indices)
 
@@ -174,8 +214,32 @@ class ParameterIndexOperationsView(object):
 
     def properties_for(self, index):
+        """
+        Returns a list of properties, such that each entry in the list corresponds
+        to an element of the given index.
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_for([2,3,5])
+        [['one'], ['one', 'two'], ['two']]
+        """
         return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)
 
+    def properties_to_index_dict(self, index):
+        """
+        Return a dictionary with properties as keys and index arrays as values.
+        Thus, the indices covered by each property (e.g. each constraint) that
+        occurs in the given index are collected in one dictionary.
+
+        Example:
+        let properties: 'one':[1,2,3,4], 'two':[3,5,6]
+
+        >>> properties_to_index_dict([2,3,5])
+        {'one':[2,3], 'two':[3,5]}
+        """
+        return extract_properties_to_index(index, self.properties_for(index))
+
     def add(self, prop, indices):
         self._param_index_ops.add(prop, indices+self._offset)
 
diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index f0fbf4ea..9cedc46d 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED,
 import numpy as np
 import re
 
-__updated__ = '2014-05-15'
+__updated__ = '2014-05-20'
 
 class HierarchyError(Exception):
     """
@@ -50,11 +50,24 @@ class Observable(object):
     self as only argument to all its observers.
     """
     _updated = True
+    _updates = True
     def __init__(self, *args, **kwargs):
         super(Observable, self).__init__()
         from lists_and_dicts import ObserverList
         self.observers = ObserverList()
 
+    @property
+    def updates(self):
+        self._updates = self._highest_parent_._updates
+        return self._updates
+
+    @updates.setter
+    def updates(self, ups):
+        assert isinstance(ups, bool), "updates are either on (True) or off (False)"
+        self._highest_parent_._updates = ups
+        if ups:
+            self._trigger_params_changed()
+
     def add_observer(self, observer, callble, priority=0):
         """
         Add an observer `observer` with the callback `callble`
@@ -91,6 +104,8 @@ class Observable(object):
         :param min_priority: only notify observers with priority > min_priority
                              if min_priority is None, notify all observers in order
         """
+        if not self.updates:
+            return
         if which is None:
             which = self
         if min_priority is None:
@@ -309,6 +324,7 @@ class Indexable(Nameable, Observable):
         self._default_constraint_ = default_constraint
         from index_operations import ParameterIndexOperations
         self.constraints = ParameterIndexOperations()
+        self._old_constraints = ParameterIndexOperations()
         self.priors = ParameterIndexOperations()
         if self._default_constraint_ is not None:
             self.constrain(self._default_constraint_)
@@ -371,8 +387,10 @@ class Indexable(Nameable, Observable):
         """
         if value is not None:
             self[:] = value
-        reconstrained = self.unconstrain()
-        index = self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning)
+
+        index = self._raveled_index()
+        # reconstrained = self.unconstrain()
+        index = self._add_to_index_operations(self.constraints, index, __fixed__, warning)
         self._highest_parent_._set_fixed(self, index)
         self.notify_observers(self, None if trigger_parent else -np.inf)
         return index
diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py
index cfb2171d..dd9a07c4 100644
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@@ -272,8 +272,11 @@ class Parameterized(Parameterizable):
     def __setattr__(self, name, val):
         # override the default behaviour, if setting a param, so broadcasting can by used
         if hasattr(self, "parameters"):
-            pnames = 
self.parameter_names(False, adjust_for_printing=True, recursive=False) - if name in pnames: self.parameters[pnames.index(name)][:] = val; return + try: + pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) + if name in pnames: self.parameters[pnames.index(name)][:] = val; return + except AttributeError: + pass object.__setattr__(self, name, val); #=========================================================================== @@ -281,11 +284,14 @@ class Parameterized(Parameterizable): #=========================================================================== def __setstate__(self, state): super(Parameterized, self).__setstate__(state) - self._connect_parameters() - self._connect_fixes() - self._notify_parent_change() + try: + self._connect_parameters() + self._connect_fixes() + self._notify_parent_change() + self.parameters_changed() + except Exception as e: + print "WARNING: caught exception {!s}, trying to continue".format(e) - self.parameters_changed() def copy(self): c = super(Parameterized, self).copy() c._connect_parameters() diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 7c963913..ff67bb05 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -66,7 +66,11 @@ class SparseGP(GP): #gradients wrt Z self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient += self.kern.gradients_Z_expectations( - self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + self.grad_dict['dL_dpsi0'], + self.grad_dict['dL_dpsi1'], + self.grad_dict['dL_dpsi2'], + Z=self.Z, + variational_posterior=self.X) else: #gradients wrt kernel self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 484970b8..a216eec6 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -303,9 +303,11 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) - m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) - m.inference_method = VarDTCMissingData() - m.Y[inan] = _np.nan + Y[inan] = _np.nan + + m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, + inference_method=VarDTCMissingData(inan=inan), kernel=k) + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) m.likelihood.variance = .01 m.parameters_changed() @@ -338,7 +340,40 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): print "Optimizing Model:" m.optimize(messages=verbose, max_iters=8e3, gtol=.1) if plot: - m.plot_X_1d("MRD Latent Space 1D") + m.X.plot("MRD Latent Space 1D") + m.plot_scales("MRD Scales") + return m + +def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): + from GPy import kern + from GPy.models import MRD + from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData + + D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 + _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) + + #Ylist = [Ylist[0]] + k = kern.Linear(Q, ARD=True) + inanlist = [] + + for Y in Ylist: + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) + inanlist.append(inan) + Y[inan] = _np.nan + + imlist = [] + for inan in inanlist: + imlist.append(VarDTCMissingData(limit=1, 
inan=inan)) + + m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, + kernel=k, inference_method=imlist, + initx="random", initz='permute', **kw) + + if optimize: + print "Optimizing Model:" + m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1) + if plot: + m.X.plot("MRD Latent Space 1D") m.plot_scales("MRD Scales") return m @@ -483,21 +518,22 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): Q = 6 kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) - + m.data = data m.likelihood.variance = 0.001 - + # optimize - if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300) + if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10) if plot: - plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) + fig, (latent_axes, sense_axes) = plt.subplots(1, 2) plt.sca(latent_axes) m.plot_latent(ax=latent_axes) y = m.Y[:1, :].copy() data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect']) - GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) - plt.draw() - #raw_input('Press enter to finish') + dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) + fig.canvas.draw() + fig.canvas.show() + raw_input('Press enter to finish') return m diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 878c1e4f..8589a60e 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -38,6 +38,25 @@ class LatentFunctionInference(object): """ pass +class InferenceMethodList(LatentFunctionInference, list): + + def on_optimization_start(self): + for inf in self: + inf.on_optimization_start() + + def on_optimization_end(self): + for inf in self: + inf.on_optimization_end() + + def __getstate__(self): + state = [] + for inf in self: + state.append(inf) + return state + + def __setstate__(self, state): + for inf in state: + self.append(inf) from exact_gaussian_inference import ExactGaussianInference from laplace import Laplace diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 309cb7e5..70b0e0ea 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -95,7 +95,7 @@ class Posterior(object): """ if self._covariance is None: #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) - self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze() + self._covariance = (np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze() #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) return self._covariance diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 3043a7e8..a9a137dc 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -202,6 +202,17 @@ class VarDTCMissingData(LatentFunctionInference): def set_limit(self, limit): self._Y.limit = limit + def __getstate__(self): + # has to be overridden, as Cacher 
objects cannot be pickled.
+        return self._Y.limit, self._inan
+
+    def __setstate__(self, state):
+        # has to be overridden, as Cacher objects cannot be pickled.
+        from ...util.caching import Cacher
+        self.limit = state[0]
+        self._inan = state[1]
+        self._Y = Cacher(self._subarray_computations, self.limit)
+
     def _subarray_computations(self, Y):
         if self._inan is None:
             inan = np.isnan(Y)
@@ -272,7 +283,11 @@
             else:
                 beta = beta_all
                 VVT_factor = (beta*y)
-            VVT_factor_all[v, ind].flat = VVT_factor.flat
+            try:
+                VVT_factor_all[v, ind].flat = VVT_factor.flat
+            except ValueError:
+                mult = np.ravel_multi_index((v.nonzero()[0][:,None],ind[None,:]), VVT_factor_all.shape)
+                VVT_factor_all.flat[mult] = VVT_factor
             output_dim = y.shape[1]
             psi0 = psi0_all[v]
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 1c237a6c..12f5d444 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -134,7 +134,7 @@ class Add(CombinationKernel):
                     eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
                 else:
                     eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
-            target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
+            target += p1.gradients_Z_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         return target
 
     def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
diff --git a/GPy/kern/_src/psi_comp/rbf_psi_comp.py b/GPy/kern/_src/psi_comp/rbf_psi_comp.py
new file mode 100644
index 00000000..d5cd8951
--- /dev/null
+++ b/GPy/kern/_src/psi_comp/rbf_psi_comp.py
@@ -0,0 +1,170 @@
+"""
+The module for psi-statistics for RBF kernel
+"""
+
+import numpy as np
+from . import PSICOMP
+from GPy.util.caching import Cache_this
+from ....util.misc import param_to_array
+from scipy import weave
+from ....util.config import *
+
+class PSICOMP_RBF(PSICOMP):
+
+    @Cache_this(limit=1, ignore_args=(0,))
+    def psicomputations(self, variance, lengthscale, Z, variational_posterior):
+        """
+        Z - MxQ
+        mu - NxQ
+        S - NxQ
+        gamma - NxQ
+        """
+        # here are the "statistics" for psi0, psi1 and psi2
+        # Produced intermediate results:
+        # _psi1    NxM
+        mu = variational_posterior.mean
+        S = variational_posterior.variance
+
+        psi0 = np.empty(mu.shape[0])
+        psi0[:] = variance
+        psi1 = self._psi1computations(variance, lengthscale, Z, mu, S)
+        psi2 = self._psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
+        return psi0, psi1, psi2
+
+    @Cache_this(limit=1, ignore_args=(0,))
+    def _psi1computations(self, variance, lengthscale, Z, mu, S):
+        """
+        Z - MxQ
+        mu - NxQ
+        S - NxQ
+        gamma - NxQ
+        """
+        # here are the "statistics" for psi1
+        # Produced intermediate results:
+        # _psi1    NxM
+
+        lengthscale2 = np.square(lengthscale)
+
+        # psi1
+        _psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
+        _psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
+        _psi1 = variance*np.exp(_psi1_log)
+
+        return _psi1
+
+    @Cache_this(limit=1, ignore_args=(0,))
+    def _psi2computations(self, variance, lengthscale, Z, mu, S):
+        """
+        Z - MxQ
+        mu - NxQ
+        S - NxQ
+        gamma - NxQ
+        """
+        # here are the "statistics" for psi2
+        # Produced intermediate results:
+        # _psi2    MxM
+
+        lengthscale2 = np.square(lengthscale)
+
+        _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
+        _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
+        Z_hat = (Z[:,None,:]+Z[None,:,:])/2. 
#MxMxQ + denom = 1./(2.*S+lengthscale2) + _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) + _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) + + + return _psi2 + + @Cache_this(limit=1, ignore_args=(0,1,2,3)) + def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + ARD = (len(lengthscale)!=1) + + dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = self._psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) + dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = self._psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) + + dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 + + dL_dlengscale = dl_psi1 + dl_psi2 + if not ARD: + dL_dlengscale = dL_dlengscale.sum() + + dL_dmu = dmu_psi1 + dmu_psi2 + dL_dS = dS_psi1 + dS_psi2 + dL_dZ = dZ_psi1 + dZ_psi2 + + return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS + + def _psi1compDer(self, dL_dpsi1, variance, lengthscale, Z, mu, S): + """ + dL_dpsi1 - NxM + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + """ + # here are the "statistics" for psi1 + # Produced intermediate results: dL_dparams w.r.t. psi1 + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + + _psi1 = self._psi1computations(variance, lengthscale, Z, mu, S) + Lpsi1 = dL_dpsi1*_psi1 + Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ + denom = 1./(S+lengthscale2) + Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ + _dL_dvar = Lpsi1.sum()/variance + _dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom) + _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2. + _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom) + _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale) + + return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS + + def _psi2compDer(self, dL_dpsi2, variance, lengthscale, Z, mu, S): + """ + Z - MxQ + mu - NxQ + S - NxQ + gamma - NxQ + dL_dpsi2 - MxM + """ + # here are the "statistics" for psi2 + # Produced the derivatives w.r.t. 
psi2: + # _dL_dvariance 1 + # _dL_dlengthscale Q + # _dL_dZ MxQ + # _dL_dgamma NxQ + # _dL_dmu NxQ + # _dL_dS NxQ + + lengthscale2 = np.square(lengthscale) + denom = 1./(2*S+lengthscale2) + denom2 = np.square(denom) + + _psi2 = self._psi2computations(variance, lengthscale, Z, mu, S) # NxMxM + + Lpsi2 = dL_dpsi2[None,:,:]*_psi2 + Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N + Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ + Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ + Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ + Lpsi2Zhat = Lpsi2Z + Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2 + + _dL_dvar = Lpsi2sum.sum()*2/variance + _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) + _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] + _dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \ + 2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom) + _dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))- + (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) + + return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS + diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index f561baa4..ef8900f9 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -180,7 +180,7 @@ class Stationary(Kern): return np.zeros(X.shape) def input_sensitivity(self): - return np.ones(self.input_dim)/self.lengthscale + return np.ones(self.input_dim)/self.lengthscale**2 class Exponential(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'): diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 2e7589a0..bc30bcfc 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -82,8 +82,8 @@ class BayesianGPLVM(SparseGP): def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=True, legend=True, - plot_limits=None, - aspect='auto', updates=False, **kwargs): + plot_limits=None, + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
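+        # predict_kwargs are forwarded to model.predict when evaluating the latent
+        # space variance; imshow_kwargs are passed on to the underlying imshow call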
from ..plotting.matplot_dep import dim_reduction_plots
@@ -91,7 +91,7 @@ class BayesianGPLVM(SparseGP):
         return dim_reduction_plots.plot_latent(self, labels, which_indices,
                                                resolution, ax, marker, s,
                                                fignum, plot_inducing, legend,
-                                               plot_limits, aspect, updates, **kwargs)
+                                               plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
 
     def do_test_latents(self, Y):
         """
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 458a70a1..0e6547bb 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -9,16 +9,25 @@
 from ..core import Model
 from ..kern import Kern
 from ..core.parameterization.variational import NormalPosterior, NormalPrior
 from ..core.parameterization import Param, Parameterized
+from ..core.parameterization.observable_array import ObsAr
 from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
+from ..inference.latent_function_inference import InferenceMethodList
 from ..likelihoods import Gaussian
-from GPy.util.initialization import initialize_latent
+from ..util.initialization import initialize_latent
+from ..core.sparse_gp import SparseGP, GP
 
-class MRD(Model):
+class MRD(SparseGP):
     """
+    !WARNING: This is bleeding edge code and still in development.
+    Functionality may change fundamentally during development!
+
     Apply MRD to all given datasets Y in Ylist.
     Y_i in [n x p_i]
+
+    If Ylist is a dictionary, the keys of the dictionary are the names, and the
+    values are the different datasets to compare.
+
     The samples n in the datasets need to match up, whereas the dimensionality
     p_d can differ.
@@ -39,40 +48,71 @@ class MRD(Model):
     :param num_inducing: number of inducing inputs to use
     :param Z: initial inducing inputs
     :param kernel: list of kernels or kernel to copy for each output
-    :type kernel: [GPy.kern.kern] | GPy.kern.kern | None (default)
-    :param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use
-    :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use
+    :type kernel: [GPy.kern.Kern] | GPy.kern.Kern | None (default)
+    :param :class:`~GPy.inference.latent_function_inference inference_method:
+        InferenceMethodList of inferences, or one inference method for all
+    :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihoods: the likelihoods to use
     :param str name: the name of this model
     :param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None
     """
     def __init__(self, Ylist, input_dim, X=None, X_variance=None,
                  initx = 'PCA', initz = 'permute',
                  num_inducing=10, Z=None, kernel=None,
-                 inference_method=None, likelihood=None, name='mrd', Ynames=None):
-        super(MRD, self).__init__(name)
+                 inference_method=None, likelihoods=None, name='mrd', Ynames=None):
+        super(GP, self).__init__(name)
 
         self.input_dim = input_dim
         self.num_inducing = num_inducing
 
-        self.Ylist = Ylist
+        if isinstance(Ylist, dict):
+            Ynames, Ylist = zip(*Ylist.items())
+
+        self.Ylist = [ObsAr(Y) for Y in Ylist]
+
+        if Ynames is None:
+            Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
+        self.names = Ynames
+        assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
+
+        if inference_method is None:
+            self.inference_method = InferenceMethodList()
+            warned = False
+            for y in Ylist:
+                inan = np.isnan(y)
+                if np.any(inan):
+                    if not warned:
+                        print "WARNING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X"
+                        warned = True
self.inference_method.append(VarDTCMissingData(limit=1, inan=inan)) + else: + self.inference_method.append(VarDTC(limit=1)) + else: + if not isinstance(inference_method, InferenceMethodList): + inference_method = InferenceMethodList(inference_method) + self.inference_method = inference_method + + self._in_init_ = True - X, fracs = self._init_X(initx, Ylist) + if X is None: + X, fracs = self._init_X(initx, Ylist) + else: + fracs = [X.var(0)]*len(Ylist) self.Z = Param('inducing inputs', self._init_Z(initz, X)) self.num_inducing = self.Z.shape[0] # ensure M==N if M>N # sort out the kernels if kernel is None: from ..kern import RBF - self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))] + self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))] elif isinstance(kernel, Kern): - self.kern = [] + self.kernels = [] for i in range(len(Ylist)): k = kernel.copy() - self.kern.append(k) + self.kernels.append(k) else: assert len(kernel) == len(Ylist), "need one kernel per output" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" - self.kern = kernel + self.kernels = kernel if X_variance is None: X_variance = np.random.uniform(0.1, 0.2, X.shape) @@ -80,32 +120,27 @@ class MRD(Model): self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) - if likelihood is None: - self.likelihood = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] - else: self.likelihood = likelihood - - if inference_method is None: - self.inference_method= [] - for y in Ylist: - if np.any(np.isnan(y)): - self.inference_method.append(VarDTCMissingData(limit=1)) - else: - self.inference_method.append(VarDTC(limit=1)) - else: - self.inference_method = inference_method - self.inference_method.set_limit(len(Ylist)) + if likelihoods is None: + self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))] + else: self.likelihoods = likelihoods self.add_parameters(self.X, self.Z) - if Ynames is None: - Ynames = ['Y{}'.format(i) for i in range(len(Ylist))] + self.bgplvms = [] + self.num_data = Ylist[0].shape[0] + + for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist): + assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another" - for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood): p = Parameterized(name=n) p.add_parameter(k) + p.kern = k p.add_parameter(l) - setattr(self, 'Y{}'.format(i), p) + p.likelihood = l self.add_parameter(p) + self.bgplvms.append(p) + + self.posterior = None self._in_init_ = False def parameters_changed(self): @@ -114,13 +149,13 @@ class MRD(Model): self.Z.gradient[:] = 0. self.X.gradient[:] = 0. 
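+        # each dataset keeps its own kernel, likelihood and inference method,
+        # while X and Z are shared: the loop below accumulates every dataset's
+        # gradient contribution into the shared latent space and inducing inputs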
- for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): + for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method): posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) self.posteriors.append(posterior) self._log_marginal_likelihood += lml - # likelihood gradients + # likelihoods gradients l.update_gradients(grad_dict.pop('dL_dthetaL')) #gradients wrt kernel @@ -133,13 +168,20 @@ class MRD(Model): #gradients wrt Z self.Z.gradient += k.gradients_X(dL_dKmm, self.Z) self.Z.gradient += k.gradients_Z_expectations( - grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) + grad_dict['dL_dpsi0'], + grad_dict['dL_dpsi1'], + grad_dict['dL_dpsi2'], + Z=self.Z, variational_posterior=self.X) dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict) self.X.mean.gradient += dL_dmean self.X.variance.gradient += dL_dS # update for the KL divergence + self.posterior = self.posteriors[0] + self.kern = self.kernels[0] + self.likelihood = self.likelihoods[0] + self.variational_prior.update_gradients_KL(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) @@ -151,7 +193,7 @@ class MRD(Model): Ylist = self.Ylist if init in "PCA_concat": X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist)) - fracs = [fracs]*self.input_dim + fracs = [fracs]*len(Ylist) elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) fracs = [] @@ -162,7 +204,7 @@ class MRD(Model): else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) fracs = X.var(0) - fracs = [fracs]*self.input_dim + fracs = [fracs]*len(Ylist) X -= X.mean() X /= X.std() return X, fracs @@ -181,6 +223,7 @@ class MRD(Model): fig = pylab.figure(num=fignum) sharex_ax = None sharey_ax = None + plots = [] for i, g in enumerate(self.bgplvms): try: if sharex: @@ -197,26 +240,36 @@ class MRD(Model): ax = axes[i] else: raise ValueError("Need one axes per latent dimension input_dim") - plotf(i, g, ax) + plots.append(plotf(i, g, ax)) if sharey_ax is not None: pylab.setp(ax.get_yticklabels(), visible=False) pylab.draw() if axes is None: - fig.tight_layout() - return fig - else: - return pylab.gcf() + try: + fig.tight_layout() + except: + pass + return plots - def plot_X(self, fignum=None, ax=None): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) - return fig + def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, Yindex=0): + """ + Prediction for data set Yindex[default=0]. + This predicts the output mean and variance for the dataset given in Ylist[Yindex] + """ + self.posterior = self.posteriors[Yindex] + self.kern = self.kernels[Yindex] + self.likelihood = self.likelihoods[Yindex] + return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern) - def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): - fig = self._handle_plotting(fignum, - ax, - lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs), - sharex=sharex, sharey=sharey) - return fig + #=============================================================================== + # TODO: Predict! Maybe even change to several bgplvms, which share an X? 
+ #=============================================================================== + # def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): + # fig = self._handle_plotting(fignum, + # ax, + # lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs), + # sharex=sharex, sharey=sharey) + # return fig def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs): """ @@ -228,28 +281,58 @@ class MRD(Model): """ if titles is None: titles = [r'${}$'.format(name) for name in self.names] - ymax = reduce(max, [np.ceil(max(g.input_sensitivity())) for g in self.bgplvms]) + ymax = reduce(max, [np.ceil(max(g.kern.input_sensitivity())) for g in self.bgplvms]) def plotf(i, g, ax): ax.set_ylim([0,ymax]) - g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) + return g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig - def plot_latent(self, fignum=None, ax=None, *args, **kwargs): - fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) - return fig + def plot_latent(self, labels=None, which_indices=None, + resolution=50, ax=None, marker='o', s=40, + fignum=None, plot_inducing=True, legend=True, + plot_limits=None, + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): + """ + see plotting.matplot_dep.dim_reduction_plots.plot_latent + if predict_kwargs is None, will plot latent spaces for 0th dataset (and kernel), otherwise give + predict_kwargs=dict(Yindex='index') for plotting only the latent space of dataset with 'index'. + """ + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
+ from ..plotting.matplot_dep import dim_reduction_plots + if "Yindex" not in predict_kwargs: + predict_kwargs['Yindex'] = 0 + if ax is None: + fig = pylab.figure(num=fignum) + ax = fig.add_subplot(111) + else: + fig = ax.figure + plot = dim_reduction_plots.plot_latent(self, labels, which_indices, + resolution, ax, marker, s, + fignum, plot_inducing, legend, + plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) + ax.set_title(self.bgplvms[predict_kwargs['Yindex']].name) + try: + fig.tight_layout() + except: + pass - def _debug_plot(self): - self.plot_X_1d() - fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9)) - fig.clf() - axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))] - self.plot_X(ax=axes) - axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))] - self.plot_latent(ax=axes) - axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))] - self.plot_scales(ax=axes) - pylab.draw() - fig.tight_layout() + return plot + def __getstate__(self): + # TODO: + import copy + state = copy.copy(self.__dict__) + del state['kernels'] + del state['kern'] + del state['likelihood'] + return state + def __setstate__(self, state): + # TODO: + super(MRD, self).__setstate__(state) + self.kernels = [p.kern for p in self.bgplvms] + self.kern = self.kernels[0] + self.likelihood = self.likelihoods[0] + self.parameters_changed() \ No newline at end of file diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index f8413671..0ba082df 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -31,7 +31,7 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True, plot_limits=None, - aspect='auto', updates=False, **kwargs): + aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance @@ -60,7 +60,7 @@ def plot_latent(model, labels=None, which_indices=None, def plot_function(x): Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) Xtest_full[:, [input_1, input_2]] = x - _, var = model.predict(Xtest_full) + _, var = model.predict(Xtest_full, **predict_kwargs) var = var[:, :1] return np.log(var) @@ -81,7 +81,7 @@ def plot_latent(model, labels=None, which_indices=None, view = ImshowController(ax, plot_function, (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', - cmap=pb.cm.binary, **kwargs) + cmap=pb.cm.binary, **imshow_kwargs) # make sure labels are in order of input: ulabels = [] diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 84747d05..8f3e55b0 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) - plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) + if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, 
d], data_symbol, mew=1.5) #optionally plot some samples if samples: #NOTE not tested with fixed_inputs @@ -151,7 +151,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index ee505af9..557729da 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -88,7 +88,6 @@ class vector_show(matplotlib_show): class lvm(matplotlib_show): - def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1], disable_drag=False): """Visualize a latent variable model @@ -150,7 +149,6 @@ class lvm(matplotlib_show): pass def on_click(self, event): - print 'click!' if event.inaxes!=self.latent_axes: return if self.disable_drag: self.move_on = True @@ -228,11 +226,10 @@ class lvm_dimselect(lvm): self.labels = labels lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index) self.show_sensitivities() - print "use left and right mouse butons to select dimensions" + print "use left and right mouse buttons to select dimensions" def on_click(self, event): - if event.inaxes==self.sense_axes: new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1)) if event.button == 1: diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index 94504645..13a67744 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -4,7 +4,8 @@ Created on 13 Mar 2014 @author: maxz ''' import unittest, itertools -import cPickle as pickle +#import cPickle as pickle +import pickle import numpy as np from GPy.core.parameterization.index_operations import ParameterIndexOperations,\ ParameterIndexOperationsView diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 4d89ece2..d140fe3a 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -687,14 +687,20 @@ def hapmap3(data_set='hapmap3'): import bz2 except ImportError as i: raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset" - if not data_available(data_set): - download_data(data_set) + dirpath = os.path.join(data_path,'hapmap3') hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly' + unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']] + unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files)) + + if not unpacked_files_exist and not data_available(data_set): + download_data(data_set) + preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \ ['.snps.pickle', '.info.pickle', '.nan.pickle']] + if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)): if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB " "of memory and can take a (very) long time, 
continue? [Y/n]"): @@ -708,8 +714,7 @@ def hapmap3(data_set='hapmap3'): perc="="*int(20.*progress/100.)) stdout.write(status); stdout.flush() return status - unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']] - if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)): + if not unpacked_files_exist: status=write_status('unpacking...', 0, '') curr = 0 for newfilepath in unpacked_files: @@ -726,6 +731,7 @@ def hapmap3(data_set='hapmap3'): status=write_status('unpacking...', curr+12.*file_processed/(file_size), status) curr += 12 status=write_status('unpacking...', curr, status) + os.remove(filepath) status=write_status('reading .ped...', 25, status) # Preprocess data: snpstrnp = np.loadtxt(unpacked_files[0], dtype=str) @@ -796,7 +802,7 @@ def hapmap3(data_set='hapmap3'): def singlecell(data_set='singlecell'): if not data_available(data_set): download_data(data_set) - + from pandas import read_csv dirpath = os.path.join(data_path, data_set) filename = os.path.join(dirpath, 'singlecell.csv') diff --git a/GPy/util/pca.py b/GPy/util/pca.py index 6c548b3d..967d0e1b 100644 --- a/GPy/util/pca.py +++ b/GPy/util/pca.py @@ -106,12 +106,14 @@ class pca(object): ulabels.append(lab) nlabels = len(ulabels) if colors is None: - colors = [cmap(float(i) / nlabels) for i in range(nlabels)] + colors = iter([cmap(float(i) / nlabels) for i in range(nlabels)]) + else: + colors = iter(colors) X_ = self.project(X, self.Q)[:,dimensions] kwargs.update(dict(s=s)) plots = list() for i, l in enumerate(ulabels): - kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)])) + kwargs.update(dict(color=colors.next(), marker=marker[i % len(marker)])) plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs)) ax.set_xlabel(r"PC$_1$") ax.set_ylabel(r"PC$_2$") diff --git a/GPy/util/subarray_and_sorting.py b/GPy/util/subarray_and_sorting.py index ab7b7672..f1707394 100644 --- a/GPy/util/subarray_and_sorting.py +++ b/GPy/util/subarray_and_sorting.py @@ -4,7 +4,7 @@ .. moduleauthor:: Max Zwiessele ''' -__updated__ = '2013-12-02' +__updated__ = '2014-05-20' import numpy as np
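
A note on the `updates` switch added to Observable in parameter_core.py above: notify_observers now returns early while updates is False, so several parameter changes can be batched without recomputing the model after each one; setting updates back to True triggers a single recomputation via _trigger_params_changed(). A minimal usage sketch (the GPRegression setup is illustrative only, not part of this patch):

    import numpy as np
    import GPy

    X = np.random.rand(20, 1)
    Y = np.sin(X) + np.random.randn(20, 1) * 0.05
    m = GPy.models.GPRegression(X, Y)

    m.updates = False             # silence observer notifications
    m.kern.lengthscale = 0.5      # no recomputation here ...
    m.kern.variance = 2.          # ... nor here
    m.likelihood.variance = 0.01
    m.updates = True              # one recomputation for all three changes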