[datasets] rnaseq changed up

This commit is contained in:
Max Zwiessele 2016-07-19 14:31:37 +01:00
parent e9bad5c18f
commit 30b34591bb
2 changed files with 14 additions and 4 deletions

View file

@ -211,6 +211,12 @@ class Kern(Parameterized):
def input_sensitivity(self, summarize=True):
    """
    Return the sensitivity of this kernel for each input dimension.

    The sensitivity is an arbitrary measurement based on the kernel's
    per-dimension parameters and its overall scaling. Use it as a
    relative measurement only, not for absolute comparison between
    kernels.

    This implementation does not use ``summarize``; it returns
    ``np.zeros(self.input_dim)``, i.e. zero sensitivity everywhere.
    """
    n_dims = self.input_dim
    return np.zeros(n_dims)

View file

@ -98,7 +98,7 @@ def data_available(dataset_name=None):
try:
from itertools import zip_longest
except ImportError:
from itertools import zip_longest as zip_longest
from itertools import izip_longest as zip_longest
dr = data_resources[dataset_name]
zip_urls = (dr['files'], )
if 'save_names' in dr: zip_urls += (dr['save_names'], )
@ -1033,14 +1033,18 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
data = inner.RPKM.to_frame()
data.columns = [file_info.name[:-18]]
gene_info = inner.Refseq_IDs.to_frame()
gene_info.columns = [file_info.name[:-18]]
gene_info.columns = ['NCBI Reference Sequence']
else:
data[file_info.name[:-18]] = inner.RPKM
gene_info[file_info.name[:-18]] = inner.Refseq_IDs
#gene_info[file_info.name[:-18]] = inner.Refseq_IDs
# Strip GSM number off data index
rep = re.compile('GSM\d+_')
data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():])
from pandas import MultiIndex
columns = MultiIndex.from_tuples([row.split('_', 1) for row in data.columns])
columns.names = ['GEO Accession', 'index']
data.columns = columns
data = data.T
# make sure the same index gets used