[datasets] rnaseq changed up

This commit is contained in:
Max Zwiessele 2016-07-19 14:31:37 +01:00
parent e9bad5c18f
commit 30b34591bb
2 changed files with 14 additions and 4 deletions

View file

@ -211,6 +211,12 @@ class Kern(Parameterized):
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
""" """
Returns the sensitivity for each dimension of this kernel. Returns the sensitivity for each dimension of this kernel.
This is an arbitrary measure based on the per-dimension parameters
of the kernel and on its scaling in general.
Use this as a relative measurement, not for absolute comparison between
kernels.
""" """
return np.zeros(self.input_dim) return np.zeros(self.input_dim)

View file

@ -98,7 +98,7 @@ def data_available(dataset_name=None):
try: try:
from itertools import zip_longest from itertools import zip_longest
except ImportError: except ImportError:
from itertools import zip_longest as zip_longest from itertools import izip_longest as zip_longest
dr = data_resources[dataset_name] dr = data_resources[dataset_name]
zip_urls = (dr['files'], ) zip_urls = (dr['files'], )
if 'save_names' in dr: zip_urls += (dr['save_names'], ) if 'save_names' in dr: zip_urls += (dr['save_names'], )
@ -1033,14 +1033,18 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
data = inner.RPKM.to_frame() data = inner.RPKM.to_frame()
data.columns = [file_info.name[:-18]] data.columns = [file_info.name[:-18]]
gene_info = inner.Refseq_IDs.to_frame() gene_info = inner.Refseq_IDs.to_frame()
gene_info.columns = [file_info.name[:-18]] gene_info.columns = ['NCBI Reference Sequence']
else: else:
data[file_info.name[:-18]] = inner.RPKM data[file_info.name[:-18]] = inner.RPKM
gene_info[file_info.name[:-18]] = inner.Refseq_IDs #gene_info[file_info.name[:-18]] = inner.Refseq_IDs
# Strip GSM number off data index # Strip GSM number off data index
rep = re.compile('GSM\d+_') rep = re.compile('GSM\d+_')
data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():])
from pandas import MultiIndex
columns = MultiIndex.from_tuples([row.split('_', 1) for row in data.columns])
columns.names = ['GEO Accession', 'index']
data.columns = columns
data = data.T data = data.T
# make sure the same index gets used # make sure the same index gets used