mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
[datasets] Deng et al., labels revisited
This commit is contained in:
parent
c128c6f948
commit
9b3498a912
1 changed file with 17 additions and 3 deletions
|
|
@ -964,7 +964,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
|
|||
if not data_available(dataset):
|
||||
download_data(dataset)
|
||||
|
||||
from pandas import read_csv
|
||||
from pandas import read_csv, isnull
|
||||
dir_path = os.path.join(data_path, dataset)
|
||||
|
||||
# read the info .soft
|
||||
|
|
@ -983,6 +983,21 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
|
|||
c[1:4] = ['strain', 'cross', 'developmental_stage']
|
||||
sample_info.columns = c
|
||||
|
||||
# get the labels right:
|
||||
# Matches a parenthesised remark. The match is greedy: it runs from the first
# '(' to the last ')' in the string. Written as a raw string so that '\(' is
# not treated as an (invalid) string escape sequence.
rep = re.compile(r'\(.*\)')
def filter_dev_stage(row):
    """Turn one raw ``developmental_stage`` entry into a short label.

    Steps, in order:

    1. A missing value (as judged by ``isnull``) is replaced by
       ``"2-cell stage embryo"``.
       NOTE(review): presumably the samples without a stage annotation are
       known to be 2-cell embryos -- confirm against the sample metadata.
    2. A leading ``"developmental stage: "`` prefix is removed.
    3. The bare value ``'adult'`` becomes ``'adult liver'``.
    4. Every ``' stage '`` (with surrounding spaces) collapses to one space.
    5. Parenthesised text is replaced by a single space (see ``rep``).
    6. Leading and trailing spaces are stripped.

    :param row: a single cell of the ``developmental_stage`` column; either
        a string or a missing value.
    :returns: the cleaned label string.
    """
    prefix = "developmental stage: "
    if isnull(row):
        row = "2-cell stage embryo"
    if row.startswith(prefix):
        row = row[len(prefix):]
    if row == 'adult':
        row += " liver"
    row = row.replace(' stage ', ' ')
    row = rep.sub(' ', row)
    row = row.strip(' ')
    return row
|
||||
labels = sample_info.developmental_stage.apply(filter_dev_stage)
|
||||
|
||||
# Extract the tar file
|
||||
filename = os.path.join(dir_path, 'GSE45719_Raw.tar')
|
||||
with tarfile.open(filename, 'r') as files:
|
||||
|
|
@ -1016,8 +1031,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
|
|||
sample_info.index = data.index
|
||||
|
||||
# get the labels from the description
|
||||
rep = re.compile('fibroblast|\d+-cell|embryo|liver|blastocyst|blastomere|zygote', re.IGNORECASE)
|
||||
labels = sample_info.developmental_stage.apply(lambda row: " ".join(rep.findall(row)))
|
||||
#rep = re.compile('fibroblast|\d+-cell|embryo|liver|early blastocyst|mid blastocyst|late blastocyst|blastomere|zygote', re.IGNORECASE)
|
||||
|
||||
sys.stdout.write(' '*len(message) + '\r')
|
||||
sys.stdout.flush()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue