mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-29 15:59:41 +02:00
[datasets] delete packed data in hapmap dataset
This commit is contained in:
parent
99699e9e02
commit
533e5fb744
1 changed file with 11 additions and 5 deletions
|
|
@@ -687,14 +687,20 @@ def hapmap3(data_set='hapmap3'):
|
||||||
import bz2
|
import bz2
|
||||||
except ImportError as i:
|
except ImportError as i:
|
||||||
raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset"
|
raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset"
|
||||||
if not data_available(data_set):
|
|
||||||
download_data(data_set)
|
|
||||||
dirpath = os.path.join(data_path,'hapmap3')
|
dirpath = os.path.join(data_path,'hapmap3')
|
||||||
hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly'
|
hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly'
|
||||||
|
unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
|
||||||
|
unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files))
|
||||||
|
|
||||||
|
if not unpacked_files_exist and not data_available(data_set):
|
||||||
|
download_data(data_set)
|
||||||
|
|
||||||
preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \
|
preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \
|
||||||
['.snps.pickle',
|
['.snps.pickle',
|
||||||
'.info.pickle',
|
'.info.pickle',
|
||||||
'.nan.pickle']]
|
'.nan.pickle']]
|
||||||
|
|
||||||
if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)):
|
if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)):
|
||||||
if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB "
|
if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB "
|
||||||
"of memory and can take a (very) long time, continue? [Y/n]"):
|
"of memory and can take a (very) long time, continue? [Y/n]"):
|
||||||
|
|
@@ -708,8 +714,7 @@ def hapmap3(data_set='hapmap3'):
|
||||||
perc="="*int(20.*progress/100.))
|
perc="="*int(20.*progress/100.))
|
||||||
stdout.write(status); stdout.flush()
|
stdout.write(status); stdout.flush()
|
||||||
return status
|
return status
|
||||||
unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
|
if not unpacked_files_exist:
|
||||||
if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)):
|
|
||||||
status=write_status('unpacking...', 0, '')
|
status=write_status('unpacking...', 0, '')
|
||||||
curr = 0
|
curr = 0
|
||||||
for newfilepath in unpacked_files:
|
for newfilepath in unpacked_files:
|
||||||
|
|
@@ -726,6 +731,7 @@ def hapmap3(data_set='hapmap3'):
|
||||||
status=write_status('unpacking...', curr+12.*file_processed/(file_size), status)
|
status=write_status('unpacking...', curr+12.*file_processed/(file_size), status)
|
||||||
curr += 12
|
curr += 12
|
||||||
status=write_status('unpacking...', curr, status)
|
status=write_status('unpacking...', curr, status)
|
||||||
|
os.remove(filepath)
|
||||||
status=write_status('reading .ped...', 25, status)
|
status=write_status('reading .ped...', 25, status)
|
||||||
# Preprocess data:
|
# Preprocess data:
|
||||||
snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
|
snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
|
||||||
|
|
@@ -796,7 +802,7 @@ def hapmap3(data_set='hapmap3'):
|
||||||
def singlecell(data_set='singlecell'):
|
def singlecell(data_set='singlecell'):
|
||||||
if not data_available(data_set):
|
if not data_available(data_set):
|
||||||
download_data(data_set)
|
download_data(data_set)
|
||||||
|
|
||||||
from pandas import read_csv
|
from pandas import read_csv
|
||||||
dirpath = os.path.join(data_path, data_set)
|
dirpath = os.path.join(data_path, data_set)
|
||||||
filename = os.path.join(dirpath, 'singlecell.csv')
|
filename = os.path.join(dirpath, 'singlecell.csv')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue