diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 4d89ece2..d140fe3a 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -687,14 +687,20 @@ def hapmap3(data_set='hapmap3'):
         import bz2
     except ImportError as i:
         raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset"
-    if not data_available(data_set):
-        download_data(data_set)
+
     dirpath = os.path.join(data_path,'hapmap3')
     hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly'
+    unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
+    unpacked_files_exist = reduce(lambda a, b:a and b, map(os.path.exists, unpacked_files))
+
+    if not unpacked_files_exist and not data_available(data_set):
+        download_data(data_set)
+
     preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \
                                ['.snps.pickle',
                                 '.info.pickle',
                                 '.nan.pickle']]
+
     if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)):
         if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB "
                             "of memory and can take a (very) long time, continue? [Y/n]"):
@@ -708,8 +714,7 @@ def hapmap3(data_set='hapmap3'):
                                                                perc="="*int(20.*progress/100.))
             stdout.write(status); stdout.flush()
             return status
-        unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
-        if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)):
+        if not unpacked_files_exist:
             status=write_status('unpacking...', 0, '')
             curr = 0
             for newfilepath in unpacked_files:
@@ -726,6 +731,7 @@ def hapmap3(data_set='hapmap3'):
                             status=write_status('unpacking...', curr+12.*file_processed/(file_size), status)
                 curr += 12
                 status=write_status('unpacking...', curr, status)
+                os.remove(filepath)
         status=write_status('reading .ped...', 25, status)
         # Preprocess data:    
         snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
@@ -796,7 +802,7 @@ def hapmap3(data_set='hapmap3'):
 def singlecell(data_set='singlecell'):
     if not data_available(data_set):
         download_data(data_set)
-    
+
     from pandas import read_csv
     dirpath = os.path.join(data_path, data_set)
     filename = os.path.join(dirpath, 'singlecell.csv')