Added gpx dataset.

This commit is contained in:
Neil Lawrence 2013-08-22 14:03:59 +02:00
parent 7cd119d2b0
commit 00a5f8e5f6
4 changed files with 123 additions and 91 deletions

View file

@ -10,6 +10,8 @@ import zipfile
import tarfile
import gzip
import zlib
import datetime
import sys, urllib
def reporthook(a,b,c):
@ -65,6 +67,12 @@ The database was created with funding from NSF EIA-0196217.""",
'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
'license':None,
'size':3729650},
'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
'citation' : '',
'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
'license':None,
'size': 2031872},
'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
@ -271,6 +279,34 @@ def cmu_urls_files(subj_motions, messages = True):
resource['files'].append(file_download)
return resource
try:
import gpxpy
import gpxpy.gpx
gpxpy_available = True
except ImportError:
gpxpy_available = False
if gpxpy_available:
def epomeo_gpx(data_set='epomeo_gpx'):
if not data_available(data_set):
download_data(data_set)
files = ['endomondo_1', 'endomondo_2', 'garmin_watch_via_endomondo','viewranger_phone', 'viewranger_tablet']
X = []
for file in files:
gpx_file = open(os.path.join(data_path, 'epomeo_gpx', file + '.gpx'), 'r')
gpx = gpxpy.parse(gpx_file)
segment = gpx.tracks[0].segments[0]
points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
X.append(np.asarray(data))
gpx_file.close()
return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)
del gpxpy_available
# Some general utilities.
def sample_class(f):
@ -439,7 +475,7 @@ def ripley_synth(data_set='ripley_prnn_data'):
ytest = test[:, 2:3]
return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
def osu_run1(data_set='osu_run1'):
def osu_run1(data_set='osu_run1', sample_every=4):
if not data_available(data_set):
download_data(data_set)
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
@ -447,7 +483,7 @@ def osu_run1(data_set='osu_run1'):
for name in zip.namelist():
zip.extract(name, path)
Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
Y = Y[0:-1:4, :]
Y = Y[0:-1:sample_every, :]
return data_details_return({'Y': Y, 'connect' : connect}, data_set)
def swiss_roll_generated(num_samples=1000, sigma=0.0):