Added gpx dataset.

This commit is contained in:
Neil Lawrence 2013-08-22 14:03:59 +02:00
parent 7cd119d2b0
commit 00a5f8e5f6
4 changed files with 123 additions and 91 deletions

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import * from constructors import *
@ -6,4 +6,4 @@ try:
from constructors import rbf_sympy, sympykern # these depend on sympy from constructors import rbf_sympy, sympykern # these depend on sympy
except: except:
pass pass
from kern import kern from kern import *

View file

@ -536,3 +536,86 @@ class kern(Parameterized):
else: else:
raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions" raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = GPy.kern.rbf(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if X2==None:
X2 = np.random.randn(num_samples2, kernel.input_dim)
if dL_dK==None:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
#self.constrained_indices=[]
#self.constraints=[]
Model.__init__(self)
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<0):
return False
else:
return True
def _get_params(self):
return self.kernel._get_params()
def _get_param_names(self):
return self.kernel._get_param_names()
def _set_params(self, x):
self.kernel._set_params(x)
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)

View file

@ -4,93 +4,6 @@
import unittest import unittest
import numpy as np import numpy as np
import GPy import GPy
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
num_samples = 20
num_samples2 = 10
if kernel==None:
kernel = GPy.kern.rbf(1)
if X==None:
X = np.random.randn(num_samples, kernel.input_dim)
if X2==None:
X2 = np.random.randn(num_samples2, kernel.input_dim)
if dL_dK==None:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
#self.constrained_indices=[]
#self.constraints=[]
Model.__init__(self)
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<0):
return False
else:
return True
def _get_params(self):
return self.kernel._get_params()
def _get_param_names(self):
return self.kernel._get_param_names()
def _set_params(self, x):
self.kernel._set_params(x)
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
def _get_param_names(self):
names = []
for i in range(self.X.shape[0]):
for j in range(self.X.shape[0]):
names.append('X_' +str(i) + ','+str(j))
return names
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)

View file

@ -10,6 +10,8 @@ import zipfile
import tarfile import tarfile
import gzip import gzip
import zlib import zlib
import datetime
import sys, urllib import sys, urllib
def reporthook(a,b,c): def reporthook(a,b,c):
@ -65,6 +67,12 @@ The database was created with funding from NSF EIA-0196217.""",
'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.", 'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
'license':None, 'license':None,
'size':3729650}, 'size':3729650},
'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
'citation' : '',
'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
'license':None,
'size': 2031872},
'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'], 'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']], 'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593', 'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
@ -271,6 +279,34 @@ def cmu_urls_files(subj_motions, messages = True):
resource['files'].append(file_download) resource['files'].append(file_download)
return resource return resource
try:
import gpxpy
import gpxpy.gpx
gpxpy_available = True
except ImportError:
gpxpy_available = False
if gpxpy_available:
def epomeo_gpx(data_set='epomeo_gpx'):
if not data_available(data_set):
download_data(data_set)
files = ['endomondo_1', 'endomondo_2', 'garmin_watch_via_endomondo','viewranger_phone', 'viewranger_tablet']
X = []
for file in files:
gpx_file = open(os.path.join(data_path, 'epomeo_gpx', file + '.gpx'), 'r')
gpx = gpxpy.parse(gpx_file)
segment = gpx.tracks[0].segments[0]
points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
X.append(np.asarray(data))
gpx_file.close()
return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)
del gpxpy_available
# Some general utilities. # Some general utilities.
def sample_class(f): def sample_class(f):
@ -439,7 +475,7 @@ def ripley_synth(data_set='ripley_prnn_data'):
ytest = test[:, 2:3] ytest = test[:, 2:3]
return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set) return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
def osu_run1(data_set='osu_run1'): def osu_run1(data_set='osu_run1', sample_every=4):
if not data_available(data_set): if not data_available(data_set):
download_data(data_set) download_data(data_set)
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r') zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
@ -447,7 +483,7 @@ def osu_run1(data_set='osu_run1'):
for name in zip.namelist(): for name in zip.namelist():
zip.extract(name, path) zip.extract(name, path)
Y, connect = GPy.util.mocap.load_text_data('Aug210107', path) Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
Y = Y[0:-1:4, :] Y = Y[0:-1:sample_every, :]
return data_details_return({'Y': Y, 'connect' : connect}, data_set) return data_details_return({'Y': Y, 'connect' : connect}, data_set)
def swiss_roll_generated(num_samples=1000, sigma=0.0): def swiss_roll_generated(num_samples=1000, sigma=0.0):