mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-05 17:52:39 +02:00
Added gpx dataset.
This commit is contained in:
parent
7cd119d2b0
commit
00a5f8e5f6
4 changed files with 123 additions and 91 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from constructors import *
|
from constructors import *
|
||||||
|
|
@ -6,4 +6,4 @@ try:
|
||||||
from constructors import rbf_sympy, sympykern # these depend on sympy
|
from constructors import rbf_sympy, sympykern # these depend on sympy
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
from kern import kern
|
from kern import *
|
||||||
|
|
|
||||||
|
|
@ -536,3 +536,86 @@ class kern(Parameterized):
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"
|
raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"
|
||||||
|
|
||||||
|
from GPy.core.model import Model
|
||||||
|
|
||||||
|
class Kern_check_model(Model):
    """Dummy model used as a base class for checking that the gradients of a
    given kernel are implemented correctly.  It enables checkgrad() to be
    called independently on a kernel.

    :param kernel: kernel to check; defaults to GPy.kern.rbf(1).
    :param dL_dK: weighting matrix applied to the covariance; defaults to
        np.ones((X.shape[0], X2.shape[0])).
    :param X: first input matrix; defaults to 20 random samples.
    :param X2: second input matrix; defaults to 10 random samples.
    """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        num_samples = 20
        num_samples2 = 10
        # FIX: use `is None` rather than `== None` -- for numpy arrays `==`
        # is element-wise, which makes the `if` ambiguous and error-prone.
        if kernel is None:
            kernel = GPy.kern.rbf(1)
        if X is None:
            X = np.random.randn(num_samples, kernel.input_dim)
        if X2 is None:
            X2 = np.random.randn(num_samples2, kernel.input_dim)
        if dL_dK is None:
            dL_dK = np.ones((X.shape[0], X2.shape[0]))

        self.kernel = kernel
        self.X = X
        self.X2 = X2
        self.dL_dK = dL_dK
        Model.__init__(self)

    def is_positive_definite(self):
        """Return True iff K(X) has no negative eigenvalue."""
        v = np.linalg.eig(self.kernel.K(self.X))[0]
        if any(v < 0):
            return False
        else:
            return True

    def _get_params(self):
        # Delegate the parameter vector to the wrapped kernel.
        return self.kernel._get_params()

    def _get_param_names(self):
        return self.kernel._get_param_names()

    def _set_params(self, x):
        self.kernel._set_params(x)

    def log_likelihood(self):
        # Weighted sum of the cross-covariance serves as the surrogate
        # objective whose gradients subclasses expose for checkgrad().
        return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()

    def _log_likelihood_gradients(self):
        # FIX: `raise E, "msg"` is Python-2-only syntax; the call form is
        # valid in both Python 2 and 3.
        raise NotImplementedError("This needs to be implemented to use the kern_check_model class.")
|
||||||
|
|
||||||
|
class Kern_check_dK_dtheta(Kern_check_model):
    """Gradient check of a kernel's covariance with respect to its parameters.

    The base class supplies the surrogate objective; this subclass only
    provides the analytic gradient for checkgrad() to compare against.
    """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)

    def _log_likelihood_gradients(self):
        # Analytic gradient of the weighted covariance w.r.t. the kernel
        # parameters, as implemented by the kernel itself.
        weights = self.dL_dK
        return self.kernel.dK_dtheta(weights, self.X, self.X2)
|
||||||
|
|
||||||
|
class Kern_check_dKdiag_dtheta(Kern_check_model):
    """Gradient check of the gradient of the diagonal of a kernel with
    respect to its parameters.

    :param kernel: kernel to check; defaults set by Kern_check_model.
    :param dL_dK: weighting vector over the diagonal; defaults to ones.
    :param X: input matrix; defaults set by Kern_check_model.
    """
    def __init__(self, kernel=None, dL_dK=None, X=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
        # FIX: `is None` instead of `== None` -- dL_dK may be a numpy array,
        # for which `==` is element-wise and ambiguous in boolean context.
        if dL_dK is None:
            # Diagonal case: the weighting is a vector, overriding the
            # matrix default installed by the base-class constructor.
            self.dL_dK = np.ones((self.X.shape[0]))

    def log_likelihood(self):
        # Weighted sum over the covariance diagonal only.
        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()

    def _log_likelihood_gradients(self):
        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
|
||||||
|
|
||||||
|
class Kern_check_dK_dX(Kern_check_model):
    """Gradient check of a kernel's covariance with respect to the inputs X.

    Here the "parameters" exposed to checkgrad() are the entries of X
    itself rather than the kernel hyperparameters.
    """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)

    def _log_likelihood_gradients(self):
        grad = self.kernel.dK_dX(self.dL_dK, self.X, self.X2)
        return grad.flatten()

    def _get_param_names(self):
        # One "X_i,j" label per entry of X; the column index varies in the
        # outer loop, matching the original ordering.
        n_rows, n_cols = self.X.shape[0], self.X.shape[1]
        names = []
        for j in range(n_cols):
            for i in range(n_rows):
                names.append('X_' + str(i) + ',' + str(j))
        return names

    def _get_params(self):
        return self.X.flatten()

    def _set_params(self, x):
        self.X = x.reshape(self.X.shape)
|
||||||
|
|
|
||||||
|
|
@ -4,93 +4,6 @@
|
||||||
import unittest
|
import unittest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import GPy
|
import GPy
|
||||||
from GPy.core.model import Model
|
|
||||||
|
|
||||||
class Kern_check_model(Model):
    """Dummy model used as a base class for checking that the gradients of a
    given kernel are implemented correctly.  It enables checkgrad() to be
    called independently on a kernel.

    :param kernel: kernel to check; defaults to GPy.kern.rbf(1).
    :param dL_dK: weighting matrix applied to the covariance; defaults to
        np.ones((X.shape[0], X2.shape[0])).
    :param X: first input matrix; defaults to 20 random samples.
    :param X2: second input matrix; defaults to 10 random samples.
    """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        num_samples = 20
        num_samples2 = 10
        # FIX: `is None` instead of `== None`; `==` on numpy arrays is
        # element-wise, which makes the `if` ambiguous.
        if kernel is None:
            kernel = GPy.kern.rbf(1)
        if X is None:
            X = np.random.randn(num_samples, kernel.input_dim)
        if X2 is None:
            X2 = np.random.randn(num_samples2, kernel.input_dim)
        if dL_dK is None:
            dL_dK = np.ones((X.shape[0], X2.shape[0]))

        self.kernel = kernel
        self.X = X
        self.X2 = X2
        self.dL_dK = dL_dK
        Model.__init__(self)

    def is_positive_definite(self):
        """Return True iff K(X) has no negative eigenvalue."""
        v = np.linalg.eig(self.kernel.K(self.X))[0]
        if any(v < 0):
            return False
        else:
            return True

    def _get_params(self):
        # Delegate the parameter vector to the wrapped kernel.
        return self.kernel._get_params()

    def _get_param_names(self):
        return self.kernel._get_param_names()

    def _set_params(self, x):
        self.kernel._set_params(x)

    def log_likelihood(self):
        # Weighted sum of the cross-covariance is the surrogate objective.
        return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()

    def _log_likelihood_gradients(self):
        # FIX: `raise E, "msg"` is Python-2-only; the call form works in
        # both Python 2 and 3.
        raise NotImplementedError("This needs to be implemented to use the kern_check_model class.")
|
|
||||||
|
|
||||||
class Kern_check_dK_dtheta(Kern_check_model):
    """Gradient check of a kernel's covariance with respect to its
    hyperparameters; the base class supplies the surrogate objective."""
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)

    def _log_likelihood_gradients(self):
        # The kernel provides the analytic parameter gradient directly.
        return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
|
|
||||||
|
|
||||||
class Kern_check_dKdiag_dtheta(Kern_check_model):
    """Gradient check of the gradient of the diagonal of a kernel with
    respect to its parameters.

    :param kernel: kernel to check; defaults set by Kern_check_model.
    :param dL_dK: weighting vector over the diagonal; defaults to ones.
    :param X: input matrix; defaults set by Kern_check_model.
    """
    def __init__(self, kernel=None, dL_dK=None, X=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
        # FIX: `is None` instead of `== None` (element-wise for arrays).
        if dL_dK is None:
            # Diagonal case: the weighting is a vector, replacing the
            # matrix default set by the base-class constructor.
            self.dL_dK = np.ones((self.X.shape[0]))

    def log_likelihood(self):
        # Weighted sum over the covariance diagonal only.
        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()

    def _log_likelihood_gradients(self):
        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
|
|
||||||
|
|
||||||
class Kern_check_dK_dX(Kern_check_model):
    """Gradient check of a kernel's covariance with respect to the inputs X.

    The "parameters" exposed to checkgrad() are the entries of X itself
    rather than the kernel hyperparameters.
    """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)

    def _log_likelihood_gradients(self):
        return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()

    def _get_param_names(self):
        # One "X_i,j" label per entry of X.
        # FIX: the inner loop previously iterated range(self.X.shape[0])
        # (rows) instead of shape[1] (columns), producing the wrong number
        # of names whenever X is not square.
        names = []
        for i in range(self.X.shape[0]):
            for j in range(self.X.shape[1]):
                names.append('X_' + str(i) + ',' + str(j))
        return names

    def _get_params(self):
        return self.X.flatten()

    def _set_params(self, x):
        self.X = x.reshape(self.X.shape)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ import zipfile
|
||||||
import tarfile
|
import tarfile
|
||||||
import gzip
|
import gzip
|
||||||
import zlib
|
import zlib
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
|
||||||
import sys, urllib
|
import sys, urllib
|
||||||
def reporthook(a,b,c):
|
def reporthook(a,b,c):
|
||||||
|
|
@ -65,6 +67,12 @@ The database was created with funding from NSF EIA-0196217.""",
|
||||||
'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
|
'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
|
||||||
'license':None,
|
'license':None,
|
||||||
'size':3729650},
|
'size':3729650},
|
||||||
|
'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
|
||||||
|
'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
|
||||||
|
'citation' : '',
|
||||||
|
'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
|
||||||
|
'license':None,
|
||||||
|
'size': 2031872},
|
||||||
'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
|
'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
|
||||||
'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
|
'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
|
||||||
'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
|
'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
|
||||||
|
|
@ -271,6 +279,34 @@ def cmu_urls_files(subj_motions, messages = True):
|
||||||
resource['files'].append(file_download)
|
resource['files'].append(file_download)
|
||||||
return resource
|
return resource
|
||||||
|
|
||||||
|
try:
|
||||||
|
import gpxpy
|
||||||
|
import gpxpy.gpx
|
||||||
|
gpxpy_available = True
|
||||||
|
except ImportError:
|
||||||
|
gpxpy_available = False
|
||||||
|
|
||||||
|
if gpxpy_available:
|
||||||
|
def epomeo_gpx(data_set='epomeo_gpx'):
    """Load five GPS traces of the same run up Mount Epomeo in Ischia.

    Returns (via data_details_return) a dict whose 'X' entry is a list of
    numpy arrays, one per trace, with columns [seconds since midnight
    2013-08-21, latitude, longitude, elevation].
    """
    if not data_available(data_set):
        download_data(data_set)
    # FIX: renamed the loop variable from `file` (shadows the builtin).
    file_names = ['endomondo_1', 'endomondo_2', 'garmin_watch_via_endomondo',
                  'viewranger_phone', 'viewranger_tablet']
    X = []
    for file_name in file_names:
        # FIX: `with` guarantees the handle is closed even if gpxpy.parse()
        # raises; the original only closed it on the success path.
        with open(os.path.join(data_path, 'epomeo_gpx', file_name + '.gpx'), 'r') as gpx_file:
            gpx = gpxpy.parse(gpx_file)
        # FIX: dropped the dead `segment = gpx.tracks[0].segments[0]`
        # assignment -- the name was never used (the comprehension below
        # rebinds `segment` for every track anyway).
        points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
        # Timestamps are converted to seconds relative to 2013-08-21,
        # the day of the run.
        data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
        X.append(np.asarray(data))
    return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)
|
||||||
|
|
||||||
|
del gpxpy_available
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Some general utilities.
|
# Some general utilities.
|
||||||
def sample_class(f):
|
def sample_class(f):
|
||||||
|
|
@ -439,7 +475,7 @@ def ripley_synth(data_set='ripley_prnn_data'):
|
||||||
ytest = test[:, 2:3]
|
ytest = test[:, 2:3]
|
||||||
return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
|
return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
|
||||||
|
|
||||||
def osu_run1(data_set='osu_run1'):
|
def osu_run1(data_set='osu_run1', sample_every=4):
|
||||||
if not data_available(data_set):
|
if not data_available(data_set):
|
||||||
download_data(data_set)
|
download_data(data_set)
|
||||||
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
|
zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
|
||||||
|
|
@ -447,7 +483,7 @@ def osu_run1(data_set='osu_run1'):
|
||||||
for name in zip.namelist():
|
for name in zip.namelist():
|
||||||
zip.extract(name, path)
|
zip.extract(name, path)
|
||||||
Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
|
Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
|
||||||
Y = Y[0:-1:4, :]
|
Y = Y[0:-1:sample_every, :]
|
||||||
return data_details_return({'Y': Y, 'connect' : connect}, data_set)
|
return data_details_return({'Y': Y, 'connect' : connect}, data_set)
|
||||||
|
|
||||||
def swiss_roll_generated(num_samples=1000, sigma=0.0):
|
def swiss_roll_generated(num_samples=1000, sigma=0.0):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue