diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 6d9cf07a..eb4076c3 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 from constructors import *
@@ -6,4 +6,4 @@ try:
     from constructors import rbf_sympy, sympykern # these depend on sympy
 except:
     pass
-from kern import kern
+from kern import *
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 04c63773..0919d891 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -536,3 +536,104 @@ class kern(Parameterized):
         else:
             raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"
 
+from GPy.core.model import Model
+
+class Kern_check_model(Model):
+    """Dummy model used as a base class for checking kernel gradients.
+
+    The objective is sum(dL_dK * K(X, X2)) with the kernel parameters as the
+    model parameters, which lets checkgradient() be called on a kernel
+    independently of any real model. Subclasses supply the analytic gradient.
+
+    :param kernel: kernel to check; GPy.kern.rbf(1) when None.
+    :param dL_dK: weights on the entries of K; all ones when None.
+    :param X: first inputs; 20 standard-normal samples when None.
+    :param X2: second inputs; 10 standard-normal samples when None.
+    """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        num_samples = 20
+        num_samples2 = 10
+        # Identity tests: '==' on an ndarray compares element by element.
+        if kernel is None:
+            # NOTE(review): needs the name GPy bound in this module -- confirm.
+            kernel = GPy.kern.rbf(1)
+        if X is None:
+            X = np.random.randn(num_samples, kernel.input_dim)
+        if X2 is None:
+            X2 = np.random.randn(num_samples2, kernel.input_dim)
+        if dL_dK is None:
+            dL_dK = np.ones((X.shape[0], X2.shape[0]))
+
+        self.kernel=kernel
+        self.X = X
+        self.X2 = X2
+        self.dL_dK = dL_dK
+        #self.constrained_indices=[]
+        #self.constraints=[]
+        Model.__init__(self)
+
+    def is_positive_definite(self):
+        """Return False if K(X) has any negative eigenvalue, True otherwise."""
+        v = np.linalg.eig(self.kernel.K(self.X))[0]
+        if any(v<0):
+            return False
+        else:
+            return True
+
+    def _get_params(self):
+        return self.kernel._get_params()
+
+    def _get_param_names(self):
+        return self.kernel._get_param_names()
+
+    def _set_params(self, x):
+        self.kernel._set_params(x)
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
+
+    def _log_likelihood_gradients(self):
+        raise NotImplementedError("This needs to be implemented to use the kern_check_model class.")
+
+class Kern_check_dK_dtheta(Kern_check_model):
+    """Gradient check for the kernel matrix with respect to the kernel parameters."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
+
+class Kern_check_dKdiag_dtheta(Kern_check_model):
+    """Gradient check for the diagonal of the kernel matrix with respect to the kernel parameters."""
+    def __init__(self, kernel=None, dL_dK=None, X=None):
+        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
+        # Only the diagonal is used here, so the default weights are a vector.
+        if dL_dK is None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
+
+class Kern_check_dK_dX(Kern_check_model):
+    """Gradient check for the kernel matrix with respect to X.
+
+    The model parameters are the entries of X rather than the kernel parameters.
+    """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
+
+    def _get_param_names(self):
+        # Rows vary slowest so that name k labels element k of X.flatten().
+        return ['X_' +str(i) + ','+str(j) for i in range(self.X.shape[0]) for j in range(self.X.shape[1])]
+
+    def _get_params(self):
+        return self.X.flatten()
+
+    def _set_params(self, x):
+        self.X=x.reshape(self.X.shape)
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 4e264ca3..1d7f0faf 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -4,93 +4,6 @@
 import unittest
 import numpy as np
 import GPy
-from GPy.core.model import Model
-
-class Kern_check_model(Model):
-    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
-    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
-        num_samples = 20
-        num_samples2 = 10
-        if kernel==None:
-            kernel = GPy.kern.rbf(1)
-        if X==None:
-            X = np.random.randn(num_samples, kernel.input_dim)
-        if X2==None:
-            X2 = np.random.randn(num_samples2, kernel.input_dim)
-        if dL_dK==None:
-            dL_dK = np.ones((X.shape[0], X2.shape[0]))
-
-        self.kernel=kernel
-        self.X = X
-        self.X2 = X2
-        self.dL_dK = dL_dK
-        #self.constrained_indices=[]
-        #self.constraints=[]
-        Model.__init__(self)
-
-    def is_positive_definite(self):
-        v = np.linalg.eig(self.kernel.K(self.X))[0]
-        if any(v<0):
-            return False
-        else:
-            return True
-
-    def _get_params(self):
-        return self.kernel._get_params()
-
-    def _get_param_names(self):
-        return self.kernel._get_param_names()
-
-    def _set_params(self, x):
-        self.kernel._set_params(x)
-
-    def log_likelihood(self):
-        return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
-
-    def _log_likelihood_gradients(self):
-        raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
-
-class Kern_check_dK_dtheta(Kern_check_model):
-    """This class allows gradient checks for the gradient of a kernel with respect to parameters. """
-    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
-        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
-
-    def _log_likelihood_gradients(self):
-        return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
-
-class Kern_check_dKdiag_dtheta(Kern_check_model):
-    """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
-    def __init__(self, kernel=None, dL_dK=None, X=None):
-        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
-        if dL_dK==None:
-            self.dL_dK = np.ones((self.X.shape[0]))
-
-    def log_likelihood(self):
-        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
-
-    def _log_likelihood_gradients(self):
-        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
-
-class Kern_check_dK_dX(Kern_check_model):
-    """This class allows gradient checks for the gradient of a kernel with respect to X. """
-    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
-        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
-
-    def _log_likelihood_gradients(self):
-        return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
-
-    def _get_param_names(self):
-        names = []
-        for i in range(self.X.shape[0]):
-            for j in range(self.X.shape[0]):
-                names.append('X_' +str(i) + ','+str(j))
-        return names
-
-    def _get_params(self):
-        return self.X.flatten()
-
-    def _set_params(self, x):
-        self.X=x.reshape(self.X.shape)
 
 
 
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 61ed727f..afc08b15 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -10,6 +10,8 @@ import zipfile
 import tarfile
 import gzip
 import zlib
+import datetime
+
 import sys, urllib
 
 def reporthook(a,b,c):
@@ -65,6 +67,12 @@ The database was created with funding from NSF EIA-0196217.""",
                                          'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
                                          'license':None,
                                          'size':3729650},
+                  'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
+                                  'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
+                                  'citation' : '',
+                                  'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
+                                  'license':None,
+                                  'size': 2031872},
                   'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
                                            'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
                                            'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
@@ -271,6 +279,34 @@ def cmu_urls_files(subj_motions, messages = True):
         resource['files'].append(file_download)
     return resource
 
+try:
+    import gpxpy
+    import gpxpy.gpx
+    gpxpy_available = True
+except ImportError:
+    gpxpy_available = False
+
+if gpxpy_available:
+    def epomeo_gpx(data_set='epomeo_gpx'):
+        """Load the Epomeo GPS traces; X holds one (time in seconds, latitude, longitude, elevation) array per trace."""
+        if not data_available(data_set):
+            download_data(data_set)
+        files = ['endomondo_1', 'endomondo_2', 'garmin_watch_via_endomondo','viewranger_phone', 'viewranger_tablet']
+
+        X = []
+        for name in files:
+            # 'with' closes the trace file even if parsing raises.
+            with open(os.path.join(data_path, data_set, name + '.gpx'), 'r') as gpx_file:
+                gpx = gpxpy.parse(gpx_file)
+            # Pool the points of every segment of every track.
+            points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
+            # Time is stored as seconds elapsed since 2013-08-21 00:00.
+            data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
+            X.append(np.asarray(data))
+        return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)
+
+del gpxpy_available
+
 
 # Some general utilities.
 def sample_class(f):
@@ -439,7 +475,7 @@ def ripley_synth(data_set='ripley_prnn_data'):
     ytest = test[:, 2:3]
     return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
 
-def osu_run1(data_set='osu_run1'):
+def osu_run1(data_set='osu_run1', sample_every=4):
     if not data_available(data_set):
         download_data(data_set)
     zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'sprintTXT.ZIP'), 'r')
@@ -447,7 +483,7 @@ def osu_run1(data_set='osu_run1'):
     for name in zip.namelist():
         zip.extract(name, path)
     Y, connect = GPy.util.mocap.load_text_data('Aug210107', path)
-    Y = Y[0:-1:4, :]
+    Y = Y[0:-1:sample_every, :]  # keep every sample_every-th row; the -1 stop always excludes the last row
     return data_details_return({'Y': Y, 'connect' : connect}, data_set)
 
 def swiss_roll_generated(num_samples=1000, sigma=0.0):