From a074763eb69597ae22b0b5f7b284a96685d12ea2 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Thu, 14 Nov 2013 12:28:26 -0800 Subject: [PATCH 1/6] fixed problem in warping --- GPy/util/warping_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index e05f39af..35ad3b80 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -222,7 +222,7 @@ class TanhWarpingFunction_d(WarpingFunction): """ - mpsi = psi.coSpy() + mpsi = psi.copy() d = psi[-1] mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3) From b845c0d634a48f9e11e13cb6a3329629e84e28fd Mon Sep 17 00:00:00 2001 From: mu Date: Mon, 18 Nov 2013 10:43:58 +0000 Subject: [PATCH 2/6] constructor and init for ODE_UY --- GPy/kern/constructors.py | 17 +++++++++++++++++ GPy/kern/parts/__init__.py | 1 + 2 files changed, 18 insertions(+) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 392f43ba..1feec4df 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -588,3 +588,20 @@ def ODE_1(input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthsc """ part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY) return kern(input_dim, [part]) + +def ODE_UY(input_dim=2, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None): + """ + kernel resulting from a first order ODE with OU driving GP + :param input_dim: the number of input dimension, has to be equal to one + :type input_dim: int + :param input_lengthU: the number of input U length + :param varianceU: variance of the driving GP + :type varianceU: float + :param varianceY: 'variance' of the transfer function + :type varianceY: float + :param lengthscaleY: 'lengthscale' of the transfer function + :type lengthscaleY: float + :rtype: kernel object + """ + part = parts.ODE_UY.ODE_UY(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY) + return kern(input_dim, 
[part]) \ No newline at end of file diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py index 0a758f1e..3b020828 100644 --- a/GPy/kern/parts/__init__.py +++ b/GPy/kern/parts/__init__.py @@ -14,6 +14,7 @@ import Matern32 import Matern52 import mlp import ODE_1 +import ODE_UY import periodic_exponential import periodic_Matern32 import periodic_Matern52 From 241ca0b628b5eb2cf8e00cde11fa842721fcbf6c Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 18 Nov 2013 16:39:43 +0000 Subject: [PATCH 3/6] Working eq_ode1 in sympy now. --- GPy/kern/parts/__init__.py | 1 + GPy/kern/parts/sympy_helpers.cpp | 119 ++++++++++++++++++------------- GPy/kern/parts/sympy_helpers.py | 71 ++++++++++++++++++ GPy/util/symbolic.py | 2 +- 4 files changed, 141 insertions(+), 52 deletions(-) create mode 100644 GPy/kern/parts/sympy_helpers.py diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py index 0a758f1e..54c5bba5 100644 --- a/GPy/kern/parts/__init__.py +++ b/GPy/kern/parts/__init__.py @@ -26,4 +26,5 @@ import rbf import rbf_inv import spline import symmetric +import sympy_helpers import white diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp index d21d2683..9f30eea9 100644 --- a/GPy/kern/parts/sympy_helpers.cpp +++ b/GPy/kern/parts/sympy_helpers.cpp @@ -1,3 +1,4 @@ +#include "Python.h" #include #include #include @@ -29,24 +30,33 @@ double sinc_grad(double x){ else return (x*cos(x) - sin(x))/(x*x); } - double erfcx(double x){ + // Based on code by Soren Hauberg 2010 for Octave. // compute the scaled complex error function. + //return erfc(x)*exp(x*x); double xneg=-sqrt(log(DBL_MAX/2)); double xmax = 1/(sqrt(M_PI)*DBL_MIN); xmax = DBL_MAXxmax) return 0.0; else @@ -55,16 +65,19 @@ double erfcx(double x){ double ln_diff_erf(double x0, double x1){ // stably compute the log of difference between two erfs. 
- if (x1>x0) - throw std::runtime_error("Error: second argument must be smaller than first in ln_diff_err"); - return log(erf(x0) - erf(x1)); - if (x0==x1) + if (x1>x0){ + PyErr_SetString(PyExc_RuntimeError,"second argument must be smaller than or equal to first in ln_diff_erf"); + throw 1; + } + if (x0==x1){ + PyErr_WarnEx(PyExc_RuntimeWarning,"divide by zero encountered in log", 1); return -INFINITY; - else if(x0<0 && x1>0 || x0>0 && x1<0) + } + else if(x0<0 && x1>0 || x0>0 && x1<0) //x0 and x1 have opposite signs return log(erf(x0)-erf(x1)); - else if(x1>0) - return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1; - else + else if(x0>0) //x0 positive, x1 non-negative + return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1; + else //x0 and x1 non-positive return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0; } @@ -80,26 +93,19 @@ double h(double t, double tprime, double d_i, double d_j, double l){ sign_val = 0.0; else if (t/l < 0) sign_val = -1.0; - double ln_part_2 = ln_diff_erf(half_l_di, arg_2); - - return sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j)) - sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j)); -} - -double dh_dl(double t, double tprime, double d_i, double d_j, double l){ - // compute gradient of h function with respect to lengthscale for sim covariance - // TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow ... perhaps needs rewrite of sympykern. 
- double half_l_di = 0.5*l*d_i; - double arg_1 = half_l_di + tprime/l; - double arg_2 = half_l_di - (t-tprime)/l; - double ln_part_1 = ln_diff_erf(arg_1, arg_2); arg_2 = half_l_di - t/l; double ln_part_2 = ln_diff_erf(half_l_di, arg_2); - double diff_t = t - tprime; - double l2 = l*l; - double hv = h(t, tprime, d_i, d_j, l); - return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime))); + // if either ln_part_1 or ln_part_2 are -inf, don't bother computing rest of that term. + double part_1 = 0.0; + if(isfinite(ln_part_1)) + part_1 = sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j)); + double part_2 = 0.0; + if(isfinite(ln_part_2)) + part_2 = sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j)); + return part_1 - part_2; } + double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){ double diff_t = (t-tprime); double l2 = l*l; @@ -116,41 +122,52 @@ double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){ else if (t/l < 0) sign_val = -1.0; double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l); - - double base = ((0.5*d_i*l2*(d_i+d_j)-1)*hv - + (-diff_t*sign_val*exp(half_l_di*half_l_di - -d_i*diff_t - +ln_part_1) - +t*sign_val*exp(half_l_di*half_l_di - -d_i*t-d_j*tprime - +ln_part_2)) - + l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2) - +exp(-tprime*tprime/l2-d_i*t) - +exp(-t*t/l2-d_j*tprime) - -exp(-(d_i*t + d_j*tprime)))); + double base = (0.5*d_i*l2*(d_i+d_j)-1)*hv; + if(isfinite(ln_part_1)) + base -= diff_t*sign_val*exp(half_l_di*half_l_di + -d_i*diff_t + +ln_part_1); + if(isfinite(ln_part_2)) + base += t*sign_val*exp(half_l_di*half_l_di + -d_i*t-d_j*tprime + +ln_part_2); + base += l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2) + +exp(-tprime*tprime/l2-d_i*t) + +exp(-t*t/l2-d_j*tprime) + -exp(-(d_i*t + d_j*tprime))); return 
base/(d_i+d_j); + } double dh_dd_j(double t, double tprime, double d_i, double d_j, double l){ - double diff_t = (t-tprime); - double l2 = l*l; double half_l_di = 0.5*l*d_i; double hv = h(t, tprime, d_i, d_j, l); - double arg_1 = half_l_di + tprime/l; - double arg_2 = half_l_di - (t-tprime)/l; - double ln_part_1 = ln_diff_erf(arg_1, arg_2); - arg_1 = half_l_di; - arg_2 = half_l_di - t/l; double sign_val = 1.0; if(t/l==0) sign_val = 0.0; else if (t/l < 0) sign_val = -1.0; double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l); - double base = tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2)-hv; + double base = -hv; + if(isfinite(ln_part_2)) + base += tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2); return base/(d_i+d_j); } +double dh_dl(double t, double tprime, double d_i, double d_j, double l){ + // compute gradient of h function with respect to lengthscale for sim covariance + // TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow ... perhaps needs rewrite of sympykern. 
+ double half_l_di = 0.5*l*d_i; + double arg_1 = half_l_di + tprime/l; + double arg_2 = half_l_di - (t-tprime)/l; + double ln_part_1 = ln_diff_erf(arg_1, arg_2); + arg_2 = half_l_di - t/l; + double ln_part_2 = ln_diff_erf(half_l_di, arg_2); + double diff_t = t - tprime; + double l2 = l*l; + double hv = h(t, tprime, d_i, d_j, l); + return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime))); +} double dh_dt(double t, double tprime, double d_i, double d_j, double l){ return 0.0; diff --git a/GPy/kern/parts/sympy_helpers.py b/GPy/kern/parts/sympy_helpers.py new file mode 100644 index 00000000..125dac58 --- /dev/null +++ b/GPy/kern/parts/sympy_helpers.py @@ -0,0 +1,71 @@ +# Code for testing functions written in sympy_helpers.cpp +from scipy import weave +import tempfile +import os +import numpy as np +current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) +extra_compile_args = [] + +weave_kwargs = { + 'support_code': "", + 'include_dirs':[tempfile.gettempdir(), current_dir], + 'headers':['"parts/sympy_helpers.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], + 'extra_compile_args':extra_compile_args, + 'extra_link_args':['-lgomp'], + 'verbose':True} + +def erfcx(x): + code = """ + // Code for computing scaled complementary erf + int i; + int dim; + int elements = Ntarget[0]; + for (dim=1; dim Date: Tue, 19 Nov 2013 06:50:25 +0000 Subject: [PATCH 4/6] Bug fix for single output sympy kernel. 
--- GPy/kern/parts/__init__.py | 2 +- GPy/kern/parts/sympykern.py | 15 +++++++++++---- GPy/util/datasets.py | 15 +++++++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py index d8e7f8e6..f278941a 100644 --- a/GPy/kern/parts/__init__.py +++ b/GPy/kern/parts/__init__.py @@ -14,7 +14,7 @@ import Matern32 import Matern52 import mlp import ODE_1 -import ODE_UY +#import ODE_UY import periodic_exponential import periodic_Matern32 import periodic_Matern52 diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index 88c179aa..7f7fba11 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -177,8 +177,15 @@ class spkern(Kernpart): # Code to compute argument string when only diagonal is required. diag_arg_string = re.sub('int jj','//int jj',X_arg_string) diag_arg_string = re.sub('j','i',diag_arg_string) - diag_precompute_string = precompute_list[0] - + if precompute_string == '': + # if it's not multioutput, the precompute strings are set to empty strings + diag_precompute_string = '' + diag_precompute_replace = '' + else: + # for multioutput we need to extract the index of the output from the input. + diag_precompute_string = precompute_list[0] + diag_precompute_replace = precompute_list[1] + # Here's the code to do the looping for K self._K_code =\ @@ -215,13 +222,13 @@ class spkern(Kernpart): TARGET2(i, i) += k(%s); for (j=0;j Date: Tue, 19 Nov 2013 09:33:06 +0000 Subject: [PATCH 5/6] Moved data resource information to a json file. 
--- GPy/util/data_resources.json | 319 +++++++++++++++++++++ GPy/util/datasets.py | 131 +-------- GPy/util/datasets/data_resources_create.py | 127 ++++++++ 3 files changed, 453 insertions(+), 124 deletions(-) create mode 100644 GPy/util/data_resources.json create mode 100644 GPy/util/datasets/data_resources_create.py diff --git a/GPy/util/data_resources.json b/GPy/util/data_resources.json new file mode 100644 index 00000000..2b36b0c1 --- /dev/null +++ b/GPy/util/data_resources.json @@ -0,0 +1,319 @@ +{ + "rogers_girolami_data":{ + "files":[ + [ + "firstcoursemldata.tar.gz" + ] + ], + "license":null, + "citation":"A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146", + "details":"Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.", + "urls":[ + "https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/" + ], + "suffices":[ + [ + "?dl=1" + ] + ], + "size":21949154 + }, + "ankur_pose_data":{ + "files":[ + [ + "ankurDataPoseSilhouette.mat" + ] + ], + "citation":"3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.", + "license":null, + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/ankur_pose_data/" + ], + "details":"Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing." 
+ }, + "osu_accad":{ + "files":[ + [ + "swagger1TXT.ZIP", + "handspring1TXT.ZIP", + "quickwalkTXT.ZIP", + "run1TXT.ZIP", + "sprintTXT.ZIP", + "dogwalkTXT.ZIP", + "camper_04TXT.ZIP", + "dance_KB3_TXT.ZIP", + "per20_TXT.ZIP", + "perTWO07_TXT.ZIP", + "perTWO13_TXT.ZIP", + "perTWO14_TXT.ZIP", + "perTWO15_TXT.ZIP", + "perTWO16_TXT.ZIP" + ], + [ + "connections.txt" + ] + ], + "license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).", + "citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.", + "details":"Motion capture data of different motions from the Open Motion Data Project at Ohio State University.", + "urls":[ + "http://accad.osu.edu/research/mocap/data/", + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/" + ], + "size":15922790 + }, + "isomap_face_data":{ + "files":[ + [ + "face_data.mat" + ] + ], + "license":null, + "citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000", + "details":"Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/" + ], + "size":24229368 + }, + "boston_housing":{ + "files":[ + [ + "Index", + "housing.data", + "housing.names" + ] + ], + "license":null, + "citation":"Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. 
Economics & Management, vol.5, 81-102, 1978.", + "details":"The Boston Housing data relates house values in Boston to a range of input variables.", + "urls":[ + "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/" + ], + "size":51276 + }, + "cmu_mocap_full":{ + "files":[ + [ + "allasfamc.zip" + ] + ], + "license":"From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.", + "citation":"Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\nThe database was created with funding from NSF EIA-0196217.", + "details":"CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.", + "urls":[ + "http://mocap.cs.cmu.edu" + ], + "size":null + }, + "brendan_faces":{ + "files":[ + [ + "frey_rawface.mat" + ] + ], + "license":null, + "citation":"Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. 
Computer Society Press, Los Alamitos, CA.", + "details":"A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.", + "urls":[ + "http://www.cs.nyu.edu/~roweis/data/" + ], + "size":1100584 + }, + "olympic_marathon_men":{ + "files":[ + [ + "olympicMarathonTimes.csv" + ] + ], + "license":null, + "citation":null, + "details":"Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/" + ], + "size":584 + }, + "pumadyn-32nm":{ + "files":[ + [ + "pumadyn-32nm.tar.gz" + ] + ], + "license":"Data is made available by the Delve system at the University of Toronto", + "citation":"Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.", + "details":"Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.", + "urls":[ + "ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/" + ], + "size":5861646 + }, + "ripley_prnn_data":{ + "files":[ + [ + "Cushings.dat", + "README", + "crabs.dat", + "fglass.dat", + "fglass.grp", + "pima.te", + "pima.tr", + "pima.tr2", + "synth.te", + "synth.tr", + "viruses.dat", + "virus3.dat" + ] + ], + "license":null, + "citation":"Pattern Recognition and Neural Networks by B.D. 
Ripley (1996) Cambridge University Press ISBN 0 521 46986 7", + "details":"Data sets from Brian Ripley's Pattern Recognition and Neural Networks", + "urls":[ + "http://www.stats.ox.ac.uk/pub/PRNN/" + ], + "size":93565 + }, + "three_phase_oil_flow":{ + "files":[ + [ + "DataTrnLbls.txt", + "DataTrn.txt", + "DataTst.txt", + "DataTstLbls.txt", + "DataVdn.txt", + "DataVdnLbls.txt" + ] + ], + "license":null, + "citation":"Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593", + "details":"The three phase oil data used initially for demonstrating the Generative Topographic mapping.", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/three_phase_oil_flow/" + ], + "size":712796 + }, + "robot_wireless":{ + "files":[ + [ + "uw-floor.txt" + ] + ], + "license":null, + "citation":"WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.", + "details":"Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/" + ], + "size":284390 + }, + "xw_pen":{ + "files":[ + [ + "xw_pen_15.csv" + ] + ], + "license":null, + "citation":"Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. 
Neurocomputing, 69:123--141, 2005", + "details":"Accelerometer pen data used for robust regression by Tipping and Lawrence.", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/xw_pen/" + ], + "size":3410 + }, + "swiss_roll":{ + "files":[ + [ + "swiss_roll_data.mat" + ] + ], + "license":null, + "citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000", + "details":"Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.", + "urls":[ + "http://isomap.stanford.edu/" + ], + "size":800256 + }, + "osu_run1":{ + "files":[ + [ + "run1TXT.ZIP" + ], + [ + "connections.txt" + ] + ], + "license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).", + "citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.", + "details":"Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.", + "urls":[ + "http://accad.osu.edu/research/mocap/data/", + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/" + ], + "size":338103 + }, + "creep_rupture":{ + "files":[ + [ + "creeprupt.tar" + ] + ], + "license":null, + "citation":"Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.", + "details":"Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). 
See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.", + "urls":[ + "http://www.msm.cam.ac.uk/map/data/tar/" + ], + "size":602797 + }, + "olivetti_faces":{ + "files":[ + [ + "att_faces.zip" + ], + [ + "olivettifaces.mat" + ] + ], + "license":null, + "citation":"Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994", + "details":"Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/", + "http://www.cs.nyu.edu/~roweis/data/" + ], + "size":8561331 + }, + "della_gatta":{ + "files":[ + [ + "DellaGattadata.mat" + ] + ], + "license":null, + "citation":"Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008", + "details":"The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/" + ], + "size":3729650 + }, + "epomeo_gpx":{ + "files":[ + [ + "endomondo_1.gpx", + "endomondo_2.gpx", + "garmin_watch_via_endomondo.gpx", + "viewranger_phone.gpx", + "viewranger_tablet.gpx" + ] + ], + "license":null, + "citation":"", + "details":"Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. 
garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).", + "urls":[ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/" + ], + "size":2031872 + } +} \ No newline at end of file diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 69f010f9..f33a2e92 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -7,7 +7,7 @@ import urllib as url import zipfile import tarfile import datetime - +import json ipython_available=True try: import IPython @@ -29,129 +29,10 @@ data_path = os.path.join(os.path.dirname(__file__), 'datasets') default_seed = 10000 overide_manual_authorize=False neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/' -sam_url = 'http://www.cs.nyu.edu/~roweis/data/' -cmu_url = 'http://mocap.cs.cmu.edu/subjects/' -# Note: there may be a better way of storing data resources, for the -# moment we are storing them in a dictionary. -data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'], - 'files' : [['ankurDataPoseSilhouette.mat']], - 'license' : None, - 'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""", - 'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""}, - - 'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'], - 'files' : [['Index', 'housing.data', 'housing.names']], - 'citation' : """Harrison, D. and Rubinfeld, D.L. 
'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""", - 'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""", - 'license' : None, - 'size' : 51276 - }, - 'brendan_faces' : {'urls' : [sam_url], - 'files': [['frey_rawface.mat']], - 'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.', - 'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""", - 'license': None, - 'size' : 1100584}, - 'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'], - 'files' : [['allasfamc.zip']], - 'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu. -The database was created with funding from NSF EIA-0196217.""", - 'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""", - 'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. 
The database was created with funding from NSF EIA-0196217.""", - 'size' : None}, - 'creep_rupture' : {'urls' : ['http://www.msm.cam.ac.uk/map/data/tar/'], - 'files' : [['creeprupt.tar']], - 'citation' : 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.', - 'details' : """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""", - 'license' : None, - 'size' : 602797}, - 'della_gatta' : {'urls' : [neil_url + 'della_gatta/'], - 'files': [['DellaGattadata.mat']], - 'citation' : 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008', - 'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.", - 'license':None, - 'size':3729650}, - 'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'], - 'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']], - 'citation' : '', - 'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. 
there are 3 GPS devices, but one device recorded two traces).", - 'license':None, - 'size': 2031872}, - 'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'], - 'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']], - 'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593', - 'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""", - 'license' : None, - 'size' : 712796}, - 'rogers_girolami_data' : {'urls' : ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'], - 'files' : [['firstcoursemldata.tar.gz']], - 'suffices' : [['?dl=1']], - 'citation' : 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146', - 'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""", - 'license' : None, - 'size' : 21949154}, - 'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url], - 'files' : [['att_faces.zip'], ['olivettifaces.mat']], - 'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994', - 'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. 
""", - 'license': None, - 'size' : 8561331}, - 'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'], - 'files' : [['olympicMarathonTimes.csv']], - 'citation' : None, - 'details' : """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""", - 'license': None, - 'size' : 584}, - 'osu_run1' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'], - 'files': [['run1TXT.ZIP'],['connections.txt']], - 'details' : "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.", - 'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.', - 'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).', - 'size': 338103}, - 'osu_accad' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'], - 'files': [['swagger1TXT.ZIP','handspring1TXT.ZIP','quickwalkTXT.ZIP','run1TXT.ZIP','sprintTXT.ZIP','dogwalkTXT.ZIP','camper_04TXT.ZIP','dance_KB3_TXT.ZIP','per20_TXT.ZIP','perTWO07_TXT.ZIP','perTWO13_TXT.ZIP','perTWO14_TXT.ZIP','perTWO15_TXT.ZIP','perTWO16_TXT.ZIP'],['connections.txt']], - 'details' : "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.", - 'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.', - 'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).', - 'size': 15922790}, - 'pumadyn-32nm' : {'urls' : 
['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'], - 'files' : [['pumadyn-32nm.tar.gz']], - 'details' : """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""", - 'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""", - 'license' : """Data is made available by the Delve system at the University of Toronto""", - 'size' : 5861646}, - 'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'], - 'files' : [['uw-floor.txt']], - 'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""", - 'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""", - 'license' : None, - 'size' : 284390}, - 'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'], - 'files' : [['swiss_roll_data.mat']], - 'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""", - 'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. 
Langford, Science 290 (5500): 2319-2323, 22 December 2000', - 'license' : None, - 'size' : 800256}, - 'ripley_prnn_data' : {'urls' : ['http://www.stats.ox.ac.uk/pub/PRNN/'], - 'files' : [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']], - 'details' : """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""", - 'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""", - 'license' : None, - 'size' : 93565}, - 'isomap_face_data' : {'urls' : [neil_url + 'isomap_face_data/'], - 'files' : [['face_data.mat']], - 'details' : """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""", - 'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000', - 'license' : None, - 'size' : 24229368}, - 'xw_pen' : {'urls' : [neil_url + 'xw_pen/'], - 'files' : [['xw_pen_15.csv']], - 'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""", - 'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005', - 'license' : None, - 'size' : 3410} - } +# Read data resources from json file. +json_data=open('data_resources.json').read() +data_resources = json.loads(json_data) def prompt_user(prompt): @@ -623,7 +504,7 @@ def xw_pen(data_set='xw_pen'): return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. 
Plot in original paper showed regression between time steps 175 and 275."}, data_set) -def download_rogers_girolami_data(): +def download_rogers_girolami_data(data_set='rogers_girolami_data'): if not data_available('rogers_girolami_data'): download_data(data_set) path = os.path.join(data_path, data_set) @@ -909,3 +790,5 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4, data_set= if sample_every != 1: info += ' Data is sub-sampled to every ' + str(sample_every) + ' frames.' return data_details_return({'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': info, 'skel': skel}, data_set) + + diff --git a/GPy/util/datasets/data_resources_create.py b/GPy/util/datasets/data_resources_create.py new file mode 100644 index 00000000..8ae62a85 --- /dev/null +++ b/GPy/util/datasets/data_resources_create.py @@ -0,0 +1,127 @@ +import json + +neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/' +sam_url = 'http://www.cs.nyu.edu/~roweis/data/' +cmu_url = 'http://mocap.cs.cmu.edu/subjects/' + +data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'], + 'files' : [['ankurDataPoseSilhouette.mat']], + 'license' : None, + 'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""", + 'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""}, + + 'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'], + 'files' : [['Index', 'housing.data', 'housing.names']], + 'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. 
Economics & Management, vol.5, 81-102, 1978.""", + 'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""", + 'license' : None, + 'size' : 51276 + }, + 'brendan_faces' : {'urls' : [sam_url], + 'files': [['frey_rawface.mat']], + 'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.', + 'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""", + 'license': None, + 'size' : 1100584}, + 'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'], + 'files' : [['allasfamc.zip']], + 'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu. +The database was created with funding from NSF EIA-0196217.""", + 'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""", + 'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. 
The database was created with funding from NSF EIA-0196217.""", + 'size' : None}, + 'creep_rupture' : {'urls' : ['http://www.msm.cam.ac.uk/map/data/tar/'], + 'files' : [['creeprupt.tar']], + 'citation' : 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.', + 'details' : """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""", + 'license' : None, + 'size' : 602797}, + 'della_gatta' : {'urls' : [neil_url + 'della_gatta/'], + 'files': [['DellaGattadata.mat']], + 'citation' : 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008', + 'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.", + 'license':None, + 'size':3729650}, + 'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'], + 'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']], + 'citation' : '', + 'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. 
there are 3 GPS devices, but one device recorded two traces).", + 'license':None, + 'size': 2031872}, + 'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'], + 'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']], + 'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593', + 'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""", + 'license' : None, + 'size' : 712796}, + 'rogers_girolami_data' : {'urls' : ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'], + 'files' : [['firstcoursemldata.tar.gz']], + 'suffices' : [['?dl=1']], + 'citation' : 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146', + 'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""", + 'license' : None, + 'size' : 21949154}, + 'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url], + 'files' : [['att_faces.zip'], ['olivettifaces.mat']], + 'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994', + 'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. 
""", + 'license': None, + 'size' : 8561331}, + 'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'], + 'files' : [['olympicMarathonTimes.csv']], + 'citation' : None, + 'details' : """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""", + 'license': None, + 'size' : 584}, + 'osu_run1' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'], + 'files': [['run1TXT.ZIP'],['connections.txt']], + 'details' : "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.", + 'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.', + 'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).', + 'size': 338103}, + 'osu_accad' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'], + 'files': [['swagger1TXT.ZIP','handspring1TXT.ZIP','quickwalkTXT.ZIP','run1TXT.ZIP','sprintTXT.ZIP','dogwalkTXT.ZIP','camper_04TXT.ZIP','dance_KB3_TXT.ZIP','per20_TXT.ZIP','perTWO07_TXT.ZIP','perTWO13_TXT.ZIP','perTWO14_TXT.ZIP','perTWO15_TXT.ZIP','perTWO16_TXT.ZIP'],['connections.txt']], + 'details' : "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.", + 'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.', + 'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).', + 'size': 15922790}, + 'pumadyn-32nm' : {'urls' : 
['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'], + 'files' : [['pumadyn-32nm.tar.gz']], + 'details' : """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""", + 'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""", + 'license' : """Data is made available by the Delve system at the University of Toronto""", + 'size' : 5861646}, + 'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'], + 'files' : [['uw-floor.txt']], + 'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""", + 'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""", + 'license' : None, + 'size' : 284390}, + 'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'], + 'files' : [['swiss_roll_data.mat']], + 'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""", + 'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. 
Langford, Science 290 (5500): 2319-2323, 22 December 2000', + 'license' : None, + 'size' : 800256}, + 'ripley_prnn_data' : {'urls' : ['http://www.stats.ox.ac.uk/pub/PRNN/'], + 'files' : [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']], + 'details' : """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""", + 'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""", + 'license' : None, + 'size' : 93565}, + 'isomap_face_data' : {'urls' : [neil_url + 'isomap_face_data/'], + 'files' : [['face_data.mat']], + 'details' : """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""", + 'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000', + 'license' : None, + 'size' : 24229368}, + 'xw_pen' : {'urls' : [neil_url + 'xw_pen/'], + 'files' : [['xw_pen_15.csv']], + 'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""", + 'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005', + 'license' : None, + 'size' : 3410} + } + +with open('data_resources.json', 'w') as file: + json.dump(data_resources, file) From fca3287e9c5c042c044361bd35ceb87287aa843a Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 19 Nov 2013 16:54:07 +0000 Subject: [PATCH 6/6] added a path for the data resources. not all users will be working in the GPy directory. 
--- GPy/util/datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index f33a2e92..732e2a1b 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -31,7 +31,8 @@ overide_manual_authorize=False neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/' # Read data resources from json file. -json_data=open('data_resources.json').read() +path = os.path.join(os.path.dirname(__file__), 'data_resources.json') +json_data=open(path).read() data_resources = json.loads(json_data)