Added robot_wireless data set and examples.

This commit is contained in:
Neil Lawrence 2013-08-18 23:46:02 +02:00
parent c45a80499c
commit 4082f6c02e
4 changed files with 139 additions and 9 deletions

View file

@ -378,6 +378,17 @@ def stick():
return m
def robot_wireless():
data = GPy.util.datasets.robot_wireless()
# optimize
m = GPy.models.GPLVM(data['Y'], 2)
m.optimize(messages=1, max_f_eval=10000)
m._set_params(m._get_params())
plt.clf
ax = m.plot_latent()
return m
def stick_bgplvm(model=None):
data = GPy.util.datasets.osu_run1()
Q = 6

View file

@ -325,6 +325,27 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
return np.array(lls)
def robot_wireless(optim_iters=100):
"""Predict the location of a robot given wirelss signal strengthr readings."""
data = GPy.util.datasets.robot_wireless()
# create simple GP Model
m = GPy.models.GPRegression(data['Y'], data['X'])
# optimize
m.optimize(messages=True, max_f_eval=optim_iters)
Ypredict = m.predict(data['Y'])[0]
pb.plot(data['Xtest'][:, 0], data['Xtest'][:, 1], 'r-')
pb.plot(Ypredict[:, 0], Ypredict[:, 1], 'b-')
pb.axis('equal')
pb.title('WiFi Localization with Gaussian Processes')
pb.legend(('True Location', 'Predicted Location'))
sse = ((data['Ytest'] - Y.predict)**2).sum()
print(m)
print('Sum of squares error on test data: ', str(sse))
return m
def sparse_GP_regression_1D(N=400, num_inducing=5, optim_iters=100):
"""Run a 1D example of a sparse GP regression."""
# sample inputs and outputs

View file

@ -58,3 +58,19 @@ class Kernpart(object):
raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target):
raise NotImplementedError
class Kernpart_inner(Kernpart):
def __init__(self,input_dim):
"""
The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs.
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
Kernpart.__init__(self, input_dim)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))

View file

@ -23,7 +23,7 @@ def reporthook(a,b,c):
# Global variables
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
overide_manual_authorize=False
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
# Note: there may be a better way of storing data resources. One of the pythonistas will need to take a look.
@ -33,7 +33,13 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
'files' : [['Index', 'housing.data', 'housing.names']],
'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""",
'license' : None,
'size' : 51276
},
'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'],
'files': [['frey_rawface.mat']],
'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
@ -90,6 +96,12 @@ The database was created with funding from NSF EIA-0196217.""",
'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""",
'license' : """Data is made available by the Delve system at the University of Toronto""",
'size' : 5861646},
'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'],
'files' : [['uw-floor.txt']],
'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""",
'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""",
'license' : None,
'size' : 284390},
'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'],
'files' : [['swiss_roll_data.mat']],
'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
@ -163,12 +175,19 @@ def authorize_download(dataset_name=None):
print('')
print('Data will be stored in ' + os.path.join(data_path, dataset_name) + '.')
print('')
if dr['license']:
print('You must also agree to the following license:')
print(dr['license'])
print('')
print('Do you wish to proceed with the download? [yes/no]')
return prompt_user()
if overide_manual_authorize:
if dr['license']:
print('You have agreed to the following license:')
print(dr['license'])
print('')
return True
else:
if dr['license']:
print('You must also agree to the following license:')
print(dr['license'])
print('')
print('Do you wish to proceed with the download? [yes/no]')
return prompt_user()
def download_data(dataset_name=None):
"""Check with the user that the are happy with terms and conditions for the data set, then download it."""
@ -254,6 +273,14 @@ def sample_class(f):
c = np.where(c, 1, -1)
return c
def boston_housing(data_set='boston_housing'):
if not data_available(data_set):
download_data(data_set)
all_data = np.genfromtxt(os.path.join(data_path, data_set, 'housing.data'))
X = all_data[:, 0:13]
Y = all_data[:, 13:14]
return data_details_return({'X' : X, 'Y': Y}, data_set)
def brendan_faces(data_set='brendan_faces'):
if not data_available(data_set):
download_data(data_set)
@ -339,6 +366,47 @@ def pumadyn(seed=default_seed, data_set='pumadyn-32nm'):
Ytest = data[indicesTest, -1][:, None]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'seed': seed}, data_set)
def robot_wireless(data_set='robot_wireless'):
# WiFi access point strengths on a tour around UW Paul Allen building.
if not data_available(data_set):
download_data(data_set)
file_name = os.path.join(data_path, data_set, 'uw-floor.txt')
all_time = np.genfromtxt(file_name, usecols=(0))
macaddress = np.genfromtxt(file_name, usecols=(1), dtype='string')
x = np.genfromtxt(file_name, usecols=(2))
y = np.genfromtxt(file_name, usecols=(3))
strength = np.genfromtxt(file_name, usecols=(4))
addresses = np.unique(macaddress)
times = np.unique(all_time)
addresses.sort()
times.sort()
allY = np.zeros((len(times), len(addresses)))
allX = np.zeros((len(times), 2))
allY[:]=-92.
strengths={}
for address, j in zip(addresses, range(len(addresses))):
ind = np.nonzero(address==macaddress)
temp_strengths=strength[ind]
temp_x=x[ind]
temp_y=y[ind]
temp_times = all_time[ind]
for time in temp_times:
vals = time==temp_times
if any(vals):
ind2 = np.nonzero(vals)
i = np.nonzero(time==times)
allY[i, j] = temp_strengths[ind2]
allX[i, 0] = temp_x[ind2]
allX[i, 1] = temp_y[ind2]
allY = (allY + 85.)/15.
X = allX[0:215, :]
Y = allY[0:215, :]
Xtest = allX[215:, :]
Ytest = allX[215:, :]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'addresses' : addresses, 'times' : times}, data_set)
def silhouette(data_set='ankur_pose_data'):
# Ankur Agarwal and Bill Trigg's silhoutte data.
if not data_available(data_set):
@ -468,6 +536,21 @@ def olympic_100m_men(data_set='rogers_girolami_data'):
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_100m_women(data_set='rogers_girolami_data'):
if not data_available(data_set):
download_data(data_set)
path = os.path.join(data_path, data_set)
tar_file = os.path.join(path, 'firstcoursemldata.tar.gz')
tar = tarfile.open(tar_file)
print('Extracting file.')
tar.extractall(path=path)
tar.close()
olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['female100']
X = olympic_data[:, 0][:, None]
Y = olympic_data[:, 1][:, None]
return data_details_return({'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m women from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}, data_set)
def olympic_marathon_men(data_set='olympic_marathon_men'):
if not data_available(data_set):
download_data(data_set)
@ -543,7 +626,6 @@ def crescent_data(num_data=200, seed=default_seed):
num_data_total += num_data_part[i]
X = np.vstack((Xparts[0], Xparts[1], Xparts[2], Xparts[3]))
Y = np.vstack((np.ones((num_data_part[0] + num_data_part[1], 1)), -np.ones((num_data_part[2] + num_data_part[3], 1))))
return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}