From 45a2c2b3a6b93259dad767b47947192e96b69f42 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Thu, 6 Jun 2013 09:04:02 +0100 Subject: [PATCH] Part changes to datasets.py and mocap.py to download data resources for examples. Not working currently! --- GPy/examples/dimensionality_reduction.py | 8 +-- GPy/util/datasets.py | 66 ++++++++++++++++++++++-- GPy/util/mocap.py | 10 ++-- GPy/util/visualize.py | 47 +++++++++++++---- 4 files changed, 108 insertions(+), 23 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index b3320ca9..36dbe727 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -151,7 +151,7 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_f_eval=4e3, plot data_show = GPy.util.visualize.vector_show(y) lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes) raw_input('Press enter to finish') - plt.close('all') + plt.close(fig) return m def oil_100(): @@ -327,7 +327,7 @@ def brendan_faces(): data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') - plt.close('all') + lvm_visualizer.close() return m @@ -345,7 +345,7 @@ def stick(): data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') - plt.close('all') + lvm_visualizer.close() return m @@ -367,7 +367,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') - plt.close('all') + lvm_visualizer.close() return m diff --git 
a/GPy/util/datasets.py b/GPy/util/datasets.py index c477f283..3741d953 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -9,6 +9,61 @@ import urllib2 as url data_path = os.path.join(os.path.dirname(__file__), 'datasets') default_seed = 10000 +neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/' + +def prompt_user(): + # raw_input returns the empty string for "enter" + yes = set(['yes', 'y']) + no = set(['no','n']) + + choice = raw_input().lower() + if choice in yes: + return True + elif choice in no: + return False + else: + sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'") + return prompt_user() + +def download_data(dataset_name=None): + """Helper function which contains the resource locations for each data set in one place""" + + # Note: there may be a better way of doing this. One of the pythonistas will need to take a look. Neil + data_resources = {'oil': {'urls' : [neil_url + 'oil_data/'], + 'files' : [['DataTrnLbls.txt', 'DataTrn.txt']], + 'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593', + 'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""", + 'agreement' : None}, + 'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'], + 'files' : [['frey_rawface.mat']], + 'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.', + 'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""", + 'agreement': None} + } + + + print('Acquiring resource: ' + dataset_name) + # TODO, check resource is in dictionary! 
+ dr = data_resources[dataset_name] + print('Details of data: ') + print(dr['details']) + if dr['citation']: + print('Please cite:') + print(dr['citation']) + if dr['agreement']: + print('You must also agree to the following:') + print(dr['agreement']) + print('Do you wish to proceed with the download? [yes/no]') + if prompt_user()==False: + return False + + for url, files in zip(dr['urls'], dr['files']): + for file in files: + download_resource(url + file) + return True + + + # Some general utilities. def sample_class(f): @@ -17,7 +72,7 @@ def sample_class(f): c = np.where(c, 1, -1) return c -def fetch_dataset(resource, save_name = None, save_file = True, messages = True): +def download_resource(resource, save_name = None, save_file = True, messages = True): if messages: print "Downloading resource: " , resource, " ... ", response = url.urlopen(resource) @@ -57,10 +112,11 @@ def simulation_BGPLVM(): # The data sets def oil(): - fid = open(os.path.join(data_path, 'oil', 'DataTrn.txt')) + download_data('oil') + fid = open(os.path.join(data_path, 'oil', 'DataTrn.txt')) X = np.fromfile(fid, sep='\t').reshape((-1, 12)) fid.close() - fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt')) + fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt')) Y = np.fromfile(fid, sep='\t').reshape((-1, 3)) * 2. - 1. fid.close() return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."} @@ -283,6 +339,10 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4): # Load in subject skeleton. subject_dir = os.path.join(data_path, 'mocap', 'cmu', subject) + + # Make sure the data is downloaded. 
+ mocap.fetch_cmu(([subject], [train_motions]), skel_store_dir=subject_dir,motion_store_dir=subject_dir) + skel = GPy.util.mocap.acclaim_skeleton(os.path.join(subject_dir, subject + '.asf')) # Set up labels for each sequence diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py index 84a893cb..4c9fa678 100644 --- a/GPy/util/mocap.py +++ b/GPy/util/mocap.py @@ -693,7 +693,7 @@ skel = acclaim_skeleton() -def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_dir = '.', motion_store_dir = '.', subj_motions = None, store_motions = True, return_motions = True, messages = True): +def fetch_cmu(subj_motions, base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_dir = '.', motion_store_dir = '.', store_motions = True, return_motions = True, messages = True): ''' Download and store the skel. and motions indicated in a tuple (A,B) where A is a list of skeletons and B the corresponding 2-D list of motions, ie B_ij is the j-th motion to download for skeleton A_i @@ -702,9 +702,9 @@ def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_di e.g. # Download the data, do not return anything - GPy.util.mocap.fetch_data(subj_motions = ([35],[[1,2,3]]), return_motions = False) + GPy.util.mocap.fetch_cmu(subj_motions = ([35],[[1,2,3]]), return_motions = False) # Fetch and return the data in a list. Do not store them anywhere - GPy.util.mocap.fetch_data(subj_motions = ([35],[[1,2,3]]), return_motions = True, store_motions = False) + GPy.util.mocap.fetch_cmu(subj_motions = ([35],[[1,2,3]]), return_motions = True, store_motions = False) In both cases above, if the data do exist in the given skel_store_dir and motion_store_dir, they are just loaded from there. 
''' @@ -752,7 +752,7 @@ def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_di os.mkdir(cur_skel_dir) if not os.path.isdir(motion_store_dir + cur_skel_suffix): os.mkdir(motion_store_dir + cur_skel_suffix) - cur_skel_data = dat.fetch_dataset(cur_skel_url, cur_skel_file, store_motions, messages) + cur_skel_data = dat.download_resource(cur_skel_url, cur_skel_file, store_motions, messages) if return_motions: all_skels.append(cur_skel_data) @@ -765,7 +765,7 @@ def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_di if return_motions: cur_motion_data = f.read() else: - cur_motion_data = dat.fetch_dataset(cur_motion_url, cur_motion_file, store_motions, messages) + cur_motion_data = dat.download_resource(cur_motion_url, cur_motion_file, store_motions, messages) if return_motions: all_motions[i].append(cur_motion_data) diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index 06c373d8..fae8c575 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -9,7 +9,7 @@ import visual class data_show: """ - The data show class is a base class which describes how to visualize a + The data_show class is a base class which describes how to visualize a particular data set. For example, motion capture data can be plotted as a stick figure, or images are shown using imshow. This class enables latent to data visualizations for the GP-LVM. @@ -21,6 +21,28 @@ class data_show: def modify(self, vals): raise NotImplementedError, "this needs to be implemented to use the data_show class" + def close(self): + raise NotImplementedError, "this needs to be implemented to use the data_show class" + + +class vpython_show(data_show): + """ + the vpython_show class is a base class for all visualization methods that use vpython to display. It is initialized with a scene. If the scene is set to None it creates a scene window. 
+ """ + + def __init__(self, vals, scene=None): + data_show.__init__(self, vals) + # If no axes are defined, create some. + + if scene==None: + self.scene = visual.display(title='Data Visualization') + else: + self.scene = scene + + def close(self): + self.scene.exit() + + class matplotlib_show(data_show): """ @@ -36,6 +58,9 @@ class matplotlib_show(data_show): else: self.axes = axes + def close(self): + plt.close(self.axes.get_figure()) + class vector_show(matplotlib_show): """ A base visualization class that just shows a data vector as a plot of @@ -276,11 +301,11 @@ class image_show(matplotlib_show): self.vals = Image.fromarray(self.vals.astype('uint8')) self.vals.putpalette(self.palette) # palette is a list, must be loaded before calling this function -class mocap_data_show_visual(data_show): +class mocap_data_show_vpython(vpython_show): """Base class for visualizing motion capture data using visual module.""" - def __init__(self, vals, connect=None, radius=0.1): - data_show.__init__(self, vals) + def __init__(self, vals, scene=None, connect=None, radius=0.1): + vpython_show.__init__(self, vals, scene) self.radius = radius self.connect = connect self.process_values() @@ -291,7 +316,7 @@ class mocap_data_show_visual(data_show): self.spheres = [] for i in range(self.vals.shape[0]): self.spheres.append(visual.sphere(pos=(self.vals[i, 0], self.vals[i, 2], self.vals[i, 1]), radius=self.radius)) - + self.scene.visible=True def draw_edges(self): self.rods = [] @@ -410,17 +435,17 @@ class mocap_data_show(matplotlib_show): self.axes.set_zlim(self.z_lim) -class stick_show(mocap_data_show_visual): +class stick_show(mocap_data_show_vpython): """Show a three dimensional point cloud as a figure. 
Connect elements of the figure together using the matrix connect.""" - def __init__(self, vals, connect=None): - mocap_data_show_visual.__init__(self, vals, connect, radius=0.04) + def __init__(self, vals, connect=None, scene=None): + mocap_data_show_vpython.__init__(self, vals, scene=scene, connect=connect, radius=0.04) def process_values(self): self.vals = self.vals.reshape((3, self.vals.shape[1]/3)).T -class skeleton_show(mocap_data_show_visual): +class skeleton_show(mocap_data_show_vpython): """data_show class for visualizing motion capture data encoded as a skeleton with angles.""" - def __init__(self, vals, skel, padding=0): + def __init__(self, vals, skel, scene=None, padding=0): """data_show class for visualizing motion capture data encoded as a skeleton with angles. :param vals: set of modeled angles to use for printing in the axis when it's first created. :type vals: np.array @@ -432,7 +457,7 @@ class skeleton_show(mocap_data_show_visual): self.skel = skel self.padding = padding connect = skel.connection_matrix() - mocap_data_show_visual.__init__(self, vals, connect, radius=0.4) + mocap_data_show_vpython.__init__(self, vals, scene=scene, connect=connect, radius=0.4) def process_values(self): """Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting.