From 45a2c2b3a6b93259dad767b47947192e96b69f42 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Thu, 6 Jun 2013 09:04:02 +0100 Subject: [PATCH] Part changes to datasets.py and mocap.py to download data resources for examples. Not working currently! --- GPy/examples/dimensionality_reduction.py | 8 +-- GPy/util/datasets.py | 66 ++++++++++++++++++++++-- GPy/util/mocap.py | 10 ++-- GPy/util/visualize.py | 47 +++++++++++++---- 4 files changed, 108 insertions(+), 23 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index b3320ca9..36dbe727 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -151,7 +151,7 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_f_eval=4e3, plot data_show = GPy.util.visualize.vector_show(y) lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes) raw_input('Press enter to finish') - plt.close('all') + plt.close(fig) return m def oil_100(): @@ -327,7 +327,7 @@ def brendan_faces(): data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') - plt.close('all') + lvm_visualizer.close() return m @@ -345,7 +345,7 @@ def stick(): data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') - plt.close('all') + lvm_visualizer.close() return m @@ -367,7 +367,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') - plt.close('all') + lvm_visualizer.close() return m diff --git 
a/GPy/util/datasets.py b/GPy/util/datasets.py index c477f283..3741d953 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -9,6 +9,61 @@ import urllib2 as url data_path = os.path.join(os.path.dirname(__file__), 'datasets') default_seed = 10000 +neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/' + +def prompt_user(): + # raw_input returns the empty string for "enter" + yes = set(['yes', 'y']) + no = set(['no','n']) + + choice = raw_input().lower() + if choice in yes: + return True + elif choice in no: + return False + else: + sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'") + return prompt_user() + +def download_data(dataset_name=None): + """Helper function which contains the resource locations for each data set in one place""" + + # Note: there may be a better way of doing this. One of the pythonistas will need to take a look. Neil + data_resources = {'oil': {'urls' : [neil_url + 'oil_data/'], + 'files' : [['DataTrnLbls.txt', 'DataTrn.txt']], + 'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593', + 'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""", + 'agreement' : None}, + 'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'], + 'files' : [['frey_rawface.mat']], + 'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.', + 'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""", + 'agreement': None} + } + + + print('Acquiring resource: ' + dataset_name) + # TODO, check resource is in dictionary! 
+ dr = data_resources[dataset_name] + print('Details of data: ') + print(dr['details']) + if dr['citation']: + print('Please cite:') + print(dr['citation']) + if dr['agreement']: + print('You must also agree to the following:') + print(dr['agreement']) + print('Do you wish to proceed with the download? [yes/no]') + if prompt_user()==False: + return False + + for url, files in zip(dr['urls'], dr['files']): + for file in files: + download_resource(url + file) + return True + + + # Some general utilities. def sample_class(f): @@ -17,7 +72,7 @@ def sample_class(f): c = np.where(c, 1, -1) return c -def fetch_dataset(resource, save_name = None, save_file = True, messages = True): +def download_resource(resource, save_name = None, save_file = True, messages = True): if messages: print "Downloading resource: " , resource, " ... ", response = url.urlopen(resource) @@ -57,10 +112,11 @@ def simulation_BGPLVM(): # The data sets def oil(): - fid = open(os.path.join(data_path, 'oil', 'DataTrn.txt')) + download_data('oil') + fid = open(os.path.join(data_path, 'oil', 'DataTrn.txt')) X = np.fromfile(fid, sep='\t').reshape((-1, 12)) fid.close() - fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt')) + fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt')) Y = np.fromfile(fid, sep='\t').reshape((-1, 3)) * 2. - 1. fid.close() return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."} @@ -283,6 +339,10 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4): # Load in subject skeleton. subject_dir = os.path.join(data_path, 'mocap', 'cmu', subject) + + # Make sure the data is downloaded. 
+ mocap.fetch_cmu(([subject], [train_motions]), skel_store_dir=subject_dir,motion_store_dir=subject_dir) + skel = GPy.util.mocap.acclaim_skeleton(os.path.join(subject_dir, subject + '.asf')) # Set up labels for each sequence diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py index 84a893cb..4c9fa678 100644 --- a/GPy/util/mocap.py +++ b/GPy/util/mocap.py @@ -693,7 +693,7 @@ skel = acclaim_skeleton() -def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_dir = '.', motion_store_dir = '.', subj_motions = None, store_motions = True, return_motions = True, messages = True): +def fetch_cmu(subj_motions, base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_dir = '.', motion_store_dir = '.', store_motions = True, return_motions = True, messages = True): ''' Download and store the skel. and motions indicated in a tuple (A,B) where A is a list of skeletons and B the corresponding 2-D list of motions, ie B_ij is the j-th motion to download for skeleton A_i @@ -702,9 +702,9 @@ def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_di e.g. # Download the data, do not return anything - GPy.util.mocap.fetch_data(subj_motions = ([35],[[1,2,3]]), return_motions = False) + GPy.util.mocap.fetch_cmu(subj_motions = ([35],[[1,2,3]]), return_motions = False) # Fetch and return the data in a list. Do not store them anywhere - GPy.util.mocap.fetch_data(subj_motions = ([35],[[1,2,3]]), return_motions = True, store_motions = False) + GPy.util.mocap.fetch_cmu(subj_motions = ([35],[[1,2,3]]), return_motions = True, store_motions = False) In both cases above, if the data do exist in the given skel_store_dir and motion_store_dir, they are just loaded from there. 
''' @@ -752,7 +752,7 @@ def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_di os.mkdir(cur_skel_dir) if not os.path.isdir(motion_store_dir + cur_skel_suffix): os.mkdir(motion_store_dir + cur_skel_suffix) - cur_skel_data = dat.fetch_dataset(cur_skel_url, cur_skel_file, store_motions, messages) + cur_skel_data = dat.download_resource(cur_skel_url, cur_skel_file, store_motions, messages) if return_motions: all_skels.append(cur_skel_data) @@ -765,7 +765,7 @@ def fetch_data(base_url = 'http://mocap.cs.cmu.edu:8080/subjects', skel_store_di if return_motions: cur_motion_data = f.read() else: - cur_motion_data = dat.fetch_dataset(cur_motion_url, cur_motion_file, store_motions, messages) + cur_motion_data = dat.download_resource(cur_motion_url, cur_motion_file, store_motions, messages) if return_motions: all_motions[i].append(cur_motion_data) diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index 06c373d8..fae8c575 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -9,7 +9,7 @@ import visual class data_show: """ - The data show class is a base class which describes how to visualize a + The data_show class is a base class which describes how to visualize a particular data set. For example, motion capture data can be plotted as a stick figure, or images are shown using imshow. This class enables latent to data visualizations for the GP-LVM. @@ -21,6 +21,28 @@ class data_show: def modify(self, vals): raise NotImplementedError, "this needs to be implemented to use the data_show class" + def close(self): + raise NotImplementedError, "this needs to be implemented to use the data_show class" + + +class vpython_show(data_show): + """ + the vpython_show class is a base class for all visualization methods that use vpython to display. It is initialized with a scene. If the scene is set to None it creates a scene window. 
+ """ + + def __init__(self, vals, scene=None): + data_show.__init__(self, vals) + # If no axes are defined, create some. + + if scene==None: + self.scene = visual.display(title='Data Visualization') + else: + self.scene = scene + + def close(self): + self.scene.exit() + + class matplotlib_show(data_show): """ @@ -36,6 +58,9 @@ class matplotlib_show(data_show): else: self.axes = axes + def close(self): + plt.close(self.axes.get_figure()) + class vector_show(matplotlib_show): """ A base visualization class that just shows a data vector as a plot of @@ -276,11 +301,11 @@ class image_show(matplotlib_show): self.vals = Image.fromarray(self.vals.astype('uint8')) self.vals.putpalette(self.palette) # palette is a list, must be loaded before calling this function -class mocap_data_show_visual(data_show): +class mocap_data_show_vpython(vpython_show): """Base class for visualizing motion capture data using visual module.""" - def __init__(self, vals, connect=None, radius=0.1): - data_show.__init__(self, vals) + def __init__(self, vals, scene=None, connect=None, radius=0.1): + vpython_show.__init__(self, vals, scene) self.radius = radius self.connect = connect self.process_values() @@ -291,7 +316,7 @@ class mocap_data_show_visual(data_show): self.spheres = [] for i in range(self.vals.shape[0]): self.spheres.append(visual.sphere(pos=(self.vals[i, 0], self.vals[i, 2], self.vals[i, 1]), radius=self.radius)) - + self.scene.visible=True def draw_edges(self): self.rods = [] @@ -410,17 +435,17 @@ class mocap_data_show(matplotlib_show): self.axes.set_zlim(self.z_lim) -class stick_show(mocap_data_show_visual): +class stick_show(mocap_data_show_vpython): """Show a three dimensional point cloud as a figure. 
Connect elements of the figure together using the matrix connect.""" - def __init__(self, vals, connect=None): - mocap_data_show_visual.__init__(self, vals, connect, radius=0.04) + def __init__(self, vals, connect=None, scene=None): + mocap_data_show_vpython.__init__(self, vals, scene=scene, connect=connect, radius=0.04) def process_values(self): self.vals = self.vals.reshape((3, self.vals.shape[1]/3)).T -class skeleton_show(mocap_data_show_visual): +class skeleton_show(mocap_data_show_vpython): """data_show class for visualizing motion capture data encoded as a skeleton with angles.""" - def __init__(self, vals, skel, padding=0): + def __init__(self, vals, skel, scene=None, padding=0): """data_show class for visualizing motion capture data encoded as a skeleton with angles. :param vals: set of modeled angles to use for printing in the axis when it's first created. :type vals: np.array @@ -432,7 +457,7 @@ class skeleton_show(mocap_data_show_visual): self.skel = skel self.padding = padding connect = skel.connection_matrix() - mocap_data_show_visual.__init__(self, vals, connect, radius=0.4) + mocap_data_show_vpython.__init__(self, vals, scene=scene, connect=connect, radius=0.4) def process_values(self): """Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting.