diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py index d3696fa6..23ab216e 100644 --- a/GPy/likelihoods/Gaussian.py +++ b/GPy/likelihoods/Gaussian.py @@ -2,19 +2,30 @@ import numpy as np from likelihood import likelihood class Gaussian(likelihood): + """ + Likelihood class for doing Expectation propagation + + :param Y: observed output (Nx1 numpy.ndarray) + .. Note:: Y values allowed depend on the likelihood_function used + :param variance: variance of the Gaussian noise + :param normalize: whether to normalize the data before computing (predictions will be in original scales) + :type normalize: False|True + """ def __init__(self,data,variance=1.,normalize=False): self.is_heteroscedastic = False self.Nparams = 1 self.Z = 0. # a correction factor which accounts for the approximation made N, self.D = data.shape - #normaliztion + #normalization if normalize: - self._mean = data.mean(0)[None,:] - self._std = data.std(0)[None,:] + self._bias = data.mean(0)[None,:] + self._scale = data.std(0)[None,:] + # Don't scale outputs which have zero variance to zero. + self._scale[np.nonzero(self._scale==0.)] = 1.0e-3 else: - self._mean = np.zeros((1,self.D)) - self._std = np.ones((1,self.D)) + self._bias = np.zeros((1,self.D)) + self._scale = np.ones((1,self.D)) self.set_data(data) @@ -24,7 +35,7 @@ class Gaussian(likelihood): self.data = data self.N,D = data.shape assert D == self.D - self.Y = (self.data - self._mean)/self._std + self.Y = (self.data - self._bias)/self._scale if D > self.N: self.YYT = np.dot(self.Y,self.Y.T) self.trYYT = np.trace(self.YYT) @@ -47,19 +58,19 @@ class Gaussian(likelihood): """ Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval """ - mean = mu*self._std + self._mean + mean = mu*self._scale + self._bias if full_cov: if self.D >1: raise NotImplementedError, "TODO" #Note. for D>1, we need to re-normalise all the outputs independently. # This will mess up computations of diag(true_var), below. 
#note that the upper, lower quantiles should be the same shape as mean - true_var = (var + np.eye(var.shape[0])*self._variance)*self._std**2 - _5pc = mean + - 2.*np.sqrt(np.diag(true_var)) + true_var = (var + np.eye(var.shape[0])*self._variance)*self._scale**2 + _5pc = mean - 2.*np.sqrt(np.diag(true_var)) _95pc = mean + 2.*np.sqrt(np.diag(true_var)) else: - true_var = (var + self._variance)*self._std**2 - _5pc = mean + - 2.*np.sqrt(true_var) + true_var = (var + self._variance)*self._scale**2 + _5pc = mean - 2.*np.sqrt(true_var) _95pc = mean + 2.*np.sqrt(true_var) return mean, true_var, _5pc, _95pc diff --git a/GPy/models/GP.py b/GPy/models/GP.py index 74bb5915..c6e46bea 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -19,8 +19,6 @@ class GP(model): :parm likelihood: a GPy likelihood :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True - :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) - :type normalize_Y: False|True :param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). 
See (link:slicing) :rtype: model object :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1 diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index bd56ff12..c0d9429a 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -24,12 +24,12 @@ class GPLVM(GP): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs): + def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False, **kwargs): if X is None: X = self.initialise_latent(init, Q, Y) if kernel is None: kernel = kern.rbf(Q) + kern.bias(Q) - likelihood = Gaussian(Y) + likelihood = Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, **kwargs) def initialise_latent(self, init, Q, Y): diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index d326f31b..ab290dd8 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -225,49 +225,92 @@ def creep_data(): X = all_data[:, features].copy() return {'X': X, 'y' : y} -def cmu_35_walk_jog(): +def cmu_mocap_49_balance(): + """Load CMU subject 49's one legged balancing motion that was used by Alvarez, Luengo and Lawrence at AISTATS 2009.""" + train_motions = ['18', '19'] + test_motions = ['20'] + data = cmu_mocap('49', train_motions, test_motions, sample_every=4) + data['info'] = "One legged balancing motions from CMU data base subject 49. As used in Alvarez, Luengo and Lawrence at AISTATS 2009. It consists of " + data['info'] + return data - skel = GPy.util.mocap.acclaim_skeleton(os.path.join(data_path, 'mocap', 'cmu', '35', '35.asf')) - examples = ['01', '02', '03', '04', '05', '06', +def cmu_mocap_35_walk_jog(): + """Load CMU subject 35's walking and jogging motions, the same data that was used by Taylor, Roweis and Hinton at NIPS 2007. but without their preprocessing. 
Also used by Lawrence at AISTATS 2007.""" + train_motions = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '19', '20', '21', '22', '23', '24', '25', '26', '28', '30', '31', '32', '33', '34'] - test_examples = ['18', '29'] - # Label differently for each sequence - exlbls = np.eye(31) - testexlbls = np.eye(2) - tot_length = 0 - tot_test_length = 0 - tY = [] - tlbls = [] - for i in range(len(examples)): - tmpchan = skel.load_channels(os.path.join(data_path, 'mocap', 'cmu', '35', '35_' + examples[i] + '.amc')) - tY.append(tmpchan[::4, :]) - tlbls.append(np.tile(exlbls[i, :], (tY[i].shape[0], 1))) - tot_length += tY[i].shape[0] - Y = np.zeros((tot_length, tY[0].shape[1])) - lbls = np.zeros((tot_length, tlbls[0].shape[1])) - endInd = 0 - for i in range(len(tY)): - startInd = endInd - endInd += tY[i].shape[0] - Y[startInd:endInd, :] = tY[i] - lbls[startInd:endInd, :] = tlbls[i] - tYtest = [] - tlblstest = [] - for i in range(len(test_examples)): - tmpchan = skel.load_channels(os.path.join(data_path, 'mocap', 'cmu', '35', '35_' + test_examples[i] + '.amc')) - tYtest.append(tmpchan[::4, :]) - tlblstest.append(np.tile(testexlbls[i, :], (tYtest[i].shape[0], 1))) - tot_test_length += tYtest[i].shape[0] + test_motions = ['18', '29'] + data = cmu_mocap('35', train_motions, test_motions, sample_every=4) + data['info'] = "Walk and jog data from CMU data base subject 35. As used in Taylor, Roweis and Hinton at NIPS 2007, but without their pre-processing (i.e. as used by Lawrence at AISTATS 2007). 
It consists of " + data['info'] + return data - Ytest = np.zeros((tot_test_length, tYtest[0].shape[1])) - lblstest = np.zeros((tot_test_length, tlblstest[0].shape[1])) - endInd = 0 - for i in range(len(tYtest)): - startInd = endInd - endInd += tYtest[i].shape[0] - Ytest[startInd:endInd, :] = tYtest[i] - lblstest[startInd:endInd, :] = tlblstest[i] - return {'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': "Walk and jog data from CMU data base subject 35."} +def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4): + """Load a given subject's training and test motions from the CMU motion capture data.""" + + # Load in subject skeleton. + subject_dir = os.path.join(data_path, 'mocap', 'cmu', subject) + skel = GPy.util.mocap.acclaim_skeleton(os.path.join(subject_dir, subject + '.asf')) + + # Set up labels for each sequence + exlbls = np.eye(len(train_motions)) + + # Load sequences + tot_length = 0 + temp_Y = [] + temp_lbls = [] + for i in range(len(train_motions)): + temp_chan = skel.load_channels(os.path.join(subject_dir, subject + '_' + train_motions[i] + '.amc')) + temp_Y.append(temp_chan[::sample_every, :]) + temp_lbls.append(np.tile(exlbls[i, :], (temp_Y[i].shape[0], 1))) + tot_length += temp_Y[i].shape[0] + + Y = np.zeros((tot_length, temp_Y[0].shape[1])) + lbls = np.zeros((tot_length, temp_lbls[0].shape[1])) + + end_ind = 0 + for i in range(len(temp_Y)): + start_ind = end_ind + end_ind += temp_Y[i].shape[0] + Y[start_ind:end_ind, :] = temp_Y[i] + lbls[start_ind:end_ind, :] = temp_lbls[i] + if len(test_motions)>0: + temp_Ytest = [] + temp_lblstest = [] + + testexlbls = np.eye(len(test_motions)) + tot_test_length = 0 + for i in range(len(test_motions)): + temp_chan = skel.load_channels(os.path.join(subject_dir, subject + '_' + test_motions[i] + '.amc')) + temp_Ytest.append(temp_chan[::sample_every, :]) + temp_lblstest.append(np.tile(testexlbls[i, :], (temp_Ytest[i].shape[0], 1))) + tot_test_length += temp_Ytest[i].shape[0] + + # 
Load test data + Ytest = np.zeros((tot_test_length, temp_Ytest[0].shape[1])) + lblstest = np.zeros((tot_test_length, temp_lblstest[0].shape[1])) + + end_ind = 0 + for i in range(len(temp_Ytest)): + start_ind = end_ind + end_ind += temp_Ytest[i].shape[0] + Ytest[start_ind:end_ind, :] = temp_Ytest[i] + lblstest[start_ind:end_ind, :] = temp_lblstest[i] + else: + Ytest = None + lblstest = None + + info = 'Subject: ' + subject + '. Training motions: ' + for motion in train_motions: + info += motion + ', ' + info = info[:-2] + if len(test_motions)>0: + info += '. Test motions: ' + for motion in test_motions: + info += motion + ', ' + info = info[:-2] + '.' + else: + info += '.' + if sample_every != 1: + info += ' Data is sub-sampled to every ' + str(sample_every) + ' frames.' + return {'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': info, 'skel': skel} diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py index 2eec687d..76650086 100644 --- a/GPy/util/mocap.py +++ b/GPy/util/mocap.py @@ -157,6 +157,13 @@ class skeleton(tree): def __init__(self): tree.__init__(self) + def connection_matrix(self): + connection = np.zeros((len(self.vertices), len(self.vertices)), dtype=bool) + for i in range(len(self.vertices)): + for j in range(len(self.vertices[i].children)): + connection[i, self.vertices[i].children[j]] = True + return connection + def to_xyz(self, channels): raise NotImplementedError, "this needs to be implemented to use the skeleton class" @@ -557,6 +564,7 @@ class acclaim_skeleton(skeleton): lin = self.read_line(fid) else: raise Error, 'Unrecognised file format' + self.finalize() def read_units(self, fid): """Read units from an acclaim skeleton file stream.""" diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index 482cc687..9754db63 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -184,71 +184,115 @@ class image_show(data_show): #if self.invert: # self.vals = -self.vals -class stick_show(data_show): - """Show a three 
dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect.""" + +class mocap_data_show(data_show): + """Base class for visualizing motion capture data.""" def __init__(self, vals, axes=None, connect=None): if axes==None: fig = plt.figure() axes = fig.add_subplot(111, projection='3d') data_show.__init__(self, vals, axes) - self.vals = vals.reshape((3, vals.shape[1]/3)).T - self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()]) - self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()]) - self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()]) - self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2]) - self.axes.set_xlim(self.x_lim) - self.axes.set_ylim(self.y_lim) - self.axes.set_zlim(self.z_lim) - self.axes.set_aspect(1) - self.axes.autoscale(enable=False) self.connect = connect - if not self.connect==None: - x = [] - y = [] - z = [] - self.I, self.J = np.nonzero(self.connect) - for i in range(len(self.I)): - x.append(self.vals[self.I[i], 0]) - x.append(self.vals[self.J[i], 0]) - x.append(np.NaN) - y.append(self.vals[self.I[i], 1]) - y.append(self.vals[self.J[i], 1]) - y.append(np.NaN) - z.append(self.vals[self.I[i], 2]) - z.append(self.vals[self.J[i], 2]) - z.append(np.NaN) - self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-') + self.process_values(vals) + self.initialize_axes() + self.draw_vertices() + self.finalize_axes() + self.draw_edges() self.axes.figure.canvas.draw() - def modify(self, vals): - self.points_handle.remove() - self.line_handle[0].remove() - self.vals = vals.reshape((3, vals.shape[1]/3)).T + def draw_vertices(self): self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2]) - self.axes.set_xlim(self.x_lim) - self.axes.set_ylim(self.y_lim) - self.axes.set_zlim(self.z_lim) + + def draw_edges(self): self.line_handle = [] if not self.connect==None: x = [] y = [] z = [] 
self.I, self.J = np.nonzero(self.connect) - for i in range(len(self.I)): - x.append(self.vals[self.I[i], 0]) - x.append(self.vals[self.J[i], 0]) + for i, j in zip(self.I, self.J): + x.append(self.vals[i, 0]) + x.append(self.vals[j, 0]) x.append(np.NaN) - y.append(self.vals[self.I[i], 1]) - y.append(self.vals[self.J[i], 1]) + y.append(self.vals[i, 1]) + y.append(self.vals[j, 1]) y.append(np.NaN) - z.append(self.vals[self.I[i], 2]) - z.append(self.vals[self.J[i], 2]) + z.append(self.vals[i, 2]) + z.append(self.vals[j, 2]) z.append(np.NaN) self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-') - + + def modify(self, vals): + self.process_values(vals) + self.initialize_axes_modify() + self.draw_vertices() + self.finalize_axes_modify() + self.draw_edges() self.axes.figure.canvas.draw() + def process_values(self, vals): + raise NotImplementedError, "this needs to be implemented to use the data_show class" + + def initialize_axes(self): + """Set up the axes with the right limits and scaling.""" + self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()]) + self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()]) + self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()]) + + def initialize_axes_modify(self): + self.points_handle.remove() + self.line_handle[0].remove() + + def finalize_axes(self): + self.axes.set_xlim(self.x_lim) + self.axes.set_ylim(self.y_lim) + self.axes.set_zlim(self.z_lim) + self.axes.set_aspect(1) + self.axes.autoscale(enable=False) + + def finalize_axes_modify(self): + self.axes.set_xlim(self.x_lim) + self.axes.set_ylim(self.y_lim) + self.axes.set_zlim(self.z_lim) +class stick_show(mocap_data_show): + """Show a three dimensional point cloud as a figure. 
Connect elements of the figure together using the matrix connect.""" + def __init__(self, vals, axes=None, connect=None): + mocap_data_show.__init__(self, vals, axes, connect) + + def process_values(self, vals): + self.vals = vals.reshape((3, vals.shape[1]/3)).T + +class skeleton_show(mocap_data_show): + """data_show class for visualizing motion capture data encoded as a skeleton with angles.""" + def __init__(self, vals, skel, padding=0, axes=None): + self.skel = skel + self.padding = padding + connect = skel.connection_matrix() + mocap_data_show.__init__(self, vals, axes, connect) + + def process_values(self, vals): + if self.padding>0: + channels = np.zeros((vals.shape[0], vals.shape[1]+self.padding)) + channels[:, 0:vals.shape[1]] = vals + else: + channels = vals + vals_mat = self.skel.to_xyz(channels.flatten()) + self.vals = vals_mat.copy() + # Flip the Y and Z axes + self.vals[:, 0] = vals_mat[:, 0] + self.vals[:, 1] = vals_mat[:, 2] + self.vals[:, 2] = vals_mat[:, 1] + + def wrap_around(vals, lim, connect): + quot = lim[1] - lim[0] + vals = np.remainder(vals, quot)+lim[0] + nVals = np.floor(vals/quot) + for i in range(connect.shape[0]): + for j in np.nonzero(connect[i, :])[0]: + if nVals[i] != nVals[j]: + connect[i, j] = False + return vals, connect