merge mu's changes into devel

2026-05-24 14:15:14 +02:00 · 2014-05-14 11:19:18 +01:00 · 2014-05-14 11:19:18 +01:00 · 9171909724
commit 9171909724
parent be87beffe1 621de63fc8
37 changed files with 1177 additions and 878 deletions
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -72,7 +72,7 @@ class GP(Model):
    def log_likelihood(self):
        return self._log_marginal_likelihood

-    def _raw_predict(self, _Xnew, full_cov=False):
+    def _raw_predict(self, _Xnew, full_cov=False, kern=None):
        """
        For making predictions, does not account for normalization or likelihood

@ -87,14 +87,17 @@ class GP(Model):
        $$

        """
-        Kx = self.kern.K(_Xnew, self.X).T
+        if kern is None:
+            kern = self.kern
+
+        Kx = kern.K(_Xnew, self.X).T
        WiKx = np.dot(self.posterior.woodbury_inv, Kx)
        mu = np.dot(Kx.T, self.posterior.woodbury_vector)
        if full_cov:
-            Kxx = self.kern.K(_Xnew)
+            Kxx = kern.K(_Xnew)
            var = Kxx - np.dot(Kx.T, WiKx)
        else:
-            Kxx = self.kern.Kdiag(_Xnew)
+            Kxx = kern.Kdiag(_Xnew)
            var = Kxx - np.sum(WiKx*Kx, 0)
            var = var.reshape(-1, 1)

@ -102,7 +105,7 @@ class GP(Model):
        if len(mu.shape)==1: mu = mu[:,None]
        return mu, var

-    def predict(self, Xnew, full_cov=False, Y_metadata=None):
+    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
        """
        Predict the function(s) at the new point(s) Xnew.

@ -111,6 +114,9 @@ class GP(Model):
        :param full_cov: whether to return the full covariance matrix, or just
                         the diagonal
        :type full_cov: bool
+        :param Y_metadata: metadata about the predicting point to pass to the likelihood
+        :param kern: The kernel to use for prediction (defaults to the model
+                     kern). This is useful for examining e.g. subprocesses.
        :returns: mean: posterior mean,  a Numpy array, Nnew x self.input_dim
        :returns: var: posterior variance, a Numpy array, Nnew x 1 if
                       full_cov=False, Nnew x Nnew otherwise
@ -123,7 +129,7 @@ class GP(Model):

        """
        #predict the latent function values
-        mu, var = self._raw_predict(Xnew, full_cov=full_cov)
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)

        # now push through likelihood
        mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
--- a/GPy/core/parameterization/lists_and_dicts.py
+++ b/GPy/core/parameterization/lists_and_dicts.py
@ -58,7 +58,9 @@ class ObservablesList(object):
    def __repr__(self):
        return self._poc.__repr__()
    
+
    def add(self, priority, observable, callble):
+        if observable is not None:
            ins = 0
            for pr, _, _ in self:
                if priority > pr:
@ -86,7 +88,6 @@ class ObservablesList(object):
    def __iter__(self):
        self.flush()
        for p, o, c in self._poc:
-            if o() is not None:
            yield p, o(), c 

    def __len__(self):
@ -94,10 +95,11 @@ class ObservablesList(object):
        return self._poc.__len__()

    def __deepcopy__(self, memo):
-        self.flush()
        s = ObservablesList()
+        for p,o,c in self:
            import copy
-        s._poc = copy.deepcopy(self._poc, memo)
+            s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo))
+        s.flush()
        return s

    def __getstate__(self):
--- a/GPy/core/parameterization/observable_array.py
+++ b/GPy/core/parameterization/observable_array.py
@ -1,7 +1,7 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-__updated__ = '2014-04-15'
+__updated__ = '2014-05-12'

 import numpy as np
 from parameter_core import Observable, Pickleable
@ -15,10 +15,10 @@ class ObsAr(np.ndarray, Pickleable, Observable):
    """
    __array_priority__ = -1 # Never give back ObsAr
    def __new__(cls, input_array, *a, **kw):
+        # always make a copy of the input parameters, as we need it to be in C order:
        if not isinstance(input_array, ObsAr):
-            obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
+            obj = np.atleast_1d(np.require(np.copy(input_array), dtype=np.float64, requirements=['W', 'C'])).view(cls)
        else: obj = input_array
-        #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
        super(ObsAr, obj).__init__(*a, **kw)
        return obj

--- a/GPy/core/parameterization/param.py
+++ b/GPy/core/parameterization/param.py
@ -45,7 +45,6 @@ class Param(OptimizationHandlable, ObsAr):
    _parameters_ = []
    def __new__(cls, name, input_array, default_constraint=None):
        obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
-        cls.__name__ = "Param"
        obj._current_slice_ = (slice(obj.shape[0]),)
        obj._realshape_ = obj.shape
        obj._realsize_ = obj.size
@ -58,9 +57,9 @@ class Param(OptimizationHandlable, ObsAr):

    def build_pydot(self,G):
        import pydot
-        node = pydot.Node(id(self), shape='record', label=self.name)
+        node = pydot.Node(id(self), shape='trapezium', label=self.name)#, fontcolor='white', color='white')
        G.add_node(node)
-        for o in self.observers.keys():
+        for _, o, _ in self.observers:
            label = o.name if hasattr(o, 'name') else str(o)
            observed_node = pydot.Node(id(o), label=label)
            G.add_node(observed_node)
@ -90,6 +89,13 @@ class Param(OptimizationHandlable, ObsAr):
    def param_array(self):
        return self

+    @property
+    def values(self):
+        """
+        Return self as numpy array view
+        """
+        return self.view(np.ndarray)
+
    @property
    def gradient(self):
        """
@ -100,11 +106,11 @@ class Param(OptimizationHandlable, ObsAr):
        """
        if getattr(self, '_gradient_array_', None) is None:
            self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64)
-        return self._gradient_array_[self._current_slice_]
+        return self._gradient_array_#[self._current_slice_]

    @gradient.setter
    def gradient(self, val):
-        self._gradient_array_[self._current_slice_] = val
+        self._gradient_array_[:] = val

    #===========================================================================
    # Array operations -> done
@ -112,10 +118,13 @@ class Param(OptimizationHandlable, ObsAr):
    def __getitem__(self, s, *args, **kwargs):
        if not isinstance(s, tuple):
            s = (s,)
-        if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
-            s += (Ellipsis,)
+        #if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
+        #    s += (Ellipsis,)
        new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
-        try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
+        try: 
+            new_arr._current_slice_ = s
+            new_arr._gradient_array_ = self.gradient[s]
+            new_arr._original_ = self.base is new_arr.base
        except AttributeError: pass  # returning 0d array or float, double etc
        return new_arr

@ -156,6 +165,34 @@ class Param(OptimizationHandlable, ObsAr):
    def _ensure_fixes(self):
        if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)

+    #===========================================================================
+    # parameterizable
+    #===========================================================================
+    def traverse(self, visit, *args, **kwargs):
+        """
+        Traverse the hierarchy performing visit(self, *args, **kwargs) at every node passed by.
+        See "visitor pattern" in literature. This is implemented in pre-order fashion.
+
+        This function will just call visit on self, as Param objects are leaf nodes.
+        """
+        visit(self, *args, **kwargs)
+    
+    def traverse_parents(self, visit, *args, **kwargs):
+        """
+        Traverse the hierarchy upwards, visiting all parents and their children, except self.
+        See "visitor pattern" in literature. This is implemented in pre-order fashion.
+    
+        Example:
+    
+        parents = []
+        self.traverse_parents(parents.append)
+        print parents
+        """
+        if self.has_parent():
+            self.__visited = True
+            self._parent_._traverse_parents(visit, *args, **kwargs)
+            self.__visited = False
+
    #===========================================================================
    # Convenience
    #===========================================================================
@ -316,8 +353,8 @@ class ParamConcatenation(object):
            val = val.values()
        ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
        vals = self.values(); vals[s] = val
-        [numpy.copyto(p, vals[ps], where=ind[ps])
-         for p, ps in zip(self.params, self._param_slices_)]
+        for p, ps in zip(self.params, self._param_slices_):
+            p.flat[ind[ps]] = vals[ps]
        if update:
            self.update_all_params()
    def values(self):
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED,
 import numpy as np
 import re

-__updated__ = '2014-04-16'
+__updated__ = '2014-05-12'

 class HierarchyError(Exception):
    """
@ -124,7 +124,7 @@ class Parentable(object):
        """
        Disconnect this object from its parent
        """
-        raise NotImplementedError, "Abstaract superclass"
+        raise NotImplementedError, "Abstract superclass"

    @property
    def _highest_parent_(self):
@ -162,7 +162,6 @@ class Pickleable(object):
        :param protocol: pickling protocol to use, python-pickle for details.
        """
        import cPickle as pickle
-        import pickle #TODO: cPickle
        if isinstance(f, str):
            with open(f, 'w') as f:
                pickle.dump(self, f, protocol)
@ -177,19 +176,23 @@ class Pickleable(object):
        #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
        import copy
        memo = {}
-        memo[id(self._parent_)] = None
-        memo[id(self.gradient)] = None
-        memo[id(self.param_array)] = None
-        memo[id(self._fixes_)] = None
-        c = copy.deepcopy(self, memo)
+        # the next part makes sure that we do not include parents in any form:
+        parents = []
+        self.traverse_parents(parents.append) # collect parents
+        for p in parents:
+            memo[id(p)] = None # set all parents to be None, so they will not be copied
+        memo[id(self.gradient)] = None # reset the gradient
+        memo[id(self.param_array)] = None # and param_array
+        memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
+        c = copy.deepcopy(self, memo) # and start the copy
        c._parent_index_ = None
        return c

    def __deepcopy__(self, memo):
-        s = self.__new__(self.__class__)
-        memo[id(self)] = s
+        s = self.__new__(self.__class__) # fresh instance
+        memo[id(self)] = s # be sure to break all cycles --> self is already done
        import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
        return s

    def __getstate__(self):
@ -202,9 +205,6 @@ class Pickleable(object):
        dc = dict()
        for k,v in self.__dict__.iteritems():
            if k not in ignore_list:
-                #if hasattr(v, "__getstate__"):
-                #dc[k] = v.__getstate__()
-                #else:
                dc[k] = v
        return dc
 
@ -212,12 +212,6 @@ class Pickleable(object):
        self.__dict__.update(state)
        return self

-    #def __getstate__(self, memo):
-    #    raise NotImplementedError, "get state must be implemented to be able to pickle objects"
-
-    #def __setstate__(self, memo):
-    #    raise NotImplementedError, "set state must be implemented to be able to pickle objects"
-
 class Gradcheckable(Pickleable, Parentable):
    """
    Adds the functionality for an object to be gradcheckable.
@ -585,12 +579,6 @@ class OptimizationHandlable(Constrainable):
    def __init__(self, name, default_constraint=None, *a, **kw):
        super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)

-    def transform(self):
-        [np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-
-    def untransform(self):
-        [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-
    def _get_params_transformed(self):
        # transformed parameters (apply transformation rules)
        p = self.param_array.copy()
@ -604,15 +592,15 @@ class OptimizationHandlable(Constrainable):
        return p

    def _set_params_transformed(self, p):
-        if p is self.param_array:
-            p = p.copy()
+        if not(p is self.param_array):
            if self.has_parent() and self.constraints[__fixed__].size != 0:
                fixes = np.ones(self.size).astype(bool)
                fixes[self.constraints[__fixed__]] = FIXED
                self.param_array.flat[fixes] = p
            elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
            else: self.param_array.flat = p
-        self.untransform()
+        [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) 
+         for c, ind in self.constraints.iteritems() if c != __fixed__]
        self._trigger_params_changed()

    def _trigger_params_changed(self, trigger_parent=True):
@ -626,7 +614,7 @@ class OptimizationHandlable(Constrainable):
    def num_params(self):
        """
        Return the number of parameters of this parameter_handle.
-        Param objects will allways return 0.
+        Param objects will always return 0.
        """
        raise NotImplemented, "Abstract, please implement in respective classes"

@ -644,6 +632,7 @@ class OptimizationHandlable(Constrainable):
        else: names = [adjust(x.name) for x in self._parameters_]
        if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
        return names
+
    def _get_param_names(self):
        n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
        return n
@ -710,12 +699,18 @@ class Parameterizable(OptimizationHandlable):
        super(Parameterizable, self).__init__(*args, **kwargs)
        from GPy.core.parameterization.lists_and_dicts import ArrayList
        self._parameters_ = ArrayList()
+        self._param_array_ = None
        self.size = 0
        self._added_names_ = set()
+        self.__visited = False # for traversing in reverse order we need to know if we were here already

    @property
    def param_array(self):
-        if not hasattr(self, '_param_array_'):
+        """
+        Array representing the parameters of this class.
+        There is only one copy of all parameters in memory, two during optimization.
+        """
+        if self._param_array_ is None:
            self._param_array_ = np.empty(self.size, dtype=np.float64)
        return self._param_array_

@ -723,6 +718,52 @@ class Parameterizable(OptimizationHandlable):
    def param_array(self, arr):
        self._param_array_ = arr

+    def traverse(self, visit, *args, **kwargs):
+        """
+        Traverse the hierarchy performing visit(self, *args, **kwargs) 
+        at every node passed by downwards. This function includes self!
+
+        See "visitor pattern" in literature. This is implemented in pre-order fashion.
+
+        Example:
+        Collect all children:
+
+        children = []
+        self.traverse(children.append)
+        print children
+        """
+        if not self.__visited:
+            visit(self, *args, **kwargs)
+            self.__visited = True
+            for c in self._parameters_:
+                c.traverse(visit, *args, **kwargs)
+            self.__visited = False
+
+    def traverse_parents(self, visit, *args, **kwargs):
+        """
+        Traverse the hierarchy upwards, visiting all parents and their children except self.
+        See "visitor pattern" in literature. This is implemented in pre-order fashion.
+    
+        Example:
+    
+        parents = []
+        self.traverse_parents(parents.append)
+        print parents
+        """
+        if self.has_parent():
+            self.__visited = True
+            self._parent_._traverse_parents(visit, *args, **kwargs)
+            self.__visited = False
+        
+    def _traverse_parents(self, visit, *args, **kwargs):
+        if not self.__visited:
+            self.__visited = True
+            visit(self, *args, **kwargs)
+            if self.has_parent():
+                self._parent_._traverse_parents(visit, *args, **kwargs)
+                self._parent_.traverse(visit, *args, **kwargs)
+            self.__visited = False
+
    #=========================================================================
    # Gradient handling
    #=========================================================================
@ -789,11 +830,10 @@ class Parameterizable(OptimizationHandlable):
        #    raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
        elif param not in self._parameters_:
            if param.has_parent():
-                parent = param._parent_
-                while parent is not None:
+                def visit(parent, self):
                    if parent is self:
                        raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
-                    parent = parent._parent_
+                param.traverse_parents(visit, self)
                param._parent_.remove_parameter(param)
            # make sure the size is set
            if index is None:
@ -837,7 +877,7 @@ class Parameterizable(OptimizationHandlable):
        :param param: param object to remove from being a parameter of this parameterized object.
        """
        if not param in self._parameters_:
-            raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
+            raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)

        start = sum([p.size for p in self._parameters_[:param._parent_index_]])
        self._remove_parameter_name(param)
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@ -82,15 +82,15 @@ class Parameterized(Parameterizable):
        import pydot  # @UnresolvedImport
        iamroot = False
        if G is None:
-            G = pydot.Dot(graph_type='digraph')
+            G = pydot.Dot(graph_type='digraph', bgcolor=None)
            iamroot=True
-        node = pydot.Node(id(self), shape='record', label=self.name)
+        node = pydot.Node(id(self), shape='box', label=self.name)#, color='white')
        G.add_node(node)
        for child in self._parameters_:
            child_node = child.build_pydot(G)
-            G.add_edge(pydot.Edge(node, child_node))
+            G.add_edge(pydot.Edge(node, child_node))#, color='white'))

-        for o in self.observers.keys():
+        for _, o, _ in self.observers:
            label = o.name if hasattr(o, 'name') else str(o)
            observed_node = pydot.Node(id(o), label=label)
            G.add_node(observed_node)
--- a/GPy/core/parameterization/variational.py
+++ b/GPy/core/parameterization/variational.py
@ -100,6 +100,9 @@ class VariationalPosterior(Parameterized):
            n.__dict__.update(dc)
            n._parameters_[dc['mean']._parent_index_] = dc['mean']
            n._parameters_[dc['variance']._parent_index_] = dc['variance']
+            n._gradient_array_ = None
+            oversize = self.size - self.mean.size - self.variance.size
+            n.size = n.mean.size + n.variance.size + oversize
            n.ndim = n.mean.ndim
            n.shape = n.mean.shape
            n.num_data = n.mean.shape[0]
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -79,29 +79,32 @@ class SparseGP(GP):
            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)

-    def _raw_predict(self, Xnew, full_cov=False):
+    def _raw_predict(self, Xnew, full_cov=False, kern=None):
        """
        Make a prediction for the latent function values
        """
+
+        if kern is None: kern = self.kern
+
        if not isinstance(Xnew, VariationalPosterior):
-            Kx = self.kern.K(self.Z, Xnew)
+            Kx = kern.K(self.Z, Xnew)
            mu = np.dot(Kx.T, self.posterior.woodbury_vector)
            if full_cov:
-                Kxx = self.kern.K(Xnew)
+                Kxx = kern.K(Xnew)
                var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
                #var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
                var = var.squeeze()
            else:
-                Kxx = self.kern.Kdiag(Xnew)
+                Kxx = kern.Kdiag(Xnew)
                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
        else:
-            Kx = self.kern.psi1(self.Z, Xnew)
+            Kx = kern.psi1(self.Z, Xnew)
            mu = np.dot(Kx, self.posterior.woodbury_vector)
            if full_cov:
                raise NotImplementedError, "TODO"
            else:
-                Kxx = self.kern.psi0(self.Z, Xnew)
-                psi2 = self.kern.psi2(self.Z, Xnew)
+                Kxx = kern.psi0(self.Z, Xnew)
+                psi2 = kern.psi2(self.Z, Xnew)
                var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
        return mu, var

--- a/GPy/core/symbolic.py
+++ b/GPy/core/symbolic.py
@ -107,14 +107,14 @@ class Symbolic_core():

            # Do symbolic work to compute derivatives.        
            for key, func in self.expressions.items():
-                if func['function'].is_Matrix:
-                    rows = func['function'].shape[0]
-                    cols = func['function'].shape[1]
-                    self.expressions[key]['derivative'] = sym.zeros(rows, cols)
-                    for i in xrange(rows):
-                        for j in xrange(cols):
-                            self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
-                else:
+                # if func['function'].is_Matrix:
+                #     rows = func['function'].shape[0]
+                #     cols = func['function'].shape[1]
+                #     self.expressions[key]['derivative'] = sym.zeros(rows, cols)
+                #     for i in xrange(rows):
+                #         for j in xrange(cols):
+                #             self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
+                # else:
                    self.expressions[key]['derivative'] = extract_derivative(func['function'], derivative_arguments)

    def _set_parameters(self, parameters):
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -161,6 +161,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
    import GPy
    from matplotlib import pyplot as plt
    from ..util.misc import param_to_array
+    import numpy as np

    _np.random.seed(0)
    data = GPy.util.datasets.oil()
@ -174,11 +175,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
        m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)

    if plot:
-        y = m.Y
        fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
        m.plot_latent(ax=latent_axes, labels=m.data_labels)
-        data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
-        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable
+        data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
+        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
            m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
        raw_input('Press enter to finish')
        plt.close(fig)
@ -408,13 +408,13 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
    data = GPy.util.datasets.osu_run1()
    # optimize
    m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
-    if optimize: m.optimize(messages=verbose, max_f_eval=10000)
+    if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000)
    if plot:
        plt.clf
        ax = m.plot_latent()
        y = m.Y[0, :]
        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
-        vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, latent_axes=ax)
+        vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
        raw_input('Press enter to finish')

    return m
@ -475,24 +475,28 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
 def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
    from GPy.models import BayesianGPLVM
    from matplotlib import pyplot as plt
+    import numpy as np
    import GPy

    data = GPy.util.datasets.osu_run1()
    Q = 6
-    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True) 
    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
+    
+    m.data = data
+    m.likelihood.variance = 0.001
+    
    # optimize
-    m.ensure_default_constraints()
-    if optimize: m.optimize('scg', messages=verbose, max_iters=200, xtol=1e-300, ftol=1e-300)
-    m._set_params(m._get_params())
+    if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300)
    if plot:
        plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
        plt.sca(latent_axes)
-        m.plot_latent()
-        y = m.likelihood.Y[0, :].copy()
-        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
-        GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
-        raw_input('Press enter to finish')
+        m.plot_latent(ax=latent_axes)
+        y = m.Y[:1, :].copy()
+        data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
+        GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
+        plt.draw()
+        #raw_input('Press enter to finish')

    return m

@ -509,7 +513,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
    if optimize: m.optimize(messages=verbose, max_f_eval=10000)
    if plot:
        ax = m.plot_latent()
-        y = m.likelihood.Y[0, :]
+        y = m.Y[0, :]
        data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
        lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
        raw_input('Press enter to finish')
--- a/GPy/gpy_config.cfg
+++ b/GPy/gpy_config.cfg
@ -6,6 +6,10 @@
 # some platforms, hence this option.
 openmp=False

+[datasets]
+# location for the local data cache
+dir=$HOME/tmp/GPy-datasets/
+
 [anaconda]
 # if you have an anaconda python installation please specify it here.
 installed = False
--- a/GPy/inference/optimization/scg.py
+++ b/GPy/inference/optimization/scg.py
@ -32,7 +32,7 @@ def print_out(len_maxiters, fnow, current_grad, beta, iteration):
    sys.stdout.flush()

 def exponents(fnow, current_grad):
-    exps = [np.abs(fnow), current_grad]
+    exps = [np.abs(np.float(fnow)), current_grad]
    return np.sign(exps) * np.log10(exps).astype(int)

 def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
--- a/GPy/kern/init.py
+++ b/GPy/kern/init.py
@ -3,7 +3,7 @@ from _src.rbf import RBF
 from _src.linear import Linear, LinearFull
 from _src.static import Bias, White
 from _src.brownian import Brownian
-from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
+from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
 from _src.mlp import MLP
 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
 from _src.independent_outputs import IndependentOutputs, Hierarchical
@ -13,6 +13,8 @@ from _src.ODE_UY import ODE_UY
 from _src.ODE_UYC import ODE_UYC
 from _src.ODE_st import ODE_st
 from _src.ODE_t import ODE_t
+from _src.poly import Poly
+
 # TODO: put this in an init file somewhere
 #I'm commenting this out because the files were not added. JH. Remember to add the files before commiting
 try:
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@ -170,7 +170,4 @@ class Add(CombinationKernel):
        return self

    def input_sensitivity(self):
-        in_sen = np.zeros(self.input_dim)
-        for i, p in enumerate(self.parts):
-            in_sen[p.active_dims] += p.input_sensitivity()
-        return in_sen
+        return reduce(np.add, [k.input_sensitivity() for k in self.parts])
--- a/GPy/kern/_src/independent_outputs.py
+++ b/GPy/kern/_src/independent_outputs.py
@ -32,7 +32,7 @@ def index_to_slices(index):
    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
    return ret

-class IndependentOutputs(Kern):
+class IndependentOutputs(CombinationKernel):
    """
    A kernel which can represent several independent functions.  this kernel
    'switches off' parts of the matrix where the output indexes are different.
@ -180,6 +180,9 @@ class Hierarchical(CombinationKernel):
    def Kdiag(self,X):
        return np.diag(self.K(X))

+    def gradients_X(self, dL_dK, X, X2=None):
+        raise NotImplementedError
+
    def update_gradients_full(self,dL_dK,X,X2=None):
        slices = [index_to_slices(X[:,i]) for i in self.extra_dims]
        if X2 is None:
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@ -34,36 +34,24 @@ class Kern(Parameterized):

            is the active_dimensions of inputs X we will work on.
            All kernels will get sliced Xes as inputs, if active_dims is not None
+            Only non-negative integers are allowed in active_dims!
            if active_dims is None, slicing is switched off and all X will be passed through as given.

        :param int input_dim: the number of input dimensions to the function
-        :param array-like|slice|None active_dims: list of indices on which dimensions this kernel works on, or none if no slicing
+        :param array-like|None active_dims: list of indices on which dimensions this kernel works on, or none if no slicing

        Do not instantiate.
        """
        super(Kern, self).__init__(name=name, *a, **kw)
-        try:
        self.input_dim = int(input_dim)
-            self.active_dims = active_dims# if active_dims is not None else slice(0, input_dim, 1)
-        except TypeError:
-            # input_dim is something else then an integer
-            self.input_dim = input_dim
-            if active_dims is not None:
-                print "WARNING: given input_dim={} is not an integer and active_dims={} is given, switching off slicing"
-            self.active_dims = None

-        if self.active_dims is not None and self.input_dim is not None:
-            assert isinstance(self.active_dims, (slice, list, tuple, np.ndarray)), 'active_dims needs to be an array-like or slice object over dimensions, {} given'.format(self.active_dims.__class__)
-            if isinstance(self.active_dims, slice):
-                self.active_dims = slice(self.active_dims.start or 0, self.active_dims.stop or self.input_dim, self.active_dims.step or 1)
-                active_dim_size = int(np.round((self.active_dims.stop-self.active_dims.start)/self.active_dims.step))
-            elif isinstance(self.active_dims, np.ndarray):
-                #assert np.all(self.active_dims >= 0), 'active dimensions need to be positive. negative indexing is not allowed'
-                assert self.active_dims.ndim == 1, 'only flat indices allowed, given active_dims.shape={}, provide only indexes to the dimensions (columns) of the input'.format(self.active_dims.shape)
-                active_dim_size = self.active_dims.size
-            else:
-                active_dim_size = len(self.active_dims)
-            assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
+        if active_dims is None:
+            active_dims = np.arange(input_dim)
+
+        self.active_dims = np.array(active_dims, dtype=int)
+
+        assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, self.active_dims.size, self.active_dims)
+
        self._sliced_X = 0
        self.useGPU = self._support_GPU and useGPU

@ -176,8 +164,8 @@ class Kern(Parameterized):
        """
        Shortcut for tensor `prod`.
        """
-        assert self.active_dims == range(self.input_dim), "Can only use kernels, which have their input_dims defined from 0"
-        assert other.active_dims == range(other.input_dim), "Can only use kernels, which have their input_dims defined from 0"
+        assert np.all(self.active_dims == range(self.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
+        assert np.all(other.active_dims == range(other.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
        other.active_dims += self.input_dim
        return self.prod(other)

@ -202,10 +190,10 @@ class Kern(Parameterized):
        return Prod([self, other], name)

    def _check_input_dim(self, X):
-        assert X.shape[1] == self.input_dim, "You did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(X.shape[1], self.input_dim)
+        assert X.shape[1] == self.input_dim, "{} did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)

    def _check_active_dims(self, X):
-        assert X.shape[1] >= len(np.r_[self.active_dims]), "At least {} dimensional X needed, X.shape={!s}".format(len(np.r_[self.active_dims]), X.shape)
+        assert X.shape[1] >= len(self.active_dims), "At least {} dimensional X needed, X.shape={!s}".format(len(self.active_dims), X.shape)


 class CombinationKernel(Kern):
@ -222,9 +210,10 @@ class CombinationKernel(Kern):

        :param list kernels: List of kernels to combine (can be only one element)
        :param str name: name of the combination kernel
-        :param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
+        :param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
        """
        assert all([isinstance(k, Kern) for k in kernels])
+        extra_dims = np.array(extra_dims, dtype=int)
        input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
        # initialize the kernel with the full input_dim
        super(CombinationKernel, self).__init__(input_dim, active_dims, name)
@ -238,16 +227,18 @@ class CombinationKernel(Kern):
    def get_input_dim_active_dims(self, kernels, extra_dims = None):
        #active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
        #active_dims = np.array(np.concatenate((active_dims, extra_dims if extra_dims is not None else [])), dtype=int)
-        input_dim = np.array([k.input_dim for k in kernels])
-        if np.all(input_dim[0]==input_dim):
-            input_dim = input_dim[0]
-        active_dims = None
+        input_dim = reduce(max, (k.active_dims.max() for k in kernels)) + 1
+
+        if extra_dims is not None:
+            input_dim += extra_dims.size
+
+        active_dims = np.arange(input_dim)
        return input_dim, active_dims

    def input_sensitivity(self):
        raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...")

-    def _check_input_dim(self, X):
+    def _check_active_dims(self, X):
        return

    def _check_input_dim(self, X):
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@ -12,6 +12,7 @@ from ...core.parameterization.transformations import Logexp
 from ...util.caching import Cache_this
 from ...core.parameterization import variational
 from psi_comp import linear_psi_comp
+from ...util.config import *

 class Linear(Kern):
    """
@ -224,12 +225,23 @@ class Linear(Kern):
        AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
        AZZA = AZZA + AZZA.swapaxes(1, 2)
        AZZA_2 = AZZA/2.
+        if config.getboolean('parallel', 'openmp'):
+            pragma_string = '#pragma omp parallel for private(m,mm,q,qq,factor,tmp)'
+            header_string = '#include <omp.h>'
+            weave_options = {'headers'           : ['<omp.h>'],
+                                'extra_compile_args': ['-fopenmp -O3'],
+                                'extra_link_args'   : ['-lgomp'],
+                                'libraries': ['gomp']}
+        else:
+            pragma_string = ''
+            header_string = ''
+            weave_options = {'extra_compile_args': ['-O3']}

        #Using weave, we can exploit the symmetry of this problem:
        code = """
        int n, m, mm,q,qq;
        double factor,tmp;
-        #pragma omp parallel for private(m,mm,q,qq,factor,tmp)
+        %s
        for(n=0;n<N;n++){
          for(m=0;m<num_inducing;m++){
            for(mm=0;mm<=m;mm++){
@ -253,26 +265,36 @@ class Linear(Kern):
            }
          }
        }
-        """
+        """ % pragma_string
        support_code = """
-        #include <omp.h>
+        %s
        #include <math.h>
-        """
-        weave_options = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -O3'],  #-march=native'],
-                         'extra_link_args'   : ['-lgomp']}
+        """ % header_string
        mu = vp.mean
        N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
+        weave.inline(code, support_code=support_code,
                     arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
                     type_converters=weave.converters.blitz,**weave_options)


    def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
        AZA = self.variances*self._ZAinner(vp, Z)
+
+        if config.getboolean('parallel', 'openmp'):
+            pragma_string = '#pragma omp parallel for private(n,mm,q)'
+            header_string = '#include <omp.h>'
+            weave_options =  {'headers'           : ['<omp.h>'],
+                              'extra_compile_args': ['-fopenmp -O3'],
+                              'extra_link_args'   : ['-lgomp'],
+                              'libraries': ['gomp']}
+        else:
+            pragma_string = ''
+            header_string = ''
+            weave_options = {'extra_compile_args': ['-O3']}
+
        code="""
        int n,m,mm,q;
-        #pragma omp parallel for private(n,mm,q)
+        %s
        for(m=0;m<num_inducing;m++){
          for(q=0;q<input_dim;q++){
            for(mm=0;mm<num_inducing;mm++){
@ -282,18 +304,15 @@ class Linear(Kern):
            }
          }
        }
-        """
+        """ % pragma_string
        support_code = """
-        #include <omp.h>
+        %s
        #include <math.h>
-        """
-        weave_options = {'headers'           : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -O3'],  #-march=native'],
-                         'extra_link_args'   : ['-lgomp']}
+        """ % header_string

        N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
        mu = param_to_array(vp.mean)
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
+        weave.inline(code, support_code=support_code,
                     arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
                     type_converters=weave.converters.blitz,**weave_options)

--- a/GPy/kern/_src/poly.py
+++ b/GPy/kern/_src/poly.py
@ -0,0 +1,42 @@
+# Copyright (c) 2014, James Hensman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from kern import Kern
+from ...util.misc import param_to_array
+from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp
+class Poly(Kern):
+    """
+    Polynomial kernel: k(x, x') = variance * (x . x' + 1) ** order
+    """
+
+    def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
+        super(Poly, self).__init__(input_dim, active_dims, name)
+        self.variance = Param('variance', variance, Logexp())
+        self.add_parameter(self.variance)
+        self.order=order
+
+    def K(self, X, X2=None):
+        return (self._dot_product(X, X2) + 1.)**self.order * self.variance
+
+    def _dot_product(self, X, X2=None):
+        if X2 is None:
+            return np.dot(X, X.T)
+        else:
+            return np.dot(X, X2.T)
+
+    def Kdiag(self, X):
+        return self.variance*(np.square(X).sum(1) + 1.)**self.order
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        self.variance.gradient = np.sum(dL_dK * (self._dot_product(X, X2) + 1.)**self.order)
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        raise NotImplementedError
+
+    def gradients_X(self, dL_dK, X, X2=None):
+        raise NotImplementedError
+
+    def gradients_X_diag(self, dL_dKdiag, X):
+        raise NotImplementedError
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@ -10,6 +10,7 @@ from GPy.util.caching import Cache_this
 from ...core.parameterization import variational
 from psi_comp import ssrbf_psi_comp
 from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF
+from ...util.config import *

 class RBF(Stationary):
    """
@ -231,6 +232,16 @@ class RBF(Stationary):

    @Cache_this(limit=1)
    def _psi2computations(self, Z, vp):
+
+        if config.getboolean('parallel', 'openmp'):
+            pragma_string = '#pragma omp parallel for private(tmp, exponent_tmp)'
+            header_string = '#include <omp.h>'
+            libraries = ['gomp']
+        else:
+            pragma_string = ''
+            header_string = ''
+            libraries = []
+
        mu, S = vp.mean, vp.variance

        N, Q = mu.shape
@ -253,8 +264,7 @@ class RBF(Stationary):
        variance_sq = float(np.square(self.variance))
        code = """
        double tmp, exponent_tmp;
-
-        #pragma omp parallel for private(tmp, exponent_tmp)
+        %s 
        for (int n=0; n<N; n++)
        {
            for (int m=0; m<M; m++)
@ -278,20 +288,20 @@ class RBF(Stationary):
                        tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
                        exponent_tmp += tmp;
                    }
-                    //compute psi2 by exponontiating
+                    //compute psi2 by exponentiating
                    psi2(n,m,mm) = variance_sq * exp(exponent_tmp);
                    psi2(n,mm,m) = psi2(n,m,mm);
                }
            }
        }
-        """
+        """ % pragma_string

        support_code = """
-        #include <omp.h>
+        %s
        #include <math.h>
-        """
+        """ % header_string
        mu = param_to_array(mu)
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
+        weave.inline(code, support_code=support_code, libraries=libraries,
                     arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
                     type_converters=weave.converters.blitz, **self.weave_options)

@ -303,12 +313,20 @@ class RBF(Stationary):
        #return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale

        result = np.zeros(self.input_dim)
+        if config.getboolean('parallel', 'openmp'):
+            pragma_string = '#pragma omp parallel for reduction(+:tmp)'
+            header_string = '#include <omp.h>'
+            libraries = ['gomp']
+        else:
+            pragma_string = ''
+            header_string = ''
+            libraries = []
        code = """
        double tmp;
        for(int q=0; q<Q; q++)
        {
            tmp = 0.0;
-            #pragma omp parallel for reduction(+:tmp)
+            %s
            for(int n=0; n<N; n++)
            {
                for(int m=0; m<M; m++)
@ -326,16 +344,16 @@ class RBF(Stationary):
            result(q) = tmp;
        }

-        """
+        """ % pragma_string
        support_code = """
-        #include <omp.h>
+        %s
        #include <math.h>
-        """
+        """ % header_string
        N,Q = S.shape
        M = psi2.shape[-1]

        S = param_to_array(S)
-        weave.inline(code, support_code=support_code, libraries=['gomp'],
+        weave.inline(code, support_code=support_code, libraries=libraries,
                     arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
                     type_converters=weave.converters.blitz, **self.weave_options)

--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@ -192,6 +192,27 @@ class Exponential(Stationary):
    def dK_dr(self, r):
        return -0.5*self.K_of_r(r)

+
+class OU(Stationary):
+    """
+    OU kernel:
+
+    .. math::
+
+       k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\  \\text{ where  } r = \sqrt{\sum_{i=1}^{input\_dim} \\frac{(x_i-y_i)^2}{\ell_i^2} }
+
+    """
+
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='OU'):
+        super(OU, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
+
+    def K_of_r(self, r):
+        return self.variance * np.exp(-r)
+
+    def dK_dr(self,r):
+        return -1.*self.variance*np.exp(-r)
+
+
 class Matern32(Stationary):
    """
    Matern 3/2 kernel:
--- a/GPy/likelihoods/__init__.py
+++ b/GPy/likelihoods/__init__.py
@ -8,16 +8,16 @@ from likelihood import Likelihood
 from mixed_noise import MixedNoise
 #TODO need to fix this in a config file.
 #TODO need to add the files to the git repo!
-#try:
-    #import sympy as sym
-    #sympy_available=True
-#except ImportError:
-    #sympy_available=False
-#if sympy_available:
-    ## These are likelihoods that rely on symbolic.
-    #from symbolic import Symbolic
-    #from sstudent_t import SstudentT
-    #from negative_binomial import Negative_binomial
-    ##from skew_normal import Skew_normal
-    #from skew_exponential import Skew_exponential
+try:
+    import sympy as sym
+    sympy_available=True
+except ImportError:
+    sympy_available=False
+if sympy_available:
+    #These are likelihoods that rely on symbolic.
+    from symbolic import Symbolic
+    from sstudent_t import SstudentT
+    from negative_binomial import Negative_binomial
+    from skew_normal import Skew_normal
+    from skew_exponential import Skew_exponential
 #    from null_category import Null_category
--- a/GPy/likelihoods/ordinal.py
+++ b/GPy/likelihoods/ordinal.py
@ -0,0 +1,48 @@
+# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import sympy as sym
+from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
+import numpy as np
+from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
+import link_functions
+from symbolic import Symbolic
+from scipy import stats
+
+class Ordinal(Symbolic):
+    """
+    Ordinal
+
+    .. math::
+        p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
+
+    .. Note::
+        Y takes non-negative integer values.
+        the link function defaults to the identity (link_functions.Identity).
+
+    .. See also::
+        symbolic.py, for the parent class
+    """
+    def __init__(self, categories=3, gp_link=None):
+        if gp_link is None:
+            gp_link = link_functions.Identity()
+
+        dispersion = sym.Symbol('width', positive=True, real=True)
+        y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
+        f_0 = sym.Symbol('f_0', positive=True, real=True) 
+        log_pdf = create_matrix('log_pdf', 1, categories)
+        log_pdf[0] = normcdfln(-f_0)
+        if categories>2:
+            w = create_matrix('w', 1, categories)
+            log_pdf[categories-1] = normcdfln(w.sum() + f_0)
+            for i in range(1, categories-1):
+                log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) )
+        else:
+            log_pdf[1] = normcdfln(f_0)
+        log_pdf.index_var = y_0
+        super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal')
+
+        # TODO: Check this.
+        self.log_concave = True
+
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@ -42,7 +42,7 @@ class BayesianGPLVM(SparseGP):
        assert Z.shape[1] == X.shape[1]

        if kernel is None:
-            kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
+            kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)

        if likelihood is None:
            likelihood = Gaussian()
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@ -97,7 +97,7 @@ def plot_latent(model, labels=None, which_indices=None,
        elif type(ul) is np.int64:
            this_label = 'class %i' % ul
        else:
-            this_label = 'class %i' % i
+            this_label = unicode(ul)
        m = marker.next()

        index = np.nonzero(labels == ul)[0]
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@ -14,7 +14,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
        which_data_ycols='all', fixed_inputs=[],
        levels=20, samples=0, fignum=None, ax=None, resolution=None,
        plot_raw=False,
-        linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None):
+        linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'):
    """
    Plot the posterior of the GP.
      - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

        for d in which_data_ycols:
            plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
-            plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
+            plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)

        #optionally plot some samples
        if samples: #NOTE not tested with fixed_inputs
--- a/GPy/plotting/matplot_dep/visualize.py
+++ b/GPy/plotting/matplot_dep/visualize.py
@ -74,13 +74,16 @@ class vector_show(matplotlib_show):
    """
    def __init__(self, vals, axes=None):
        matplotlib_show.__init__(self, vals, axes)
-        self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)
+        #assert vals.ndim == 2, "Please give a vector in [n x 1] to plot"
+        #assert vals.shape[1] == 1, "only showing a vector in one dimension"
+        self.size = vals.size
+        self.handle = self.axes.plot(np.arange(0, vals.size)[:, None], vals)[0]

    def modify(self, vals):
        self.vals = vals.copy()
-        for handle, vals in zip(self.handle, self.vals.T):
-            xdata, ydata = handle.get_data()
-            handle.set_data(xdata, vals)
+        xdata, ydata = self.handle.get_data()
+        assert vals.size == self.size, "values passed into modify changed size! vals.size:{} != in.size:{}".format(vals.size, self.size)
+        self.handle.set_data(xdata, self.vals)
        self.axes.figure.canvas.draw()


@ -94,13 +97,12 @@ class lvm(matplotlib_show):
        :type data_visualize: visualize.data_show  type.
        :param latent_axes: the axes where the latent visualization should be plotted.
        """
-        if vals == None:
+        if vals is None:
            if isinstance(model.X, VariationalPosterior):
                vals = param_to_array(model.X.mean)
            else:
                vals = param_to_array(model.X)

-        vals = param_to_array(vals)
        matplotlib_show.__init__(self, vals, axes=latent_axes)

        if isinstance(latent_axes,mpl.axes.Axes):
@ -273,7 +275,7 @@ class image_show(matplotlib_show):
    :type preset_mean: double
    :param preset_std: the preset standard deviation of a scaled image.
    :type preset_std: double"""
-    def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean = 0., preset_std = -1., select_image=0):
+    def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean=0., preset_std=1., select_image=0):
        matplotlib_show.__init__(self, vals, axes)
        self.dimensions = dimensions
        self.transpose = transpose
@ -323,13 +325,12 @@ class image_show(matplotlib_show):
            self.vals = -self.vals

        # un-normalizing, for visualisation purposes:
-        if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255)
        self.vals = self.vals*self.preset_std + self.preset_mean
        # Clipping the values:
-            self.vals[self.vals < 0] = 0
-            self.vals[self.vals > 255] = 255
-        else:
-            self.vals = 255*(self.vals - self.vals.min())/(self.vals.max() - self.vals.min())
+        #self.vals[self.vals < 0] = 0
+        #self.vals[self.vals > 255] = 255
+        #else:
+            #self.vals = 255*(self.vals - self.vals.min())/(self.vals.max() - self.vals.min())
        if not self.palette == []: # applying using an image palette (e.g. if the image has been quantized)
            from PIL import Image
            self.vals = Image.fromarray(self.vals.astype('uint8'))
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -304,23 +304,13 @@ class KernelTestsMiscellaneous(unittest.TestCase):
    def setUp(self):
        N, D = 100, 10
        self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.random.uniform(-10,10,D)
-        self.rbf = GPy.kern.RBF(2, active_dims=slice(0,4,2))
+        self.rbf = GPy.kern.RBF(2, active_dims=np.arange(0,4,2))
        self.linear = GPy.kern.Linear(2, active_dims=(3,9))
        self.matern = GPy.kern.Matern32(3, active_dims=np.array([1,7,9]))
        self.sumkern = self.rbf + self.linear
        self.sumkern += self.matern
        self.sumkern.randomize()

-    def test_active_dims(self):
-        # test the automatic dim detection expression for slices:
-        start, stop = 0, 277
-        for i in range(start,stop,7):
-            for j in range(1,4):
-                GPy.kern.Kern(int(np.round((i+1)/j)), slice(0, i+1, j), "testkern")
-        # test the ability to have only one dim
-        sk = GPy.kern.RBF(2) + GPy.kern.Matern32(2)
-        self.assertEqual(sk.input_dim, 2)
-
    def test_which_parts(self):
        self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X)))
        self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X)))
@ -344,10 +334,15 @@ class KernelTestsNonContinuous(unittest.TestCase):
        self.X2[(N0*2):, -1] = 1

    def test_IndependentOutputs(self):
-        k = GPy.kern.RBF(self.D)
+        k = GPy.kern.RBF(self.D, active_dims=range(self.D))
        kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
-        k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
+        k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, active_dims=range(self.D), name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
+        kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
+
+    def test_Hierarchical(self):
+        k = [GPy.kern.RBF(2, active_dims=[0,2], name='rbf1'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf2')]
        kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))

--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@ -27,11 +27,11 @@ class ArrayCoreTest(unittest.TestCase):
 class ParameterizedTest(unittest.TestCase):

    def setUp(self):
-        self.rbf = GPy.kern.RBF(1)
+        self.rbf = GPy.kern.RBF(20)
        self.white = GPy.kern.White(1)
        from GPy.core.parameterization import Param
        from GPy.core.parameterization.transformations import Logistic
-        self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
+        self.param = Param('param', np.random.uniform(0,1,(25,2)), Logistic(0, 1))

        self.test1 = GPy.core.Parameterized("test model")
        self.test1.param = self.param
@ -142,6 +142,8 @@ class ParameterizedTest(unittest.TestCase):
        self.testmodel.randomize()
        self.assertEqual(val, self.testmodel.kern.lengthscale)

+    
+    
    def test_regular_expression_misc(self):
        self.testmodel.kern.lengthscale.fix()
        val = float(self.testmodel.kern.lengthscale)
--- a/GPy/testing/pickle_tests.py
+++ b/GPy/testing/pickle_tests.py
@ -132,6 +132,9 @@ class Test(ListDictTestCase):
        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
        self.assertTrue(pcopy.checkgrad())
        self.assert_(np.any(pcopy.gradient!=0.0))
+        pcopy.optimize('bfgs')
+        par.optimize('bfgs')
+        np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=.001)
        with tempfile.TemporaryFile('w+b') as f:
            par.pickle(f)
            f.seek(0)
--- a/GPy/util/caching.py
+++ b/GPy/util/caching.py
@ -1,84 +1,108 @@
 from ..core.parameterization.parameter_core import Observable
-import itertools
+import itertools, collections, weakref

 class Cacher(object):
-    """


-
-
-    """
-
    def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
+        """
+        Parameters:
+        ***********
+        :param callable operation: function to cache
+        :param int limit: depth of cacher
+        :param [int] ignore_args: list of indices, pointing at arguments to ignore in *args of operation(*args). This includes self!
+        :param [str] force_kwargs: list of kwarg names (strings). If a kwarg with that name is given, the cacher will force a recompute and won't cache anything.
+        """
        self.limit = int(limit)
        self.ignore_args = ignore_args
        self.force_kwargs = force_kwargs
        self.operation=operation
-        self.cached_inputs = []
-        self.cached_outputs = []
-        self.inputs_changed = []
+        self.order = collections.deque()
+        self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which were used in cache cache_id
+
+        #=======================================================================
+        # point from each ind_id to [ref(obj), cache_ids]
+        # 0: a weak reference to the object itself
+        # 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id)
+        self.cached_input_ids = {} 
+        #=======================================================================
+
+        self.cached_outputs = {} # point from cache_ids to outputs
+        self.inputs_changed = {} # point from cache_ids to bools
+
+    def combine_args_kw(self, args, kw):
+        "Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
+        return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
+
+    def preprocess(self, combined_args_kw, ignore_args):
+        "get the cache_id (concatenated string of argument ids in order), ignoring ignore_args"
+        return "".join(str(id(a)) for i,a in enumerate(combined_args_kw) if i not in ignore_args)
+
+    def ensure_cache_length(self, cache_id):
+        "Ensures the cache is within its limits and has one place free"
+        if len(self.order) == self.limit:
+            # we have reached the limit, so lets release one element
+            cache_id = self.order.popleft()
+            combined_args_kw = self.cached_inputs[cache_id]
+            for ind in combined_args_kw:
+                ind_id = id(ind)
+                ref, cache_ids = self.cached_input_ids[ind_id]
+                if len(cache_ids) == 1 and ref() is not None:
+                    ref().remove_observer(self, self.on_cache_changed)
+                    del self.cached_input_ids[ind_id]
+                else:
+                    cache_ids.remove(cache_id)
+                    self.cached_input_ids[ind_id] = [ref, cache_ids]
+            del self.cached_outputs[cache_id]
+            del self.inputs_changed[cache_id]
+            del self.cached_inputs[cache_id]
+
+    def add_to_cache(self, cache_id, combined_args_kw, output):
+        self.inputs_changed[cache_id] = False
+        self.cached_outputs[cache_id] = output
+        self.order.append(cache_id)
+        self.cached_inputs[cache_id] = combined_args_kw
+        for a in combined_args_kw:
+            ind_id = id(a)
+            v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
+            v[1].append(cache_id)
+            if len(v[1]) == 1:
+                a.add_observer(self, self.on_cache_changed)
+            self.cached_input_ids[ind_id] = v

    def __call__(self, *args, **kw):
        """
        A wrapper function for self.operation,
        """

-        #ensure that specified arguments are ignored
-        items = sorted(kw.items(), key=lambda x: x[0])
-        oa_all = args + tuple(a for _,a in items)
-        if len(self.ignore_args) != 0:
-            oa = [a for i,a in itertools.chain(enumerate(args), items) if i not in self.ignore_args and i not in self.force_kwargs]
-        else:
-            oa = oa_all
-
-        # this makes sure we only add an observer once, and that None can be in args
-        observable_args = []
-        for a in oa:
-            if (not any(a is ai for ai in observable_args)) and a is not None:
-                observable_args.append(a)
-
-        #make sure that all the found argument really are observable:
-        #otherswise don't cache anything, pass args straight though
-        if not all([isinstance(arg, Observable) for arg in observable_args]):
-            return self.operation(*args, **kw)
-
+        # 1: Check whether we have forced recompute arguments:
        if len(self.force_kwargs) != 0:
-            # check if there are force args, which force reloading
            for k in self.force_kwargs:
                if k in kw and kw[k] is not None:
                    return self.operation(*args, **kw)
-        # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
-        # return self.operation(*args, **kw)

-        #if the result is cached, return the cached computation
-        state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
+        # 2: preprocess and get the unique id string for this call
+        combined_args_kw = self.combine_args_kw(args, kw)
+        cache_id = self.preprocess(combined_args_kw, self.ignore_args)
+
+        # 2: if any argument is not cachable (not an Observable), just call the operation and return its result, without caching
+        if reduce(lambda a,b: a or (not isinstance(b, Observable)), combined_args_kw, False):
+            return self.operation(*args, **kw)
+        # 3&4: check whether this cache_id has been cached, then has it changed?
        try:
-            if any(state):
-                i = state.index(True)
-                if self.inputs_changed[i]:
-                    #(elements of) the args have changed since we last computed: update
-                    self.cached_outputs[i] = self.operation(*args, **kw)
-                    self.inputs_changed[i] = False
-                return self.cached_outputs[i]
-            else:
-                #first time we've seen these arguments: compute
-
-                #first make sure the depth limit isn't exceeded
-                if len(self.cached_inputs) == self.limit:
-                    args_ = self.cached_inputs.pop(0)
-                    args_ = [a for i,a in enumerate(args_) if i not in self.ignore_args and i not in self.force_kwargs]
-                    [a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
-                    self.inputs_changed.pop(0)
-                    self.cached_outputs.pop(0)
-                #compute
-                self.cached_inputs.append(oa_all)
-                self.cached_outputs.append(self.operation(*args, **kw))
-                self.inputs_changed.append(False)
-                [a.add_observer(self, self.on_cache_changed) for a in observable_args]
-                return self.cached_outputs[-1]#return
+            if(self.inputs_changed[cache_id]):
+                # 4: This happens, when one element has changed for this cache id
+                self.inputs_changed[cache_id] = False
+                self.cached_outputs[cache_id] = self.operation(*args, **kw)
+        except KeyError:
+            # 3: This is when we have never seen this cache_id:
+            self.ensure_cache_length(cache_id)
+            self.add_to_cache(cache_id, combined_args_kw, self.operation(*args, **kw))
        except:
            self.reset()
            raise
+        # 5: We have seen this cache_id and it is cached:
+        return self.cached_outputs[cache_id]

    def on_cache_changed(self, direct, which=None):
        """
@ -86,17 +110,19 @@ class Cacher(object):

        this function gets 'hooked up' to the inputs when we cache them, and upon their elements being changed we update here.
        """
-        self.inputs_changed = [any([a is direct or a is which for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
+        for ind_id in [id(direct), id(which)]:
+            _, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
+            for cache_id in cache_ids:
+                self.inputs_changed[cache_id] = True

    def reset(self):
        """
        Totally reset the cache
        """
-        [[a.remove_observer(self, self.on_cache_changed) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
-        [[a.remove_observer(self, self.reset) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
-        self.cached_inputs = []
-        self.cached_outputs = []
-        self.inputs_changed = []
+        [a().remove_observer(self, self.on_cache_changed) if (a() is not None) else None for [a, _] in self.cached_input_ids.values()]
+        self.cached_input_ids = {}
+        self.cached_outputs = {}
+        self.inputs_changed = {}

    def __deepcopy__(self, memo=None):
        return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs)
--- a/GPy/util/data_resources.json
+++ b/GPy/util/data_resources.json
@ -1,65 +1,340 @@
 {
-   "rogers_girolami_data":{
-      "files":[
-         [
-            "firstcoursemldata.tar.gz"
-         ]
-      ],
-      "license":null,
-      "citation":"A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146",
-      "details":"Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.",
-      "urls":[
-         "https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/"
-      ],
-      "suffices":[
-         [
-            "?dl=1"
-         ]
-      ],
-      "size":21949154
-   },
    "ankur_pose_data": {
+        "citation": "3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.",
+        "details": "Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more than the other, disambiguating the pose as to which way the individual is facing.",
        "files": [
            [
                "ankurDataPoseSilhouette.mat"
            ]
        ],
-      "citation":"3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.",
        "license": null,
+        "size": 1,
        "urls": [
            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/ankur_pose_data/"
+        ]
+    },
+    "boston_housing": {
+        "citation": "Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.",
+        "details": "The Boston Housing data relates house values in Boston to a range of input variables.",
+        "files": [
+            [
+                "Index",
+                "housing.data",
+                "housing.names"
+            ]
        ],
-      "details":"Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing.",
-      "size":1
+        "license": null,
+        "size": 51276,
+        "urls": [
+            "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/"
+        ]
+    },
+    "boxjenkins_airline": {
+        "citation": "Box & Jenkins (1976), in file: data/airpass, Description: International airline passengers: monthly totals in thousands. Jan 49 \\u2013 Dec 60",
+        "details": "International airline passengers, monthly totals from January 1949 to December 1960.",
+        "files": [
+            [
+                "boxjenkins_airline.csv"
+            ]
+        ],
+        "license": "You may copy and redistribute the data. You may make derivative works from the data. You may use the data for commercial purposes. You may not sublicence the data when redistributing it. You may not redistribute the data under a different license. Source attribution on any use of this data: Must refer source.",
+        "size": 46779,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/boxjenkins_airline/"
+        ]
+    },
+    "brendan_faces": {
+        "citation": "Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.",
+        "details": "A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.",
+        "files": [
+            [
+                "frey_rawface.mat"
+            ]
+        ],
+        "license": null,
+        "size": 1100584,
+        "urls": [
+            "http://www.cs.nyu.edu/~roweis/data/"
+        ]
+    },
+    "cmu_mocap_full": {
+        "citation": "Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\\nThe database was created with funding from NSF EIA-0196217.",
+        "details": "CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
+        "files": [
+            [
+                "allasfamc.zip"
+            ]
+        ],
+        "license": "From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.",
+        "size": null,
+        "urls": [
+            "http://mocap.cs.cmu.edu/subjects"
+        ]
+    },
+    "creep_rupture": {
+        "citation": "Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.",
+        "details": "Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.",
+        "files": [
+            [
+                "creeprupt.tar"
+            ]
+        ],
+        "license": null,
+        "size": 602797,
+        "urls": [
+            "http://www.msm.cam.ac.uk/map/data/tar/"
+        ]
+    },
+    "decampos_characters": {
+        "citation": "T. de Campos, B. R. Babu, and M. Varma. Character recognition in natural images. VISAPP 2009.",
+        "details": "Examples of hand written digits taken from the de Campos et al paper on Character Recognition in Natural Images.",
+        "files": [
+            [
+                "characters.npy",
+                "digits.npy"
+            ]
+        ],
+        "license": null,
+        "size": 2031872,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/decampos_digits/"
+        ]
+    },
+    "della_gatta": {
+        "citation": "Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008",
+        "details": "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
+        "files": [
+            [
+                "DellaGattadata.mat"
+            ]
+        ],
+        "license": null,
+        "size": 3729650,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/"
+        ]
+    },
+    "epomeo_gpx": {
+        "citation": "",
+        "details": "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
+        "files": [
+            [
+                "endomondo_1.gpx",
+                "endomondo_2.gpx",
+                "garmin_watch_via_endomondo.gpx",
+                "viewranger_phone.gpx",
+                "viewranger_tablet.gpx"
+            ]
+        ],
+        "license": null,
+        "size": 2031872,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/"
+        ]
    },
    "football_data": {
+        "citation": "",
+        "details": "Results of English football matches since 1993/94 season.",
        "files": [
            [
-	     "E0.csv", "E1.csv", "E2.csv", "E3.csv"
+                "E0.csv",
+                "E1.csv",
+                "E2.csv",
+                "E3.csv"
            ]
        ],
-      "citation":"",
        "license": null,
+        "size": 1,
        "urls": [
            "http://www.football-data.co.uk/mmz4281/"
-      ],
-      "details":"Results of English football matches since 1993/94 season.",
-      "size":1
+        ]
    },
-   "google_trends":{
+    "fruitfly_tomancak": {
+        "citation": "",
+        "details": "",
        "files": [
            [
+                "tomancak_exprs.csv",
+                "tomancak_se.csv",
+                "tomancak_prctile5.csv",
+                "tomancak_prctile25.csv",
+                "tomancak_prctile50.csv",
+                "tomancak_prctile75.csv",
+                "tomancak_prctile95.csv"
            ]
        ],
-      "citation":"",
        "license": null,
+        "size": 59000000,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/fruitfly_tomancak/"
+        ]
+    },
+    "fruitfly_tomancak_cel_files": {
+        "citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin",
+        "details": "Gene expression results from blastoderm development in Drosophila Melanogaster.",
+        "files": [ 
+            [
+                "embryo_tc_4_1.CEL",
+                "embryo_tc_4_2.CEL",
+                "embryo_tc_4_3.CEL",
+                "embryo_tc_4_4.CEL",
+                "embryo_tc_4_5.CEL",
+                "embryo_tc_4_6.CEL",
+                "embryo_tc_4_7.CEL",
+                "embryo_tc_4_8.CEL",
+                "embryo_tc_4_9.CEL",
+                "embryo_tc_4_10.CEL",
+                "embryo_tc_4_11.CEL",
+                "embryo_tc_4_12.CEL",
+                "embryo_tc_6_1.CEL",
+                "embryo_tc_6_2.CEL",
+                "embryo_tc_6_3.CEL",
+                "embryo_tc_6_4.CEL",
+                "embryo_tc_6_5.CEL",
+                "embryo_tc_6_6.CEL",
+                "embryo_tc_6_7.CEL",
+                "embryo_tc_6_8.CEL",
+                "embryo_tc_6_9.CEL",
+                "embryo_tc_6_10.CEL",
+                "embryo_tc_6_11.CEL",
+                "embryo_tc_6_12.CEL",
+                "embryo_tc_8_1.CEL",
+                "embryo_tc_8_2.CEL",
+                "embryo_tc_8_3.CEL",
+                "embryo_tc_8_4.CEL",
+                "embryo_tc_8_5.CEL",
+                "embryo_tc_8_6.CEL",
+                "embryo_tc_8_7.CEL",
+                "embryo_tc_8_8.CEL",
+                "embryo_tc_8_9.CEL",
+                "embryo_tc_8_10.CEL",
+                "embryo_tc_8_11.CEL",
+                "embryo_tc_8_12.CEL",
+                "CG_AffyOligo_Gadfly3_01_13_03",
+                "embryo_tc_rma_release2.txt",
+                "embryo_tc_rma_release3.txt",
+                "na_affy_oligo.dros",
+                "README.TXT"
+            ]
+        ],
+        "license": null,
+        "size": 389000000,
+        "urls": [
+            "ftp://ftp.fruitfly.org/pub/embryo_tc_array_data/"
+        ]
+    },
+    "google_trends": {
+        "citation": "",
+        "details": "Google trends results.",
+        "files": [
+            [
+                
+            ]
+        ],
+        "license": null,
+        "size": 0,
        "urls": [
            "http://www.google.com/trends/"
+        ]
+    },
+    
+    "hapmap3": {
+        "citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.",
+        "details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n        The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n        See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n        SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:\n        Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n              /  1, iff SNPij==(B1,B1)\n        Aij = |  0, iff SNPij==(B1,B2)\n              \\\\ -1, iff SNPij==(B2,B2)\n\n        The SNP data and the meta information (such as iid, sex and phenotype) are\n        stored in the dataframe datadf, index is the Individual ID, \n        with following columns for metainfo:\n\n            * family_id   -> Family ID\n            * paternal_id -> Paternal ID\n            * maternal_id -> Maternal ID\n            * sex         -> Sex (1=male; 2=female; other=unknown)\n            * phenotype   -> Phenotype (-9, or 0 for unknown)\n            * population  -> Population string (e.g. 'ASW' - 'YRI')\n            * rest are SNP rs (ids)\n\n        More information is given in infodf:\n\n            * Chromosome:\n                - autosomal chromosemes                -> 1-22\n                - X    X chromosome                    -> 23\n                - Y    Y chromosome                    -> 24\n                - XY   Pseudo-autosomal region of X    -> 25\n                - MT   Mitochondrial                   -> 26\n            * Relative Positon (to Chromosome) [base pairs]\n\n        ",
+        "files": [
+            [
+                "hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2",
+                "hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2",
+                "relationships_w_pops_121708.txt"
+            ]
        ],
-      "details":"Google trends results.",
-      "size":0
+        "license": "International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)",
+        "size": 3458246739,
+        "urls": [
+            "http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/"
+        ]
+    },
+    "isomap_face_data": {
+        "citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
+        "details": "Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
+        "files": [
+            [
+                "face_data.mat"
+            ]
+        ],
+        "license": null,
+        "size": 24229368,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/"
+        ]
+    },
+    "mauna_loa": {
+        "citation": "Mauna Loa Data. Dr. Pieter Tans, NOAA/ESRL (www.esrl.noaa.gov/gmd/ccgg/trends/) and Dr. Ralph Keeling, Scripps Institution of Oceanography (scrippsco2.ucsd.edu/).",
+        "details": "The 'average' column contains the monthly mean CO2 mole fraction determined from daily averages.  The mole fraction of CO2, expressed as parts per million (ppm) is the number of molecules of CO2 in every one million molecules of dried air (water vapor removed).  If there are missing days concentrated either early or late in the month, the monthly mean is corrected to the middle of the month using the average seasonal cycle.  Missing months are denoted by -99.99. The 'interpolated' column includes average values from the preceding column and interpolated values where data are missing.  Interpolated values are computed in two steps.  First, we compute for each month the average seasonal cycle in a 7-year window around each monthly value.  In this way the seasonal cycle is allowed to change slowly over time.  We then determine the 'trend' value for each month by removing the seasonal cycle; this result is shown in the 'trend' column.  Trend values are linearly interpolated for missing months. The interpolated monthly mean is then the sum of the average seasonal cycle value and the trend value for the missing month.\n\nNOTE: In general, the data presented for the last year are subject to change, depending on recalibration of the reference gas mixtures used, and other quality control procedures. Occasionally, earlier years may also be changed for the same reasons.  Usually these changes are minor.\n\nCO2 expressed as a mole fraction in dry air, micromol/mol, abbreviated as ppm \n\n (-99.99 missing data;  -1 no data for daily means in month)",
+        "files": [
+            [
+                "co2_mm_mlo.txt"
+            ]
+        ],
+        "license": "-------------------------------------------------------------------- USE OF NOAA ESRL DATA\n\n  These data are made freely available to the public and the scientific community in the belief that their wide dissemination will lead to greater understanding and new scientific insights. The availability of these data does not constitute publication of the data.  NOAA relies on the ethics and integrity of the user to insure that ESRL receives fair credit for their work.  If the data  are obtained for potential use in a publication or presentation,  ESRL should be informed at the outset of the nature of this work.   If the ESRL data are essential to the work, or if an important  result or conclusion depends on the ESRL data, co-authorship may be appropriate.  This should be discussed at an early stage in the work.  Manuscripts using the ESRL data should be sent to ESRL for review before they are submitted for publication so we can insure that the quality and limitations of the data are accurately represented.\n\n  Contact:   Pieter Tans (303 497 6678; pieter.tans@noaa.gov)\n\n  RECIPROCITY  Use of these data implies an agreement to reciprocate. Laboratories making similar measurements agree to make their own data available to the general public and to the scientific community in an equally complete and easily accessible form. Modelers are encouraged to make available to the community, upon request, their own tools used in the interpretation of the ESRL data, namely well documented model code, transport fields, and additional information necessary for other scientists to repeat the work and to run modified versions. Model availability includes collaborative support for new users of the models.\n --------------------------------------------------------------------\n\n     See www.esrl.noaa.gov/gmd/ccgg/trends/ for additional details.",
+        "size": 46779,
+        "urls": [
+            "ftp://aftp.cmdl.noaa.gov/products/trends/co2/"
+        ]
+    },
+    "olivetti_faces": {
+        "citation": "Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994",
+        "details": "Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ",
+        "files": [
+            [
+                "att_faces.zip"
+            ],
+            [
+                "olivettifaces.mat"
+            ]
+        ],
+        "license": null,
+        "size": 8561331,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
+            "http://www.cs.nyu.edu/~roweis/data/"
+        ]
+    },
+    "olivetti_glasses": {
+        "citation": "Information recorded in olivetti_faces entry. Should be used from there.",
+        "details": "Information recorded in olivetti_faces entry. Should be used from there.",
+        "files": [
+            [
+                "has_glasses.np"
+            ],
+            [
+                "olivettifaces.mat"
+            ]
+        ],
+        "license": null,
+        "size": 4261047,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
+            "http://www.cs.nyu.edu/~roweis/data/"
+        ]
+    },
+    "olympic_marathon_men": {
+        "citation": null,
+        "details": "Olympic men's marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia; we are not responsible for errors in the data",
+        "files": [
+            [
+                "olympicMarathonTimes.csv"
+            ]
+        ],
+        "license": null,
+        "size": 584,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/"
+        ]
    },
    "osu_accad": {
+        "citation": "The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
+        "details": "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
        "files": [
            [
                "swagger1TXT.ZIP",
@ -82,101 +357,47 @@
            ]
        ],
        "license": "Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
-      "citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
-      "details":"Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
+        "size": 15922790,
        "urls": [
            "http://accad.osu.edu/research/mocap/data/",
            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
-      ],
-      "size":15922790
+        ]
    },
-   "isomap_face_data":{
+    "osu_run1": {
+        "citation": "The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
+        "details": "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
        "files": [
            [
-            "face_data.mat"
-         ]
+                "run1TXT.ZIP"
            ],
-      "license":null,
-      "citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
-      "details":"Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/"
-      ],
-      "size":24229368
-   },
-   "boston_housing":{
-      "files":[
            [
-            "Index",
-            "housing.data",
-            "housing.names"
+                "connections.txt"
            ]
        ],
-      "license":null,
-      "citation":"Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.",
-      "details":"The Boston Housing data relates house values in Boston to a range of input variables.",
+        "license": "Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
+        "size": 338103,
        "urls": [
-         "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/"
-      ],
-      "size":51276
-   },
-   "cmu_mocap_full":{
-      "files":[
-         [
-            "allasfamc.zip"
+            "http://accad.osu.edu/research/mocap/data/",
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
        ]
-      ],
-      "license":"From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.",
-      "citation":"Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\nThe database was created with funding from NSF EIA-0196217.",
-      "details":"CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
-      "urls":[
-         "http://mocap.cs.cmu.edu/subjects"
-      ],
-      "size":null
-   },
-   "brendan_faces":{
-      "files":[
-         [
-            "frey_rawface.mat"
-         ]
-      ],
-      "license":null,
-      "citation":"Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.",
-      "details":"A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.",
-      "urls":[
-         "http://www.cs.nyu.edu/~roweis/data/"
-      ],
-      "size":1100584
-   },
-   "olympic_marathon_men":{
-      "files":[
-         [
-            "olympicMarathonTimes.csv"
-         ]
-      ],
-      "license":null,
-      "citation":null,
-      "details":"Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/"
-      ],
-      "size":584
    },
    "pumadyn-32nm": {
+        "citation": "Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.",
+        "details": "Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.",
        "files": [
            [
                "pumadyn-32nm.tar.gz"
            ]
        ],
        "license": "Data is made available by the Delve system at the University of Toronto",
-      "citation":"Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.",
-      "details":"Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.",
+        "size": 5861646,
        "urls": [
            "ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/"
-      ],
-      "size":5861646
+        ]
    },
    "ripley_prnn_data": {
+        "citation": "Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7",
+        "details": "Data sets from Brian Ripley's Pattern Recognition and Neural Networks",
        "files": [
            [
                "Cushings.dat",
@ -194,14 +415,90 @@
            ]
        ],
        "license": null,
-      "citation":"Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7",
-      "details":"Data sets from Brian Ripley's Pattern Recognition and Neural Networks",
+        "size": 93565,
        "urls": [
            "http://www.stats.ox.ac.uk/pub/PRNN/"
+        ]
+    },
+    "robot_wireless": {
+        "citation": "WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.",
+        "details": "Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.",
+        "files": [
+            [
+                "uw-floor.txt"
+            ]
        ],
-      "size":93565
+        "license": null,
+        "size": 284390,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/"
+        ]
+    },
+    "rogers_girolami_data": {
+        "citation": "A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146",
+        "details": "Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.",
+        "files": [
+            [
+                "firstcoursemldata.tar.gz"
+            ]
+        ],
+        "license": null,
+        "size": 21949154,
+        "suffices": [
+            [
+                "?dl=1"
+            ]
+        ],
+        "urls": [
+            "https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/"
+        ]
+    },
+    "singlecell": {
+        "citation": "Guoji Guo, Mikael Huss, Guo Qing Tong, Chaoyang Wang, Li Li Sun, Neil D. Clarke, Paul Robson, Resolution of Cell Fate Decisions Revealed by Single-Cell Gene Expression Analysis from Zygote to Blastocyst, Developmental Cell, Volume 18, Issue 4, 20 April 2010, Pages 675-685, ISSN 1534-5807, http://dx.doi.org/10.1016/j.devcel.2010.02.012. (http://www.sciencedirect.com/science/article/pii/S1534580710001103) Keywords: DEVBIO",
+        "details": "qPCR TaqMan array single cell experiment in mouse. The data is taken from the early stages of development when the Blastocyst is forming. At the 32 cell stage the data is already separated into the trophectoderm (TE) which goes on to form the placenta and the inner cellular mass (ICM). The ICM further differentiates into the epiblast (EPI)---which gives rise to the endoderm, mesoderm and ectoderm---and the primitive endoderm (PE) which develops into the amniotic sac. Guo et al selected 48 genes for expression measurement. They labelled the resulting cells and their labels are included as an aid to visualization.",
+        "files": [
+            [
+                "singlecell.csv"
+            ]
+        ],
+        "license": "ScienceDirect: http://www.elsevier.com/locate/termsandconditions?utm_source=sciencedirect&utm_medium=link&utm_campaign=terms",
+        "size": 233.1,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
+        ]
+    },
+    "sod1_mouse": {
+        "citation": "'Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
+        "details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",
+        "files": [
+            [
+                "sod1_C57_129_exprs.csv",
+                "sod1_C57_129_se.csv"
+            ]
+        ],
+        "license": null,
+        "size": 0,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/sod1_mouse/"
+        ]
+    },
+    "swiss_roll": {
+        "citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
+        "details": "Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
+        "files": [
+            [
+                "swiss_roll_data.mat"
+            ]
+        ],
+        "license": null,
+        "size": 800256,
+        "urls": [
+            "http://isomap.stanford.edu/"
+        ]
    },
    "three_phase_oil_flow": {
+        "citation": "Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593",
+        "details": "The three phase oil data used initially for demonstrating the Generative Topographic mapping.",
        "files": [
            [
                "DataTrnLbls.txt",
@ -213,197 +510,23 @@
            ]
        ],
        "license": null,
-      "citation":"Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593",
-      "details":"The three phase oil data used initially for demonstrating the Generative Topographic mapping.",
+        "size": 712796,
        "urls": [
            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/three_phase_oil_flow/"
-      ],
-      "size":712796
-   },
-   "robot_wireless":{
-      "files":[
-         [
-            "uw-floor.txt"
        ]
-      ],
-      "license":null,
-      "citation":"WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.",
-      "details":"Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/"
-      ],
-      "size":284390
    },
    "xw_pen": {
+        "citation": "Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005",
+        "details": "Accelerometer pen data used for robust regression by Tipping and Lawrence.",
        "files": [
            [
                "xw_pen_15.csv"
            ]
        ],
        "license": null,
-      "citation":"Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005",
-      "details":"Accelerometer pen data used for robust regression by Tipping and Lawrence.",
+        "size": 3410,
        "urls": [
            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/xw_pen/"
-      ],
-      "size":3410
-   },
-   "swiss_roll":{
-      "files":[
-         [
-            "swiss_roll_data.mat"
        ]
-      ],
-      "license":null,
-      "citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
-      "details":"Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
-      "urls":[
-         "http://isomap.stanford.edu/"
-      ],
-      "size":800256
-   },
-   "osu_run1":{
-      "files":[
-         [
-            "run1TXT.ZIP"
-         ],
-         [
-            "connections.txt"
-         ]
-      ],
-      "license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
-      "citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
-      "details":"Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
-      "urls":[
-         "http://accad.osu.edu/research/mocap/data/",
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
-      ],
-      "size":338103
-   },
-   "creep_rupture":{
-      "files":[
-         [
-            "creeprupt.tar"
-         ]
-      ],
-      "license":null,
-      "citation":"Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.",
-      "details":"Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.",
-      "urls":[
-         "http://www.msm.cam.ac.uk/map/data/tar/"
-      ],
-      "size":602797
-   },
-   "olivetti_faces":{
-      "files":[
-         [
-            "att_faces.zip"
-         ],
-         [
-            "olivettifaces.mat"
-         ]
-      ],
-      "license":null,
-      "citation":"Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994",
-      "details":"Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
-         "http://www.cs.nyu.edu/~roweis/data/"
-      ],
-      "size":8561331
-   },
-   "olivetti_glasses":{
-      "files":[
-         [
-            "has_glasses.np"
-         ],
-         [
-            "olivettifaces.mat"
-         ]
-      ],
-      "license":null,
-      "citation":"Information recorded in olivetti_faces entry. Should be used from there.",
-      "details":"Information recorded in olivetti_faces entry. Should be used from there.",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
-         "http://www.cs.nyu.edu/~roweis/data/"
-      ],
-      "size":4261047
-   },
-   "della_gatta":{
-      "files":[
-         [
-            "DellaGattadata.mat"
-         ]
-      ],
-      "license":null,
-      "citation":"Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008",
-      "details":"The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/"
-      ],
-      "size":3729650
-   },
-   "epomeo_gpx":{
-      "files":[
-         [
-            "endomondo_1.gpx",
-            "endomondo_2.gpx",
-            "garmin_watch_via_endomondo.gpx",
-            "viewranger_phone.gpx",
-            "viewranger_tablet.gpx"
-         ]
-      ],
-      "license":null,
-      "citation":"",
-      "details":"Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/"
-      ],
-      "size":2031872
-   },
-   "mauna_loa":{
-      "files":[
-         [
-            "co2_mm_mlo.txt"
-         ]
-      ],
-      "license":"-------------------------------------------------------------------- USE OF NOAA ESRL DATA\n\n  These data are made freely available to the public and the scientific community in the belief that their wide dissemination will lead to greater understanding and new scientific insights. The availability of these data does not constitute publication of the data.  NOAA relies on the ethics and integrity of the user to insure that ESRL receives fair credit for their work.  If the data  are obtained for potential use in a publication or presentation,  ESRL should be informed at the outset of the nature of this work.   If the ESRL data are essential to the work, or if an important  result or conclusion depends on the ESRL data, co-authorship may be appropriate.  This should be discussed at an early stage in the work.  Manuscripts using the ESRL data should be sent to ESRL for review before they are submitted for publication so we can insure that the quality and limitations of the data are accurately represented.\n\n  Contact:   Pieter Tans (303 497 6678; pieter.tans@noaa.gov)\n\n  RECIPROCITY  Use of these data implies an agreement to reciprocate. Laboratories making similar measurements agree to make their own data available to the general public and to the scientific community in an equally complete and easily accessible form. Modelers are encouraged to make available to the community, upon request, their own tools used in the interpretation of the ESRL data, namely well documented model code, transport fields, and additional information necessary for other scientists to repeat the work and to run modified versions. Model availability includes collaborative support for new users of the models.\n --------------------------------------------------------------------\n\n     See www.esrl.noaa.gov/gmd/ccgg/trends/ for additional details.",
-      "citation":"Mauna Loa Data. Dr. Pieter Tans, NOAA/ESRL (www.esrl.noaa.gov/gmd/ccgg/trends/) and Dr. Ralph Keeling, Scripps Institution of Oceanography (scrippsco2.ucsd.edu/).",
-      "details":"The 'average' column contains the monthly mean CO2 mole fraction determined from daily averages.  The mole fraction of CO2, expressed as parts per million (ppm) is the number of molecules of CO2 in every one million molecules of dried air (water vapor removed).  If there are missing days concentrated either early or late in the month, the monthly mean is corrected to the middle of the month using the average seasonal cycle.  Missing months are denoted by -99.99. The 'interpolated' column includes average values from the preceding column and interpolated values where data are missing.  Interpolated values are computed in two steps.  First, we compute for each month the average seasonal cycle in a 7-year window around each monthly value.  In this way the seasonal cycle is allowed to change slowly over time.  We then determine the 'trend' value for each month by removing the seasonal cycle; this result is shown in the 'trend' column.  Trend values are linearly interpolated for missing months. The interpolated monthly mean is then the sum of the average seasonal cycle value and the trend value for the missing month.\n\nNOTE: In general, the data presented for the last year are subject to change, depending on recalibration of the reference gas mixtures used, and other quality control procedures. Occasionally, earlier years may also be changed for the same reasons.  Usually these changes are minor.\n\nCO2 expressed as a mole fraction in dry air, micromol/mol, abbreviated as ppm \n\n (-99.99 missing data;  -1 no data for daily means in month)",
-      "urls":[
-         "ftp://aftp.cmdl.noaa.gov/products/trends/co2/"
-      ],
-      "size":46779
-   },
-   "boxjenkins_airline":{
-      "files":[
-         [
-            "boxjenkins_airline.csv"
-         ]
-      ],
-      "license":"You may copy and redistribute the data. You may make derivative works from the data. You may use the data for commercial purposes. You may not sublicence the data when redistributing it. You may not redistribute the data under a different license. Source attribution on any use of this data: Must refer source.",
-      "citation":"Box & Jenkins (1976), in file: data/airpass, Description: International airline passengers: monthly totals in thousands. Jan 49 – Dec 60",
-      "details":"International airline passengers, monthly totals from January 1949 to December 1960.",
-      "urls":[
-                  "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/boxjenkins_airline/"
-      ],
-      "size":46779
-   },
-
-   "decampos_characters":{
-      "files":[
-         [
-            "characters.npy",
-            "digits.npy"
-         ]
-      ],
-      "license":null,
-      "citation":"T. de Campos, B. R. Babu, and M. Varma. Character recognition in natural images. VISAPP 2009.",
-      "details":"Examples of hand written digits taken from the de Campos et al paper on Character Recognition in Natural Images.",
-      "urls":[
-         "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/decampos_digits/"
-      ],
-      "size":2031872
    }
 }
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -12,6 +12,8 @@ import datetime
 import json
 import re

+from config import *
+
 ipython_available=True
 try:
    import IPython
@ -29,7 +31,8 @@ def reporthook(a,b,c):
    sys.stdout.flush()

 # Global variables
-data_path = os.path.join(os.path.dirname(__file__), 'datasets')
+data_path = os.path.expandvars(config.get('datasets', 'dir'))
+#data_path = os.path.join(os.path.dirname(__file__), 'datasets')
 default_seed = 10000
 overide_manual_authorize=False
 neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
@ -108,7 +111,11 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
            raise ValueError('Tried url ' + url + suffix + ' and received server error ' + str(response.code))
    with open(save_name, 'wb') as f:
        meta = response.info()
-        file_size = int(meta.getheaders("Content-Length")[0])
+        content_length_str = meta.getheaders("Content-Length")
+        if content_length_str:
+            file_size = int(content_length_str[0])
+        else:
+            file_size = None
        status = ""
        file_size_dl = 0
        block_sz = 8192
@ -120,9 +127,15 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
            file_size_dl += len(buff)
            f.write(buff)
            sys.stdout.write(" "*(len(status)) + "\r")
-            status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1.*1e6), 
-                                                                       full=file_size/(1.*1e6), ll=line_length, 
+            if file_size:
+                status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.), 
+                                                                       full=file_size/(1048576.), ll=line_length, 
                                                                       perc="="*int(line_length*float(file_size_dl)/file_size))
+            else:
+                status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.), 
+                                                                       ll=line_length, 
+                                                                       perc="."*int(line_length*float(file_size_dl/(10*1048576.))))
+                
            sys.stdout.write(status)
            sys.stdout.flush()
        sys.stdout.write(" "*(len(status)) + "\r")
@ -350,6 +363,34 @@ def football_data(season='1314', data_set='football_data'):
        Y = table[:, 4:]
    return data_details_return({'X': X, 'Y': Y}, data_set)

+def sod1_mouse(data_set='sod1_mouse'):
+    """Load the SOD1 mouse gene expression data set.
+
+    The data set is downloaded first if it is not available locally. The file
+    'sod1_C57_129_exprs.csv' is then read from the data set directory with
+    pandas, using the first row as header and the first column as index.
+
+    :param data_set: name of the data set (and of its directory under data_path).
+    :type data_set: str
+    :returns: the result of data_details_return for a dictionary with the
+              keys 'X' and 'Y', where 'Y' is the pandas DataFrame read from
+              the csv file and 'X' is currently a constant placeholder.
+    """
+    if not data_available(data_set):
+        download_data(data_set)
+    from pandas import read_csv
+    dirpath = os.path.join(data_path, data_set)
+    filename = os.path.join(dirpath, 'sod1_C57_129_exprs.csv')
+    Y = read_csv(filename, header=0, index_col=0)
+    # NOTE(review): no input design is built for this data set yet, X is only
+    # a placeholder. The earlier draft carried the unused locals
+    # num_repeats=4, num_time=4 and num_cond=4 -- presumably the intended
+    # layout of X; confirm before constructing it.
+    X = 1
+    return data_details_return({'X': X, 'Y': Y}, data_set)
+    
+def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
+    """Load the fruitfly_tomancak expression data set.
+
+    The data set is downloaded first if it is not available locally. The file
+    'tomancak_exprs.csv' is read with pandas (first row as header, first
+    column as index) and transposed to give Y.
+
+    X is a 36 x 2 array enumerating a grid of 12 time indices (0..11, first
+    column, varying fastest) by 3 repeat indices (0..2, second column).
+
+    :param data_set: name of the data set (and of its directory under data_path).
+    :type data_set: str
+    :param gene_number: passed through unchanged under the key 'gene_number'.
+    :returns: the result of data_details_return for a dictionary with the
+              keys 'X', 'Y' and 'gene_number'.
+    """
+    if not data_available(data_set):
+        download_data(data_set)
+    from pandas import read_csv
+    csv_path = os.path.join(data_path, data_set, 'tomancak_exprs.csv')
+    expression = read_csv(csv_path, header=0, index_col=0)
+    Y = expression.T
+    n_time, n_repeats = 12, 3
+    time_axis = np.linspace(0, n_time - 1, n_time)
+    repeat_axis = np.linspace(0, n_repeats - 1, n_repeats)
+    # meshgrid yields (n_repeats, n_time) grids; flattening them row by row
+    # makes the time index cycle within each repeat.
+    time_grid, repeat_grid = np.meshgrid(time_axis, repeat_axis)
+    X = np.vstack((time_grid.flatten(), repeat_grid.flatten())).T
+    return data_details_return({'X': X, 'Y': Y, 'gene_number': gene_number}, data_set)
+
 # This will be for downloading google trends data.
 def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
    """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations."""
@ -718,6 +759,21 @@ def hapmap3(data_set='hapmap3'):
                  populations=populations)
    return hapmap

+def singlecell(data_set='singlecell'):
+    """Load the single cell qPCR data set of Guo et al. [2010].
+
+    The data set is downloaded first if it is not available locally. The file
+    'singlecell.csv' is then read with pandas, taking the first row as column
+    header and the first column as row index.
+
+    :param data_set: name of the data set (and of its directory under data_path).
+    :type data_set: str
+    :returns: the result of data_details_return for a dictionary holding the
+              DataFrame ('Y'), a short description ('info'), the column
+              index of the frame ('genes') and its row index ('labels').
+    """
+    if not data_available(data_set):
+        download_data(data_set)
+
+    from pandas import read_csv
+    csv_path = os.path.join(data_path, data_set, 'singlecell.csv')
+    frame = read_csv(csv_path, header=0, index_col=0)
+    description = "qPCR singlecell experiment in Mouse, measuring 48 gene expressions in 1-64 cell states. The labels have been created as in Guo et al. [2010]"
+    contents = {
+        'Y': frame,
+        'info': description,
+        'genes': frame.columns,
+        'labels': frame.index,
+    }
+    return data_details_return(contents, data_set)
+
 def swiss_roll_1000():
    return swiss_roll(num_samples=1000)

--- a/GPy/util/datasets/data_resources_create.py
+++ b/GPy/util/datasets/data_resources_create.py
@ -1,168 +0,0 @@
-import json
-
-neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
-sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
-cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
-
-data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
-                                       'files' : [['ankurDataPoseSilhouette.mat']],
-                                       'license' : None,
-                                       'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
-                                       'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
-
-                  'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
-                                      'files' : [['Index', 'housing.data', 'housing.names']],
-                                      'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
-                                      'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""",
-                                      'license' : None,
-                                      'size' : 51276
-                                      },
-                  'brendan_faces' : {'urls' : [sam_url],
-                                     'files': [['frey_rawface.mat']],
-                                     'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
-                                     'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
-                                     'license': None,
-                                     'size' : 1100584},
-                  'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'],
-                                      'files' : [['allasfamc.zip']],
-                                      'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.'
-                                      'The database was created with funding from NSF EIA-0196217.""",
-                                      'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
-                                      'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
-                                      'size' : None},
-                  'creep_rupture' : {'urls' : ['http://www.msm.cam.ac.uk/map/data/tar/'],
-                                     'files' : [['creeprupt.tar']],
-                                     'citation' : 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.',
-                                     'details' : """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""",
-                                     'license' : None,
-                                     'size' : 602797},
-                  'della_gatta' : {'urls' : [neil_url + 'della_gatta/'],
-                                   'files': [['DellaGattadata.mat']],
-                                   'citation' : 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008',
-                                   'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
-                                   'license':None,
-                                   'size':3729650},
-                  'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
-                                   'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
-                                   'citation' : '',
-                                   'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
-                                   'license':None,
-                                   'size': 2031872},
-                  'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
-                                           'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
-                                           'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
-                                           'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""",
-                                           'license' : None,
-                                           'size' : 712796},
-                  'rogers_girolami_data' : {'urls' : ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'],
-                                            'files' : [['firstcoursemldata.tar.gz']],
-                                            'suffices' : [['?dl=1']],
-                                            'citation' : 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146',
-                                            'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
-                                            'license' : None,
-                                            'size' : 21949154},
-                  'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
-                                      'files' : [['att_faces.zip'], ['olivettifaces.mat']],
-                                            'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
-                                            'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
-                                            'license': None,
-                                            'size' : 8561331},
-                  'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
-                                            'files' : [['olympicMarathonTimes.csv']],
-                                            'citation' : None,
-                                            'details' : """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""",
-                                            'license': None,
-                                            'size' : 584},
-                  'osu_run1' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
-                                'files': [['run1TXT.ZIP'],['connections.txt']],
-                                'details' : "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
-                                'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
-                                'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
-                                'size': 338103},
-                  'osu_accad' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
-                                'files': [['swagger1TXT.ZIP','handspring1TXT.ZIP','quickwalkTXT.ZIP','run1TXT.ZIP','sprintTXT.ZIP','dogwalkTXT.ZIP','camper_04TXT.ZIP','dance_KB3_TXT.ZIP','per20_TXT.ZIP','perTWO07_TXT.ZIP','perTWO13_TXT.ZIP','perTWO14_TXT.ZIP','perTWO15_TXT.ZIP','perTWO16_TXT.ZIP'],['connections.txt']],
-                                'details' : "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
-                                'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
-                                'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
-                                'size': 15922790},
-                  'pumadyn-32nm' : {'urls' : ['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'],
-                                    'files' : [['pumadyn-32nm.tar.gz']],
-                                    'details' : """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""",
-                                    'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""",
-                                    'license' : """Data is made available by the Delve system at the University of Toronto""",
-                                    'size' : 5861646},
-                  'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'],
-                                      'files' : [['uw-floor.txt']],
-                                      'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""",
-                                      'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""",
-                                      'license' : None,
-                                      'size' : 284390},
-                  'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'],
-                                  'files' : [['swiss_roll_data.mat']],
-                                  'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
-                                  'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
-                                  'license' : None,
-                                  'size' : 800256},
-                  'ripley_prnn_data' : {'urls' : ['http://www.stats.ox.ac.uk/pub/PRNN/'],
-                                        'files' : [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']],
-                                        'details' : """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""",
-                                        'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""",
-                                        'license' : None,
-                                        'size' : 93565},
-                  'isomap_face_data' : {'urls' : [neil_url + 'isomap_face_data/'],
-                                        'files' : [['face_data.mat']],
-                                        'details' : """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
-                                        'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
-                                        'license' : None,
-                                        'size' : 24229368},
-                  'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
-                                        'files' : [['xw_pen_15.csv']],
-                                        'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
-                                        'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
-                                        'license' : None,
-                                        'size' : 3410},
-                  'hapmap3' : {'urls' : ['http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/'],
-                                 'files' : [['hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2', 'hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2', 'relationships_w_pops_121708.txt']],
-                                 'details' : """
-        HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. 
-        The HapMap phase three SNP dataset - 1184 samples out of 11 populations.
-        See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.
-
-        SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:
-        Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then
-
-              /  1, iff SNPij==(B1,B1)
-        Aij = |  0, iff SNPij==(B1,B2)
-              \ -1, iff SNPij==(B2,B2)
-
-        The SNP data and the meta information (such as iid, sex and phenotype) are
-        stored in the dataframe datadf, index is the Individual ID, 
-        with following columns for metainfo:
-
-            * family_id   -> Family ID
-            * paternal_id -> Paternal ID
-            * maternal_id -> Maternal ID
-            * sex         -> Sex (1=male; 2=female; other=unknown)
-            * phenotype   -> Phenotype (-9, or 0 for unknown)
-            * population  -> Population string (e.g. 'ASW' - 'YRI')
-            * rest are SNP rs (ids)
-
-        More information is given in infodf:
-
-            * Chromosome:
-                - autosomal chromosemes                -> 1-22
-                - X    X chromosome                    -> 23
-                - Y    Y chromosome                    -> 24
-                - XY   Pseudo-autosomal region of X    -> 25
-                - MT   Mitochondrial                   -> 26
-            * Relative Positon (to Chromosome) [base pairs]
-
-        """,
-                                 'citation': """Gibbs, Richard A., et al. "The international HapMap project." Nature 426.6968 (2003): 789-796.""",
-                                 'license' : """International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)""",
-                                 'size' : 2*1729092237 + 62265},
-                  }
-
-with open('data_resources.json', 'w') as f:
-    print "writing data_resources"
-    json.dump(data_resources, f)
--- a/GPy/util/initialization.py
+++ b/GPy/util/initialization.py
@ -13,7 +13,11 @@ def initialize_latent(init, input_dim, Y):
        p = pca(Y)
        PC = p.project(Y, min(input_dim, Y.shape[1]))
        Xr[:PC.shape[0], :PC.shape[1]] = PC
+        var = p.fracs[:input_dim]
    else:
        var = Xr.var(0)
+    
+    Xr -= Xr.mean(0)
+    Xr /= Xr.var(0)
+    
    return Xr, var/var.max()
-    return Xr, p.fracs[:input_dim]
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@ -130,14 +130,14 @@ def fast_array_equal(A, B):
    """ % pragma_string

    if config.getboolean('parallel', 'openmp'):
-        pragma_string = '#include <omp.h>'
+        header_string = '#include <omp.h>'
    else:
-        pragma_string = ''
+        header_string = ''

    support_code = """
    %s
    #include <math.h>
-    """ % pragma_string
+    """ % header_string


    weave_options_openmp = {'headers'           : ['<omp.h>'],
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@ -2,15 +2,22 @@ import sys
 import numpy as np
 import sympy as sym
 from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
-
+from sympy.matrices import Matrix
 ########################################
 ## Try to do some matrix functions: problem, you can't do derivatives
 ## with respect to matrix functions :-(

+class GPySymMatrix(Matrix):
+    """Matrix subclass whose ``atoms`` gathers the atoms of every element."""
+
+    def __init__(self, indices):
+        # ``indices`` is accepted here but not forwarded to Matrix.__init__.
+        # NOTE(review): presumably the entries are consumed when the Matrix
+        # object is constructed -- confirm against sympy.
+        Matrix.__init__(self)
+
+    def atoms(self):
+        """Return a list of the atoms of each element, in iteration order.
+
+        Atoms shared by several elements appear once per element.
+        """
+        collected = []
+        for element in self:
+            collected.extend(element.atoms())
+        return collected
+        
 class selector(Function):
    """A function that returns an element of a Matrix depending on input indices."""
    nargs = 3
-
+    def fdiff(self, argindex=1):
+        return selector(*self.args)
    @classmethod
    def eval(cls, X, i, j):
        if i.is_Number and j.is_Number:
--- a/README.md
+++ b/README.md
@ -10,6 +10,16 @@ A Gaussian processes framework in Python.

 Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png)

+Citation
+========
+
+    @Misc{gpy2014,
+      author =   {The GPy authors},
+      title =    {{GPy}: A Gaussian process framework in python},
+      howpublished = {\url{http://github.com/SheffieldML/GPy}},
+      year = {2012--2014}
+    }
+
 Getting started
 ===============
 Installing with pip