merge mu's changes into devel

This commit is contained in:
mu 2014-05-14 11:19:18 +01:00
commit 9171909724
37 changed files with 1177 additions and 878 deletions

View file

@ -72,7 +72,7 @@ class GP(Model):
def log_likelihood(self): def log_likelihood(self):
return self._log_marginal_likelihood return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False): def _raw_predict(self, _Xnew, full_cov=False, kern=None):
""" """
For making predictions, does not account for normalization or likelihood For making predictions, does not account for normalization or likelihood
@ -87,14 +87,17 @@ class GP(Model):
$$ $$
""" """
Kx = self.kern.K(_Xnew, self.X).T if kern is None:
kern = self.kern
Kx = kern.K(_Xnew, self.X).T
WiKx = np.dot(self.posterior.woodbury_inv, Kx) WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: if full_cov:
Kxx = self.kern.K(_Xnew) Kxx = kern.K(_Xnew)
var = Kxx - np.dot(Kx.T, WiKx) var = Kxx - np.dot(Kx.T, WiKx)
else: else:
Kxx = self.kern.Kdiag(_Xnew) Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0) var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1) var = var.reshape(-1, 1)
@ -102,7 +105,7 @@ class GP(Model):
if len(mu.shape)==1: mu = mu[:,None] if len(mu.shape)==1: mu = mu[:,None]
return mu, var return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None): def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
""" """
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew.
@ -111,6 +114,9 @@ class GP(Model):
:param full_cov: whether to return the full covariance matrix, or just :param full_cov: whether to return the full covariance matrix, or just
the diagonal the diagonal
:type full_cov: bool :type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses.
:returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim :returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
:returns: var: posterior variance, a Numpy array, Nnew x 1 if :returns: var: posterior variance, a Numpy array, Nnew x 1 if
full_cov=False, Nnew x Nnew otherwise full_cov=False, Nnew x Nnew otherwise
@ -121,9 +127,9 @@ class GP(Model):
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions. This is to allow for different normalizations of the output dimensions.
""" """
#predict the latent function values #predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov) mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
# now push through likelihood # now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata) mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)

View file

@ -57,15 +57,17 @@ class ObservablesList(object):
def __repr__(self): def __repr__(self):
return self._poc.__repr__() return self._poc.__repr__()
def add(self, priority, observable, callble): def add(self, priority, observable, callble):
ins = 0 if observable is not None:
for pr, _, _ in self: ins = 0
if priority > pr: for pr, _, _ in self:
break if priority > pr:
ins += 1 break
self._poc.insert(ins, (priority, weakref.ref(observable), callble)) ins += 1
self._poc.insert(ins, (priority, weakref.ref(observable), callble))
def __str__(self): def __str__(self):
ret = [] ret = []
curr_p = None curr_p = None
@ -86,18 +88,18 @@ class ObservablesList(object):
def __iter__(self): def __iter__(self):
self.flush() self.flush()
for p, o, c in self._poc: for p, o, c in self._poc:
if o() is not None: yield p, o(), c
yield p, o(), c
def __len__(self): def __len__(self):
self.flush() self.flush()
return self._poc.__len__() return self._poc.__len__()
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
self.flush()
s = ObservablesList() s = ObservablesList()
import copy for p,o,c in self:
s._poc = copy.deepcopy(self._poc, memo) import copy
s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo))
s.flush()
return s return s
def __getstate__(self): def __getstate__(self):

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
__updated__ = '2014-04-15' __updated__ = '2014-05-12'
import numpy as np import numpy as np
from parameter_core import Observable, Pickleable from parameter_core import Observable, Pickleable
@ -15,10 +15,10 @@ class ObsAr(np.ndarray, Pickleable, Observable):
""" """
__array_priority__ = -1 # Never give back ObsAr __array_priority__ = -1 # Never give back ObsAr
def __new__(cls, input_array, *a, **kw): def __new__(cls, input_array, *a, **kw):
# allways make a copy of input paramters, as we need it to be in C order:
if not isinstance(input_array, ObsAr): if not isinstance(input_array, ObsAr):
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) obj = np.atleast_1d(np.require(np.copy(input_array), dtype=np.float64, requirements=['W', 'C'])).view(cls)
else: obj = input_array else: obj = input_array
#cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
super(ObsAr, obj).__init__(*a, **kw) super(ObsAr, obj).__init__(*a, **kw)
return obj return obj

View file

@ -45,7 +45,6 @@ class Param(OptimizationHandlable, ObsAr):
_parameters_ = [] _parameters_ = []
def __new__(cls, name, input_array, default_constraint=None): def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
cls.__name__ = "Param"
obj._current_slice_ = (slice(obj.shape[0]),) obj._current_slice_ = (slice(obj.shape[0]),)
obj._realshape_ = obj.shape obj._realshape_ = obj.shape
obj._realsize_ = obj.size obj._realsize_ = obj.size
@ -58,9 +57,9 @@ class Param(OptimizationHandlable, ObsAr):
def build_pydot(self,G): def build_pydot(self,G):
import pydot import pydot
node = pydot.Node(id(self), shape='record', label=self.name) node = pydot.Node(id(self), shape='trapezium', label=self.name)#, fontcolor='white', color='white')
G.add_node(node) G.add_node(node)
for o in self.observers.keys(): for _, o, _ in self.observers:
label = o.name if hasattr(o, 'name') else str(o) label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label) observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node) G.add_node(observed_node)
@ -90,6 +89,13 @@ class Param(OptimizationHandlable, ObsAr):
def param_array(self): def param_array(self):
return self return self
@property
def values(self):
"""
Return self as numpy array view
"""
return self.view(np.ndarray)
@property @property
def gradient(self): def gradient(self):
""" """
@ -100,11 +106,11 @@ class Param(OptimizationHandlable, ObsAr):
""" """
if getattr(self, '_gradient_array_', None) is None: if getattr(self, '_gradient_array_', None) is None:
self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64) self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64)
return self._gradient_array_[self._current_slice_] return self._gradient_array_#[self._current_slice_]
@gradient.setter @gradient.setter
def gradient(self, val): def gradient(self, val):
self._gradient_array_[self._current_slice_] = val self._gradient_array_[:] = val
#=========================================================================== #===========================================================================
# Array operations -> done # Array operations -> done
@ -112,10 +118,13 @@ class Param(OptimizationHandlable, ObsAr):
def __getitem__(self, s, *args, **kwargs): def __getitem__(self, s, *args, **kwargs):
if not isinstance(s, tuple): if not isinstance(s, tuple):
s = (s,) s = (s,)
if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim: #if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
s += (Ellipsis,) # s += (Ellipsis,)
new_arr = super(Param, self).__getitem__(s, *args, **kwargs) new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base try:
new_arr._current_slice_ = s
new_arr._gradient_array_ = self.gradient[s]
new_arr._original_ = self.base is new_arr.base
except AttributeError: pass # returning 0d array or float, double etc except AttributeError: pass # returning 0d array or float, double etc
return new_arr return new_arr
@ -156,6 +165,34 @@ class Param(OptimizationHandlable, ObsAr):
def _ensure_fixes(self): def _ensure_fixes(self):
if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool) if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
#===========================================================================
# parameterizable
#===========================================================================
def traverse(self, visit, *args, **kwargs):
"""
Traverse the hierarchy performing visit(self, *args, **kwargs) at every node passed by.
See "visitor pattern" in literature. This is implemented in pre-order fashion.
This will function will just call visit on self, as Param are leaf nodes.
"""
visit(self, *args, **kwargs)
def traverse_parents(self, visit, *args, **kwargs):
"""
Traverse the hierarchy upwards, visiting all parents and their children, except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example:
parents = []
self.traverse_parents(parents.append)
print parents
"""
if self.has_parent():
self.__visited = True
self._parent_._traverse_parents(visit, *args, **kwargs)
self.__visited = False
#=========================================================================== #===========================================================================
# Convenience # Convenience
#=========================================================================== #===========================================================================
@ -316,8 +353,8 @@ class ParamConcatenation(object):
val = val.values() val = val.values()
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
vals = self.values(); vals[s] = val vals = self.values(); vals[s] = val
[numpy.copyto(p, vals[ps], where=ind[ps]) for p, ps in zip(self.params, self._param_slices_):
for p, ps in zip(self.params, self._param_slices_)] p.flat[ind[ps]] = vals[ps]
if update: if update:
self.update_all_params() self.update_all_params()
def values(self): def values(self):

View file

@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED,
import numpy as np import numpy as np
import re import re
__updated__ = '2014-04-16' __updated__ = '2014-05-12'
class HierarchyError(Exception): class HierarchyError(Exception):
""" """
@ -124,7 +124,7 @@ class Parentable(object):
""" """
Disconnect this object from its parent Disconnect this object from its parent
""" """
raise NotImplementedError, "Abstaract superclass" raise NotImplementedError, "Abstract superclass"
@property @property
def _highest_parent_(self): def _highest_parent_(self):
@ -162,14 +162,13 @@ class Pickleable(object):
:param protocol: pickling protocol to use, python-pickle for details. :param protocol: pickling protocol to use, python-pickle for details.
""" """
import cPickle as pickle import cPickle as pickle
import pickle #TODO: cPickle
if isinstance(f, str): if isinstance(f, str):
with open(f, 'w') as f: with open(f, 'w') as f:
pickle.dump(self, f, protocol) pickle.dump(self, f, protocol)
else: else:
pickle.dump(self, f, protocol) pickle.dump(self, f, protocol)
#=========================================================================== #===========================================================================
# copy and pickling # copy and pickling
#=========================================================================== #===========================================================================
def copy(self): def copy(self):
@ -177,19 +176,23 @@ class Pickleable(object):
#raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
import copy import copy
memo = {} memo = {}
memo[id(self._parent_)] = None # the next part makes sure that we do not include parents in any form:
memo[id(self.gradient)] = None parents = []
memo[id(self.param_array)] = None self.traverse_parents(parents.append) # collect parents
memo[id(self._fixes_)] = None for p in parents:
c = copy.deepcopy(self, memo) memo[id(p)] = None # set all parents to be None, so they will not be copied
memo[id(self.gradient)] = None # reset the gradient
memo[id(self.param_array)] = None # and param_array
memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
c = copy.deepcopy(self, memo) # and start the copy
c._parent_index_ = None c._parent_index_ = None
return c return c
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
s = self.__new__(self.__class__) s = self.__new__(self.__class__) # fresh instance
memo[id(self)] = s memo[id(self)] = s # be sure to break all cycles --> self is already done
import copy import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo)) s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
return s return s
def __getstate__(self): def __getstate__(self):
@ -202,9 +205,6 @@ class Pickleable(object):
dc = dict() dc = dict()
for k,v in self.__dict__.iteritems(): for k,v in self.__dict__.iteritems():
if k not in ignore_list: if k not in ignore_list:
#if hasattr(v, "__getstate__"):
#dc[k] = v.__getstate__()
#else:
dc[k] = v dc[k] = v
return dc return dc
@ -212,12 +212,6 @@ class Pickleable(object):
self.__dict__.update(state) self.__dict__.update(state)
return self return self
#def __getstate__(self, memo):
# raise NotImplementedError, "get state must be implemented to be able to pickle objects"
#def __setstate__(self, memo):
# raise NotImplementedError, "set state must be implemented to be able to pickle objects"
class Gradcheckable(Pickleable, Parentable): class Gradcheckable(Pickleable, Parentable):
""" """
Adds the functionality for an object to be gradcheckable. Adds the functionality for an object to be gradcheckable.
@ -585,12 +579,6 @@ class OptimizationHandlable(Constrainable):
def __init__(self, name, default_constraint=None, *a, **kw): def __init__(self, name, default_constraint=None, *a, **kw):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
def transform(self):
[np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def untransform(self):
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def _get_params_transformed(self): def _get_params_transformed(self):
# transformed parameters (apply transformation rules) # transformed parameters (apply transformation rules)
p = self.param_array.copy() p = self.param_array.copy()
@ -604,15 +592,15 @@ class OptimizationHandlable(Constrainable):
return p return p
def _set_params_transformed(self, p): def _set_params_transformed(self, p):
if p is self.param_array: if not(p is self.param_array):
p = p.copy() if self.has_parent() and self.constraints[__fixed__].size != 0:
if self.has_parent() and self.constraints[__fixed__].size != 0: fixes = np.ones(self.size).astype(bool)
fixes = np.ones(self.size).astype(bool) fixes[self.constraints[__fixed__]] = FIXED
fixes[self.constraints[__fixed__]] = FIXED self.param_array.flat[fixes] = p
self.param_array.flat[fixes] = p elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
elif self._has_fixes(): self.param_array.flat[self._fixes_] = p else: self.param_array.flat = p
else: self.param_array.flat = p [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
self.untransform() for c, ind in self.constraints.iteritems() if c != __fixed__]
self._trigger_params_changed() self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True): def _trigger_params_changed(self, trigger_parent=True):
@ -626,7 +614,7 @@ class OptimizationHandlable(Constrainable):
def num_params(self): def num_params(self):
""" """
Return the number of parameters of this parameter_handle. Return the number of parameters of this parameter_handle.
Param objects will allways return 0. Param objects will always return 0.
""" """
raise NotImplemented, "Abstract, please implement in respective classes" raise NotImplemented, "Abstract, please implement in respective classes"
@ -644,6 +632,7 @@ class OptimizationHandlable(Constrainable):
else: names = [adjust(x.name) for x in self._parameters_] else: names = [adjust(x.name) for x in self._parameters_]
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
return names return names
def _get_param_names(self): def _get_param_names(self):
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n return n
@ -710,12 +699,18 @@ class Parameterizable(OptimizationHandlable):
super(Parameterizable, self).__init__(*args, **kwargs) super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.lists_and_dicts import ArrayList from GPy.core.parameterization.lists_and_dicts import ArrayList
self._parameters_ = ArrayList() self._parameters_ = ArrayList()
self._param_array_ = None
self.size = 0 self.size = 0
self._added_names_ = set() self._added_names_ = set()
self.__visited = False # for traversing in reverse order we need to know if we were here already
@property @property
def param_array(self): def param_array(self):
if not hasattr(self, '_param_array_'): """
Array representing the parameters of this class.
There is only one copy of all parameters in memory, two during optimization.
"""
if self._param_array_ is None:
self._param_array_ = np.empty(self.size, dtype=np.float64) self._param_array_ = np.empty(self.size, dtype=np.float64)
return self._param_array_ return self._param_array_
@ -723,6 +718,52 @@ class Parameterizable(OptimizationHandlable):
def param_array(self, arr): def param_array(self, arr):
self._param_array_ = arr self._param_array_ = arr
def traverse(self, visit, *args, **kwargs):
"""
Traverse the hierarchy performing visit(self, *args, **kwargs)
at every node passed by downwards. This function includes self!
See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example:
Collect all children:
children = []
self.traverse(children.append)
print children
"""
if not self.__visited:
visit(self, *args, **kwargs)
self.__visited = True
for c in self._parameters_:
c.traverse(visit, *args, **kwargs)
self.__visited = False
def traverse_parents(self, visit, *args, **kwargs):
"""
Traverse the hierarchy upwards, visiting all parents and their children except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example:
parents = []
self.traverse_parents(parents.append)
print parents
"""
if self.has_parent():
self.__visited = True
self._parent_._traverse_parents(visit, *args, **kwargs)
self.__visited = False
def _traverse_parents(self, visit, *args, **kwargs):
if not self.__visited:
self.__visited = True
visit(self, *args, **kwargs)
if self.has_parent():
self._parent_._traverse_parents(visit, *args, **kwargs)
self._parent_.traverse(visit, *args, **kwargs)
self.__visited = False
#========================================================================= #=========================================================================
# Gradient handling # Gradient handling
#========================================================================= #=========================================================================
@ -789,11 +830,10 @@ class Parameterizable(OptimizationHandlable):
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short()) # raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self._parameters_: elif param not in self._parameters_:
if param.has_parent(): if param.has_parent():
parent = param._parent_ def visit(parent, self):
while parent is not None:
if parent is self: if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy" raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
parent = parent._parent_ param.traverse_parents(visit, self)
param._parent_.remove_parameter(param) param._parent_.remove_parameter(param)
# make sure the size is set # make sure the size is set
if index is None: if index is None:
@ -837,7 +877,7 @@ class Parameterizable(OptimizationHandlable):
:param param: param object to remove from being a parameter of this parameterized object. :param param: param object to remove from being a parameter of this parameterized object.
""" """
if not param in self._parameters_: if not param in self._parameters_:
raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()) raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
start = sum([p.size for p in self._parameters_[:param._parent_index_]]) start = sum([p.size for p in self._parameters_[:param._parent_index_]])
self._remove_parameter_name(param) self._remove_parameter_name(param)

View file

@ -82,15 +82,15 @@ class Parameterized(Parameterizable):
import pydot # @UnresolvedImport import pydot # @UnresolvedImport
iamroot = False iamroot = False
if G is None: if G is None:
G = pydot.Dot(graph_type='digraph') G = pydot.Dot(graph_type='digraph', bgcolor=None)
iamroot=True iamroot=True
node = pydot.Node(id(self), shape='record', label=self.name) node = pydot.Node(id(self), shape='box', label=self.name)#, color='white')
G.add_node(node) G.add_node(node)
for child in self._parameters_: for child in self._parameters_:
child_node = child.build_pydot(G) child_node = child.build_pydot(G)
G.add_edge(pydot.Edge(node, child_node)) G.add_edge(pydot.Edge(node, child_node))#, color='white'))
for o in self.observers.keys(): for _, o, _ in self.observers:
label = o.name if hasattr(o, 'name') else str(o) label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label) observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node) G.add_node(observed_node)

View file

@ -100,6 +100,9 @@ class VariationalPosterior(Parameterized):
n.__dict__.update(dc) n.__dict__.update(dc)
n._parameters_[dc['mean']._parent_index_] = dc['mean'] n._parameters_[dc['mean']._parent_index_] = dc['mean']
n._parameters_[dc['variance']._parent_index_] = dc['variance'] n._parameters_[dc['variance']._parent_index_] = dc['variance']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size
n.size = n.mean.size + n.variance.size + oversize
n.ndim = n.mean.ndim n.ndim = n.mean.ndim
n.shape = n.mean.shape n.shape = n.mean.shape
n.num_data = n.mean.shape[0] n.num_data = n.mean.shape[0]

View file

@ -79,29 +79,32 @@ class SparseGP(GP):
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, full_cov=False): def _raw_predict(self, Xnew, full_cov=False, kern=None):
""" """
Make a prediction for the latent function values Make a prediction for the latent function values
""" """
if kern is None: kern = self.kern
if not isinstance(Xnew, VariationalPosterior): if not isinstance(Xnew, VariationalPosterior):
Kx = self.kern.K(self.Z, Xnew) Kx = kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew) Kxx = kern.K(Xnew)
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
#var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) #var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = var.squeeze() var = var.squeeze()
else: else:
Kxx = self.kern.Kdiag(Xnew) Kxx = kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
else: else:
Kx = self.kern.psi1(self.Z, Xnew) Kx = kern.psi1(self.Z, Xnew)
mu = np.dot(Kx, self.posterior.woodbury_vector) mu = np.dot(Kx, self.posterior.woodbury_vector)
if full_cov: if full_cov:
raise NotImplementedError, "TODO" raise NotImplementedError, "TODO"
else: else:
Kxx = self.kern.psi0(self.Z, Xnew) Kxx = kern.psi0(self.Z, Xnew)
psi2 = self.kern.psi2(self.Z, Xnew) psi2 = kern.psi2(self.Z, Xnew)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var return mu, var

View file

@ -107,14 +107,14 @@ class Symbolic_core():
# Do symbolic work to compute derivatives. # Do symbolic work to compute derivatives.
for key, func in self.expressions.items(): for key, func in self.expressions.items():
if func['function'].is_Matrix: # if func['function'].is_Matrix:
rows = func['function'].shape[0] # rows = func['function'].shape[0]
cols = func['function'].shape[1] # cols = func['function'].shape[1]
self.expressions[key]['derivative'] = sym.zeros(rows, cols) # self.expressions[key]['derivative'] = sym.zeros(rows, cols)
for i in xrange(rows): # for i in xrange(rows):
for j in xrange(cols): # for j in xrange(cols):
self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments) # self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
else: # else:
self.expressions[key]['derivative'] = extract_derivative(func['function'], derivative_arguments) self.expressions[key]['derivative'] = extract_derivative(func['function'], derivative_arguments)
def _set_parameters(self, parameters): def _set_parameters(self, parameters):

View file

@ -161,6 +161,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
import GPy import GPy
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from ..util.misc import param_to_array from ..util.misc import param_to_array
import numpy as np
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
@ -174,11 +175,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
if plot: if plot:
y = m.Y
fig, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels) m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show(y) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close(fig) plt.close(fig)
@ -186,7 +186,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
_np.random.seed(1234) _np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None] x = _np.linspace(0, 4 * _np.pi, N)[:, None]
s1 = _np.vectorize(lambda x: _np.sin(x)) s1 = _np.vectorize(lambda x: _np.sin(x))
s2 = _np.vectorize(lambda x: _np.cos(x)**2) s2 = _np.vectorize(lambda x: _np.cos(x)**2)
@ -408,13 +408,13 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1() data = GPy.util.datasets.osu_run1()
# optimize # optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000)
if plot: if plot:
plt.clf plt.clf
ax = m.plot_latent() ax = m.plot_latent()
y = m.Y[0, :] y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, latent_axes=ax) vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -475,24 +475,28 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import numpy as np
import GPy import GPy
data = GPy.util.datasets.osu_run1() data = GPy.util.datasets.osu_run1()
Q = 6 Q = 6
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
m.data = data
m.likelihood.variance = 0.001
# optimize # optimize
m.ensure_default_constraints() if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300)
if optimize: m.optimize('scg', messages=verbose, max_iters=200, xtol=1e-300, ftol=1e-300)
m._set_params(m._get_params())
if plot: if plot:
plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
plt.sca(latent_axes) plt.sca(latent_axes)
m.plot_latent() m.plot_latent(ax=latent_axes)
y = m.likelihood.Y[0, :].copy() y = m.Y[:1, :].copy()
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish') plt.draw()
#raw_input('Press enter to finish')
return m return m
@ -509,7 +513,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot: if plot:
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel']) data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')

View file

@ -6,6 +6,10 @@
# some platforms, hence this option. # some platforms, hence this option.
openmp=False openmp=False
[datasets]
# location for the local data cache
dir=$HOME/tmp/GPy-datasets/
[anaconda] [anaconda]
# if you have an anaconda python installation please specify it here. # if you have an anaconda python installation please specify it here.
installed = False installed = False

View file

@ -32,7 +32,7 @@ def print_out(len_maxiters, fnow, current_grad, beta, iteration):
sys.stdout.flush() sys.stdout.flush()
def exponents(fnow, current_grad): def exponents(fnow, current_grad):
exps = [np.abs(fnow), current_grad] exps = [np.abs(np.float(fnow)), current_grad]
return np.sign(exps) * np.log10(exps).astype(int) return np.sign(exps) * np.log10(exps).astype(int)
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None): def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):

View file

@ -3,7 +3,7 @@ from _src.rbf import RBF
from _src.linear import Linear, LinearFull from _src.linear import Linear, LinearFull
from _src.static import Bias, White from _src.static import Bias, White
from _src.brownian import Brownian from _src.brownian import Brownian
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs, Hierarchical from _src.independent_outputs import IndependentOutputs, Hierarchical
@ -13,6 +13,8 @@ from _src.ODE_UY import ODE_UY
from _src.ODE_UYC import ODE_UYC from _src.ODE_UYC import ODE_UYC
from _src.ODE_st import ODE_st from _src.ODE_st import ODE_st
from _src.ODE_t import ODE_t from _src.ODE_t import ODE_t
from _src.poly import Poly
# TODO: put this in an init file somewhere # TODO: put this in an init file somewhere
#I'm commenting this out because the files were not added. JH. Remember to add the files before commiting #I'm commenting this out because the files were not added. JH. Remember to add the files before commiting
try: try:

View file

@ -170,7 +170,4 @@ class Add(CombinationKernel):
return self return self
def input_sensitivity(self): def input_sensitivity(self):
in_sen = np.zeros(self.input_dim) return reduce(np.add, [k.input_sensitivity() for k in self.parts])
for i, p in enumerate(self.parts):
in_sen[p.active_dims] += p.input_sensitivity()
return in_sen

View file

@ -32,7 +32,7 @@ def index_to_slices(index):
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret return ret
class IndependentOutputs(Kern): class IndependentOutputs(CombinationKernel):
""" """
A kernel which can represent several independent functions. this kernel A kernel which can represent several independent functions. this kernel
'switches off' parts of the matrix where the output indexes are different. 'switches off' parts of the matrix where the output indexes are different.
@ -180,6 +180,9 @@ class Hierarchical(CombinationKernel):
def Kdiag(self,X): def Kdiag(self,X):
return np.diag(self.K(X)) return np.diag(self.K(X))
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def update_gradients_full(self,dL_dK,X,X2=None): def update_gradients_full(self,dL_dK,X,X2=None):
slices = [index_to_slices(X[:,i]) for i in self.extra_dims] slices = [index_to_slices(X[:,i]) for i in self.extra_dims]
if X2 is None: if X2 is None:

View file

@ -34,36 +34,24 @@ class Kern(Parameterized):
is the active_dimensions of inputs X we will work on. is the active_dimensions of inputs X we will work on.
All kernels will get sliced Xes as inputs, if active_dims is not None All kernels will get sliced Xes as inputs, if active_dims is not None
Only positive integers are allowed in active_dims!
if active_dims is None, slicing is switched off and all X will be passed through as given. if active_dims is None, slicing is switched off and all X will be passed through as given.
:param int input_dim: the number of input dimensions to the function :param int input_dim: the number of input dimensions to the function
:param array-like|slice|None active_dims: list of indices on which dimensions this kernel works on, or none if no slicing :param array-like|None active_dims: list of indices on which dimensions this kernel works on, or none if no slicing
Do not instantiate. Do not instantiate.
""" """
super(Kern, self).__init__(name=name, *a, **kw) super(Kern, self).__init__(name=name, *a, **kw)
try: self.input_dim = int(input_dim)
self.input_dim = int(input_dim)
self.active_dims = active_dims# if active_dims is not None else slice(0, input_dim, 1) if active_dims is None:
except TypeError: active_dims = np.arange(input_dim)
# input_dim is something else then an integer
self.input_dim = input_dim self.active_dims = np.array(active_dims, dtype=int)
if active_dims is not None:
print "WARNING: given input_dim={} is not an integer and active_dims={} is given, switching off slicing" assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, self.active_dims.size, self.active_dims)
self.active_dims = None
if self.active_dims is not None and self.input_dim is not None:
assert isinstance(self.active_dims, (slice, list, tuple, np.ndarray)), 'active_dims needs to be an array-like or slice object over dimensions, {} given'.format(self.active_dims.__class__)
if isinstance(self.active_dims, slice):
self.active_dims = slice(self.active_dims.start or 0, self.active_dims.stop or self.input_dim, self.active_dims.step or 1)
active_dim_size = int(np.round((self.active_dims.stop-self.active_dims.start)/self.active_dims.step))
elif isinstance(self.active_dims, np.ndarray):
#assert np.all(self.active_dims >= 0), 'active dimensions need to be positive. negative indexing is not allowed'
assert self.active_dims.ndim == 1, 'only flat indices allowed, given active_dims.shape={}, provide only indexes to the dimensions (columns) of the input'.format(self.active_dims.shape)
active_dim_size = self.active_dims.size
else:
active_dim_size = len(self.active_dims)
assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
self._sliced_X = 0 self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU self.useGPU = self._support_GPU and useGPU
@ -176,8 +164,8 @@ class Kern(Parameterized):
""" """
Shortcut for tensor `prod`. Shortcut for tensor `prod`.
""" """
assert self.active_dims == range(self.input_dim), "Can only use kernels, which have their input_dims defined from 0" assert np.all(self.active_dims == range(self.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
assert other.active_dims == range(other.input_dim), "Can only use kernels, which have their input_dims defined from 0" assert np.all(other.active_dims == range(other.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
other.active_dims += self.input_dim other.active_dims += self.input_dim
return self.prod(other) return self.prod(other)
@ -202,12 +190,12 @@ class Kern(Parameterized):
return Prod([self, other], name) return Prod([self, other], name)
def _check_input_dim(self, X): def _check_input_dim(self, X):
assert X.shape[1] == self.input_dim, "You did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(X.shape[1], self.input_dim) assert X.shape[1] == self.input_dim, "{} did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)
def _check_active_dims(self, X): def _check_active_dims(self, X):
assert X.shape[1] >= len(np.r_[self.active_dims]), "At least {} dimensional X needed, X.shape={!s}".format(len(np.r_[self.active_dims]), X.shape) assert X.shape[1] >= len(self.active_dims), "At least {} dimensional X needed, X.shape={!s}".format(len(self.active_dims), X.shape)
class CombinationKernel(Kern): class CombinationKernel(Kern):
""" """
Abstract super class for combination kernels. Abstract super class for combination kernels.
@ -222,9 +210,10 @@ class CombinationKernel(Kern):
:param list kernels: List of kernels to combine (can be only one element) :param list kernels: List of kernels to combine (can be only one element)
:param str name: name of the combination kernel :param str name: name of the combination kernel
:param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on :param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
""" """
assert all([isinstance(k, Kern) for k in kernels]) assert all([isinstance(k, Kern) for k in kernels])
extra_dims = np.array(extra_dims, dtype=int)
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims) input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
# initialize the kernel with the full input_dim # initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name) super(CombinationKernel, self).__init__(input_dim, active_dims, name)
@ -238,16 +227,18 @@ class CombinationKernel(Kern):
def get_input_dim_active_dims(self, kernels, extra_dims = None): def get_input_dim_active_dims(self, kernels, extra_dims = None):
#active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) #active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
#active_dims = np.array(np.concatenate((active_dims, extra_dims if extra_dims is not None else [])), dtype=int) #active_dims = np.array(np.concatenate((active_dims, extra_dims if extra_dims is not None else [])), dtype=int)
input_dim = np.array([k.input_dim for k in kernels]) input_dim = reduce(max, (k.active_dims.max() for k in kernels)) + 1
if np.all(input_dim[0]==input_dim):
input_dim = input_dim[0] if extra_dims is not None:
active_dims = None input_dim += extra_dims.size
active_dims = np.arange(input_dim)
return input_dim, active_dims return input_dim, active_dims
def input_sensitivity(self): def input_sensitivity(self):
raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...") raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...")
def _check_input_dim(self, X): def _check_active_dims(self, X):
return return
def _check_input_dim(self, X): def _check_input_dim(self, X):

View file

@ -12,6 +12,7 @@ from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this from ...util.caching import Cache_this
from ...core.parameterization import variational from ...core.parameterization import variational
from psi_comp import linear_psi_comp from psi_comp import linear_psi_comp
from ...util.config import *
class Linear(Kern): class Linear(Kern):
""" """
@ -224,12 +225,23 @@ class Linear(Kern):
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :] AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2) AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2. AZZA_2 = AZZA/2.
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(m,mm,q,qq,factor,tmp)'
header_string = '#include <omp.h>'
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
else:
pragma_string = ''
header_string = ''
weave_options = {'extra_compile_args': ['-O3']}
#Using weave, we can exploit the symmetry of this problem: #Using weave, we can exploit the symmetry of this problem:
code = """ code = """
int n, m, mm,q,qq; int n, m, mm,q,qq;
double factor,tmp; double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp) %s
for(n=0;n<N;n++){ for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){ for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){ for(mm=0;mm<=m;mm++){
@ -253,26 +265,36 @@ class Linear(Kern):
} }
} }
} }
""" """ % pragma_string
support_code = """ support_code = """
#include <omp.h> %s
#include <math.h> #include <math.h>
""" """ % header_string
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
mu = vp.mean mu = vp.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code,
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target): def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
AZA = self.variances*self._ZAinner(vp, Z) AZA = self.variances*self._ZAinner(vp, Z)
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(n,mm,q)'
header_string = '#include <omp.h>'
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
else:
pragma_string = ''
header_string = ''
weave_options = {'extra_compile_args': ['-O3']}
code=""" code="""
int n,m,mm,q; int n,m,mm,q;
#pragma omp parallel for private(n,mm,q) %s
for(m=0;m<num_inducing;m++){ for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){ for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){ for(mm=0;mm<num_inducing;mm++){
@ -282,18 +304,15 @@ class Linear(Kern):
} }
} }
} }
""" """ % pragma_string
support_code = """ support_code = """
#include <omp.h> %s
#include <math.h> #include <math.h>
""" """ % header_string
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1] N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
mu = param_to_array(vp.mean) mu = param_to_array(vp.mean)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code,
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)

42
GPy/kern/_src/poly.py Normal file
View file

@ -0,0 +1,42 @@
# Copyright (c) 2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...util.misc import param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Poly(Kern):
"""
Polynomial kernel
"""
def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
super(Poly, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.add_parameter(self.variance)
self.order=order
def K(self, X, X2=None):
return (self._dot_product(X, X2) + 1.)**self.order * self.variance
def _dot_product(self, X, X2=None):
if X2 is None:
return np.dot(X, X.T)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return self.variance*(np.square(X).sum(1) + 1.)**self.order
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.sum(dL_dK * (self._dot_product(X, X2) + 1.)**self.order)
def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError

View file

@ -10,6 +10,7 @@ from GPy.util.caching import Cache_this
from ...core.parameterization import variational from ...core.parameterization import variational
from psi_comp import ssrbf_psi_comp from psi_comp import ssrbf_psi_comp
from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF
from ...util.config import *
class RBF(Stationary): class RBF(Stationary):
""" """
@ -231,6 +232,16 @@ class RBF(Stationary):
@Cache_this(limit=1) @Cache_this(limit=1)
def _psi2computations(self, Z, vp): def _psi2computations(self, Z, vp):
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(tmp, exponent_tmp)'
header_string = '#include <omp.h>'
libraries = ['gomp']
else:
pragma_string = ''
header_string = ''
libraries = []
mu, S = vp.mean, vp.variance mu, S = vp.mean, vp.variance
N, Q = mu.shape N, Q = mu.shape
@ -253,8 +264,7 @@ class RBF(Stationary):
variance_sq = float(np.square(self.variance)) variance_sq = float(np.square(self.variance))
code = """ code = """
double tmp, exponent_tmp; double tmp, exponent_tmp;
%s
#pragma omp parallel for private(tmp, exponent_tmp)
for (int n=0; n<N; n++) for (int n=0; n<N; n++)
{ {
for (int m=0; m<M; m++) for (int m=0; m<M; m++)
@ -278,20 +288,20 @@ class RBF(Stationary):
tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q); tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
exponent_tmp += tmp; exponent_tmp += tmp;
} }
//compute psi2 by exponontiating //compute psi2 by exponentiating
psi2(n,m,mm) = variance_sq * exp(exponent_tmp); psi2(n,m,mm) = variance_sq * exp(exponent_tmp);
psi2(n,mm,m) = psi2(n,m,mm); psi2(n,mm,m) = psi2(n,m,mm);
} }
} }
} }
""" """ % pragma_string
support_code = """ support_code = """
#include <omp.h> %s
#include <math.h> #include <math.h>
""" """ % header_string
mu = param_to_array(mu) mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=libraries,
arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'], arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options) type_converters=weave.converters.blitz, **self.weave_options)
@ -303,12 +313,20 @@ class RBF(Stationary):
#return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale #return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale
result = np.zeros(self.input_dim) result = np.zeros(self.input_dim)
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for reduction(+:tmp)'
header_string = '#include <omp.h>'
libraries = ['gomp']
else:
pragma_string = ''
header_string = ''
libraries = []
code = """ code = """
double tmp; double tmp;
for(int q=0; q<Q; q++) for(int q=0; q<Q; q++)
{ {
tmp = 0.0; tmp = 0.0;
#pragma omp parallel for reduction(+:tmp) %s
for(int n=0; n<N; n++) for(int n=0; n<N; n++)
{ {
for(int m=0; m<M; m++) for(int m=0; m<M; m++)
@ -326,16 +344,16 @@ class RBF(Stationary):
result(q) = tmp; result(q) = tmp;
} }
""" """ % pragma_string
support_code = """ support_code = """
#include <omp.h> %s
#include <math.h> #include <math.h>
""" """ % header_string
N,Q = S.shape N,Q = S.shape
M = psi2.shape[-1] M = psi2.shape[-1]
S = param_to_array(S) S = param_to_array(S)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=libraries,
arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'], arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
type_converters=weave.converters.blitz, **self.weave_options) type_converters=weave.converters.blitz, **self.weave_options)

View file

@ -192,6 +192,27 @@ class Exponential(Stationary):
def dK_dr(self, r): def dK_dr(self, r):
return -0.5*self.K_of_r(r) return -0.5*self.K_of_r(r)
class OU(Stationary):
"""
OU kernel:
.. math::
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='OU'):
super(OU, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-r)
def dK_dr(self,r):
return -1.*self.variance*np.exp(-r)
class Matern32(Stationary): class Matern32(Stationary):
""" """
Matern 3/2 kernel: Matern 3/2 kernel:

View file

@ -6,18 +6,18 @@ from poisson import Poisson
from student_t import StudentT from student_t import StudentT
from likelihood import Likelihood from likelihood import Likelihood
from mixed_noise import MixedNoise from mixed_noise import MixedNoise
# TODO need to fix this in a config file. #TODO need to fix this in a config file.
# TODO need to add the files to the git repo! #TODO need to add the files to the git repo!
#try: try:
#import sympy as sym import sympy as sym
#sympy_available=True sympy_available=True
#except ImportError: except ImportError:
#sympy_available=False sympy_available=False
#if sympy_available: if sympy_available:
## These are likelihoods that rely on symbolic. #These are likelihoods that rely on symbolic.
#from symbolic import Symbolic from symbolic import Symbolic
#from sstudent_t import SstudentT from sstudent_t import SstudentT
#from negative_binomial import Negative_binomial from negative_binomial import Negative_binomial
##from skew_normal import Skew_normal from skew_normal import Skew_normal
#from skew_exponential import Skew_exponential from skew_exponential import Skew_exponential
#from null_category import Null_category # from null_category import Null_category

View file

@ -0,0 +1,48 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from symbolic import Symbolic
from scipy import stats
class Ordinal(Symbolic):
"""
Ordinal
.. math::
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
.. Note::
Y takes non zero integer values..
link function should have a positive domain, e.g. log (default).
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, categories=3, gp_link=None):
if gp_link is None:
gp_link = link_functions.Identity()
dispersion = sym.Symbol('width', positive=True, real=True)
y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
f_0 = sym.Symbol('f_0', positive=True, real=True)
log_pdf = create_matrix('log_pdf', 1, categories)
log_pdf[0] = normcdfln(-f_0)
if categories>2:
w = create_matrix('w', 1, categories)
log_pdf[categories-1] = normcdfln(w.sum() + f_0)
for i in range(1, categories-1):
log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) )
else:
log_pdf[1] = normcdfln(f_0)
log_pdf.index_var = y_0
super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal')
# TODO: Check this.
self.log_concave = True

View file

@ -42,7 +42,7 @@ class BayesianGPLVM(SparseGP):
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
if kernel is None: if kernel is None:
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
if likelihood is None: if likelihood is None:
likelihood = Gaussian() likelihood = Gaussian()

View file

@ -97,7 +97,7 @@ def plot_latent(model, labels=None, which_indices=None,
elif type(ul) is np.int64: elif type(ul) is np.int64:
this_label = 'class %i' % ul this_label = 'class %i' % ul
else: else:
this_label = 'class %i' % i this_label = unicode(ul)
m = marker.next() m = marker.next()
index = np.nonzero(labels == ul)[0] index = np.nonzero(labels == ul)[0]
@ -121,7 +121,7 @@ def plot_latent(model, labels=None, which_indices=None,
if plot_inducing: if plot_inducing:
Z = param_to_array(model.Z) Z = param_to_array(model.Z)
ax.plot(Z[:, input_1], Z[:, input_2], '^w') ax.plot(Z[:, input_1], Z[:, input_2], '^w')
ax.set_xlim((xmin, xmax)) ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax)) ax.set_ylim((ymin, ymax))
@ -132,7 +132,7 @@ def plot_latent(model, labels=None, which_indices=None,
except Exception as e: except Exception as e:
print "Could not invoke tight layout: {}".format(e) print "Could not invoke tight layout: {}".format(e)
pass pass
if updates: if updates:
try: try:
ax.figure.canvas.show() ax.figure.canvas.show()

View file

@ -14,7 +14,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None, levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False, plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None): linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'):
""" """
Plot the posterior of the GP. Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
for d in which_data_ycols: for d in which_data_ycols:
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
#optionally plot some samples #optionally plot some samples
if samples: #NOTE not tested with fixed_inputs if samples: #NOTE not tested with fixed_inputs

View file

@ -74,13 +74,16 @@ class vector_show(matplotlib_show):
""" """
def __init__(self, vals, axes=None): def __init__(self, vals, axes=None):
matplotlib_show.__init__(self, vals, axes) matplotlib_show.__init__(self, vals, axes)
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals) #assert vals.ndim == 2, "Please give a vector in [n x 1] to plot"
#assert vals.shape[1] == 1, "only showing a vector in one dimension"
self.size = vals.size
self.handle = self.axes.plot(np.arange(0, vals.size)[:, None], vals)[0]
def modify(self, vals): def modify(self, vals):
self.vals = vals.copy() self.vals = vals.copy()
for handle, vals in zip(self.handle, self.vals.T): xdata, ydata = self.handle.get_data()
xdata, ydata = handle.get_data() assert vals.size == self.size, "values passed into modify changed size! vals.size:{} != in.size:{}".format(vals.size, self.size)
handle.set_data(xdata, vals) self.handle.set_data(xdata, self.vals)
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
@ -94,13 +97,12 @@ class lvm(matplotlib_show):
:type data_visualize: visualize.data_show type. :type data_visualize: visualize.data_show type.
:param latent_axes: the axes where the latent visualization should be plotted. :param latent_axes: the axes where the latent visualization should be plotted.
""" """
if vals == None: if vals is None:
if isinstance(model.X, VariationalPosterior): if isinstance(model.X, VariationalPosterior):
vals = param_to_array(model.X.mean) vals = param_to_array(model.X.mean)
else: else:
vals = param_to_array(model.X) vals = param_to_array(model.X)
vals = param_to_array(vals)
matplotlib_show.__init__(self, vals, axes=latent_axes) matplotlib_show.__init__(self, vals, axes=latent_axes)
if isinstance(latent_axes,mpl.axes.Axes): if isinstance(latent_axes,mpl.axes.Axes):
@ -273,7 +275,7 @@ class image_show(matplotlib_show):
:type preset_mean: double :type preset_mean: double
:param preset_std: the preset standard deviation of a scaled image. :param preset_std: the preset standard deviation of a scaled image.
:type preset_std: double""" :type preset_std: double"""
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean = 0., preset_std = -1., select_image=0): def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean=0., preset_std=1., select_image=0):
matplotlib_show.__init__(self, vals, axes) matplotlib_show.__init__(self, vals, axes)
self.dimensions = dimensions self.dimensions = dimensions
self.transpose = transpose self.transpose = transpose
@ -323,13 +325,12 @@ class image_show(matplotlib_show):
self.vals = -self.vals self.vals = -self.vals
# un-normalizing, for visualisation purposes: # un-normalizing, for visualisation purposes:
if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255) self.vals = self.vals*self.preset_std + self.preset_mean
self.vals = self.vals*self.preset_std + self.preset_mean # Clipping the values:
# Clipping the values: #self.vals[self.vals < 0] = 0
self.vals[self.vals < 0] = 0 #self.vals[self.vals > 255] = 255
self.vals[self.vals > 255] = 255 #else:
else: #self.vals = 255*(self.vals - self.vals.min())/(self.vals.max() - self.vals.min())
self.vals = 255*(self.vals - self.vals.min())/(self.vals.max() - self.vals.min())
if not self.palette == []: # applying using an image palette (e.g. if the image has been quantized) if not self.palette == []: # applying using an image palette (e.g. if the image has been quantized)
from PIL import Image from PIL import Image
self.vals = Image.fromarray(self.vals.astype('uint8')) self.vals = Image.fromarray(self.vals.astype('uint8'))

View file

@ -304,23 +304,13 @@ class KernelTestsMiscellaneous(unittest.TestCase):
def setUp(self): def setUp(self):
N, D = 100, 10 N, D = 100, 10
self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.random.uniform(-10,10,D) self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.random.uniform(-10,10,D)
self.rbf = GPy.kern.RBF(2, active_dims=slice(0,4,2)) self.rbf = GPy.kern.RBF(2, active_dims=np.arange(0,4,2))
self.linear = GPy.kern.Linear(2, active_dims=(3,9)) self.linear = GPy.kern.Linear(2, active_dims=(3,9))
self.matern = GPy.kern.Matern32(3, active_dims=np.array([1,7,9])) self.matern = GPy.kern.Matern32(3, active_dims=np.array([1,7,9]))
self.sumkern = self.rbf + self.linear self.sumkern = self.rbf + self.linear
self.sumkern += self.matern self.sumkern += self.matern
self.sumkern.randomize() self.sumkern.randomize()
def test_active_dims(self):
# test the automatic dim detection expression for slices:
start, stop = 0, 277
for i in range(start,stop,7):
for j in range(1,4):
GPy.kern.Kern(int(np.round((i+1)/j)), slice(0, i+1, j), "testkern")
# test the ability to have only one dim
sk = GPy.kern.RBF(2) + GPy.kern.Matern32(2)
self.assertEqual(sk.input_dim, 2)
def test_which_parts(self): def test_which_parts(self):
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X))) self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X)))
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X))) self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X)))
@ -344,10 +334,15 @@ class KernelTestsNonContinuous(unittest.TestCase):
self.X2[(N0*2):, -1] = 1 self.X2[(N0*2):, -1] = 1
def test_IndependentOutputs(self): def test_IndependentOutputs(self):
k = GPy.kern.RBF(self.D) k = GPy.kern.RBF(self.D, active_dims=range(self.D))
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, active_dims=range(self.D), name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
def test_Hierarchical(self):
k = [GPy.kern.RBF(2, active_dims=[0,2], name='rbf1'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf2')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))

View file

@ -27,11 +27,11 @@ class ArrayCoreTest(unittest.TestCase):
class ParameterizedTest(unittest.TestCase): class ParameterizedTest(unittest.TestCase):
def setUp(self): def setUp(self):
self.rbf = GPy.kern.RBF(1) self.rbf = GPy.kern.RBF(20)
self.white = GPy.kern.White(1) self.white = GPy.kern.White(1)
from GPy.core.parameterization import Param from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logistic from GPy.core.parameterization.transformations import Logistic
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) self.param = Param('param', np.random.uniform(0,1,(25,2)), Logistic(0, 1))
self.test1 = GPy.core.Parameterized("test model") self.test1 = GPy.core.Parameterized("test model")
self.test1.param = self.param self.test1.param = self.param
@ -142,6 +142,8 @@ class ParameterizedTest(unittest.TestCase):
self.testmodel.randomize() self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale) self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_regular_expression_misc(self): def test_regular_expression_misc(self):
self.testmodel.kern.lengthscale.fix() self.testmodel.kern.lengthscale.fix()
val = float(self.testmodel.kern.lengthscale) val = float(self.testmodel.kern.lengthscale)

View file

@ -132,6 +132,9 @@ class Test(ListDictTestCase):
self.assertIsNot(par.full_gradient, pcopy.full_gradient) self.assertIsNot(par.full_gradient, pcopy.full_gradient)
self.assertTrue(pcopy.checkgrad()) self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0)) self.assert_(np.any(pcopy.gradient!=0.0))
pcopy.optimize('bfgs')
par.optimize('bfgs')
np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=.001)
with tempfile.TemporaryFile('w+b') as f: with tempfile.TemporaryFile('w+b') as f:
par.pickle(f) par.pickle(f)
f.seek(0) f.seek(0)

View file

@ -1,84 +1,108 @@
from ..core.parameterization.parameter_core import Observable from ..core.parameterization.parameter_core import Observable
import itertools import itertools, collections, weakref
class Cacher(object): class Cacher(object):
"""
"""
def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()): def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
"""
Parameters:
***********
:param callable operation: function to cache
:param int limit: depth of cacher
:param [int] ignore_args: list of indices, pointing at arguments to ignore in *args of operation(*args). This includes self!
:param [str] force_kwargs: list of kwarg names (strings). If a kwarg with that name is given, the cacher will force recompute and wont cache anything.
"""
self.limit = int(limit) self.limit = int(limit)
self.ignore_args = ignore_args self.ignore_args = ignore_args
self.force_kwargs = force_kwargs self.force_kwargs = force_kwargs
self.operation=operation self.operation=operation
self.cached_inputs = [] self.order = collections.deque()
self.cached_outputs = [] self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id
self.inputs_changed = []
#=======================================================================
# point from each ind_id to [ref(obj), cache_ids]
# 0: a weak reference to the object itself
# 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id)
self.cached_input_ids = {}
#=======================================================================
self.cached_outputs = {} # point from cache_ids to outputs
self.inputs_changed = {} # point from cache_ids to bools
def combine_args_kw(self, args, kw):
"Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
def preprocess(self, combined_args_kw, ignore_args):
"get the cacheid (conc. string of argument ids in order) ignoring ignore_args"
return "".join(str(id(a)) for i,a in enumerate(combined_args_kw) if i not in ignore_args)
def ensure_cache_length(self, cache_id):
"Ensures the cache is within its limits and has one place free"
if len(self.order) == self.limit:
# we have reached the limit, so lets release one element
cache_id = self.order.popleft()
combined_args_kw = self.cached_inputs[cache_id]
for ind in combined_args_kw:
ind_id = id(ind)
ref, cache_ids = self.cached_input_ids[ind_id]
if len(cache_ids) == 1 and ref() is not None:
ref().remove_observer(self, self.on_cache_changed)
del self.cached_input_ids[ind_id]
else:
cache_ids.remove(cache_id)
self.cached_input_ids[ind_id] = [ref, cache_ids]
del self.cached_outputs[cache_id]
del self.inputs_changed[cache_id]
del self.cached_inputs[cache_id]
def add_to_cache(self, cache_id, combined_args_kw, output):
self.inputs_changed[cache_id] = False
self.cached_outputs[cache_id] = output
self.order.append(cache_id)
self.cached_inputs[cache_id] = combined_args_kw
for a in combined_args_kw:
ind_id = id(a)
v = self.cached_input_ids.get(ind_id, [weakref.ref(a), []])
v[1].append(cache_id)
if len(v[1]) == 1:
a.add_observer(self, self.on_cache_changed)
self.cached_input_ids[ind_id] = v
def __call__(self, *args, **kw): def __call__(self, *args, **kw):
""" """
A wrapper function for self.operation, A wrapper function for self.operation,
""" """
#ensure that specified arguments are ignored # 1: Check whether we have forced recompute arguments:
items = sorted(kw.items(), key=lambda x: x[0])
oa_all = args + tuple(a for _,a in items)
if len(self.ignore_args) != 0:
oa = [a for i,a in itertools.chain(enumerate(args), items) if i not in self.ignore_args and i not in self.force_kwargs]
else:
oa = oa_all
# this makes sure we only add an observer once, and that None can be in args
observable_args = []
for a in oa:
if (not any(a is ai for ai in observable_args)) and a is not None:
observable_args.append(a)
#make sure that all the found argument really are observable:
#otherswise don't cache anything, pass args straight though
if not all([isinstance(arg, Observable) for arg in observable_args]):
return self.operation(*args, **kw)
if len(self.force_kwargs) != 0: if len(self.force_kwargs) != 0:
# check if there are force args, which force reloading
for k in self.force_kwargs: for k in self.force_kwargs:
if k in kw and kw[k] is not None: if k in kw and kw[k] is not None:
return self.operation(*args, **kw) return self.operation(*args, **kw)
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
# return self.operation(*args, **kw)
#if the result is cached, return the cached computation # 2: preprocess and get the unique id string for this call
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs] combined_args_kw = self.combine_args_kw(args, kw)
cache_id = self.preprocess(combined_args_kw, self.ignore_args)
# 2: if anything is not cachable, we will just return the operation, without caching
if reduce(lambda a,b: a or (not isinstance(b, Observable)), combined_args_kw, False):
return self.operation(*args, **kw)
# 3&4: check whether this cache_id has been cached, then has it changed?
try: try:
if any(state): if(self.inputs_changed[cache_id]):
i = state.index(True) # 4: This happens, when one element has changed for this cache id
if self.inputs_changed[i]: self.inputs_changed[cache_id] = False
#(elements of) the args have changed since we last computed: update self.cached_outputs[cache_id] = self.operation(*args, **kw)
self.cached_outputs[i] = self.operation(*args, **kw) except KeyError:
self.inputs_changed[i] = False # 3: This is when we never saw this chache_id:
return self.cached_outputs[i] self.ensure_cache_length(cache_id)
else: self.add_to_cache(cache_id, combined_args_kw, self.operation(*args, **kw))
#first time we've seen these arguments: compute
#first make sure the depth limit isn't exceeded
if len(self.cached_inputs) == self.limit:
args_ = self.cached_inputs.pop(0)
args_ = [a for i,a in enumerate(args_) if i not in self.ignore_args and i not in self.force_kwargs]
[a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
self.inputs_changed.pop(0)
self.cached_outputs.pop(0)
#compute
self.cached_inputs.append(oa_all)
self.cached_outputs.append(self.operation(*args, **kw))
self.inputs_changed.append(False)
[a.add_observer(self, self.on_cache_changed) for a in observable_args]
return self.cached_outputs[-1]#return
except: except:
self.reset() self.reset()
raise raise
# 5: We have seen this cache_id and it is cached:
return self.cached_outputs[cache_id]
def on_cache_changed(self, direct, which=None): def on_cache_changed(self, direct, which=None):
""" """
@ -86,17 +110,19 @@ class Cacher(object):
this function gets 'hooked up' to the inputs when we cache them, and upon their elements being changed we update here. this function gets 'hooked up' to the inputs when we cache them, and upon their elements being changed we update here.
""" """
self.inputs_changed = [any([a is direct or a is which for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)] for ind_id in [id(direct), id(which)]:
_, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
for cache_id in cache_ids:
self.inputs_changed[cache_id] = True
def reset(self): def reset(self):
""" """
Totally reset the cache Totally reset the cache
""" """
[[a.remove_observer(self, self.on_cache_changed) for a in args if isinstance(a, Observable)] for args in self.cached_inputs] [a().remove_observer(self, self.on_cache_changed) if (a() is not None) else None for [a, _] in self.cached_input_ids.values()]
[[a.remove_observer(self, self.reset) for a in args if isinstance(a, Observable)] for args in self.cached_inputs] self.cached_input_ids = {}
self.cached_inputs = [] self.cached_outputs = {}
self.cached_outputs = [] self.inputs_changed = {}
self.inputs_changed = []
def __deepcopy__(self, memo=None): def __deepcopy__(self, memo=None):
return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs) return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs)

View file

@ -1,409 +1,532 @@
{ {
"rogers_girolami_data":{ "ankur_pose_data": {
"files":[ "citation": "3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.",
[ "details": "Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing.",
"firstcoursemldata.tar.gz" "files": [
] [
], "ankurDataPoseSilhouette.mat"
"license":null, ]
"citation":"A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146", ],
"details":"Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.", "license": null,
"urls":[ "size": 1,
"https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/" "urls": [
], "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/ankur_pose_data/"
"suffices":[ ]
[ },
"?dl=1" "boston_housing": {
] "citation": "Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.",
], "details": "The Boston Housing data relates house values in Boston to a range of input variables.",
"size":21949154 "files": [
}, [
"ankur_pose_data":{ "Index",
"files":[ "housing.data",
[ "housing.names"
"ankurDataPoseSilhouette.mat" ]
] ],
], "license": null,
"citation":"3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.", "size": 51276,
"license":null, "urls": [
"urls":[ "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/"
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/ankur_pose_data/" ]
], },
"details":"Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing.", "boxjenkins_airline": {
"size":1 "citation": "Box & Jenkins (1976), in file: data/airpass, Description: International airline passengers: monthly totals in thousands. Jan 49 \\u2013 Dec 60",
}, "details": "International airline passengers, monthly totals from January 1949 to December 1960.",
"football_data":{ "files": [
"files":[ [
[ "boxjenkins_airline.csv"
"E0.csv", "E1.csv", "E2.csv", "E3.csv" ]
] ],
], "license": "You may copy and redistribute the data. You may make derivative works from the data. You may use the data for commercial purposes. You may not sublicence the data when redistributing it. You may not redistribute the data under a different license. Source attribution on any use of this data: Must refer source.",
"citation":"", "size": 46779,
"license":null, "urls": [
"urls":[ "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/boxjenkins_airline/"
"http://www.football-data.co.uk/mmz4281/" ]
], },
"details":"Results of English football matches since 1993/94 season.", "brendan_faces": {
"size":1 "citation": "Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.",
}, "details": "A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.",
"google_trends":{ "files": [
"files":[ [
[ "frey_rawface.mat"
] ]
], ],
"citation":"", "license": null,
"license":null, "size": 1100584,
"urls":[ "urls": [
"http://www.google.com/trends/" "http://www.cs.nyu.edu/~roweis/data/"
], ]
"details":"Google trends results.", },
"size":0 "cmu_mocap_full": {
}, "citation": "Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\\nThe database was created with funding from NSF EIA-0196217.",
"osu_accad":{ "details": "CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
"files":[ "files": [
[ [
"swagger1TXT.ZIP", "allasfamc.zip"
"handspring1TXT.ZIP", ]
"quickwalkTXT.ZIP", ],
"run1TXT.ZIP", "license": "From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.",
"sprintTXT.ZIP", "size": null,
"dogwalkTXT.ZIP", "urls": [
"camper_04TXT.ZIP", "http://mocap.cs.cmu.edu/subjects"
"dance_KB3_TXT.ZIP", ]
"per20_TXT.ZIP", },
"perTWO07_TXT.ZIP", "creep_rupture": {
"perTWO13_TXT.ZIP", "citation": "Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.",
"perTWO14_TXT.ZIP", "details": "Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.",
"perTWO15_TXT.ZIP", "files": [
"perTWO16_TXT.ZIP" [
], "creeprupt.tar"
[ ]
"connections.txt" ],
] "license": null,
], "size": 602797,
"license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).", "urls": [
"citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.", "http://www.msm.cam.ac.uk/map/data/tar/"
"details":"Motion capture data of different motions from the Open Motion Data Project at Ohio State University.", ]
"urls":[ },
"http://accad.osu.edu/research/mocap/data/", "decampos_characters": {
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/" "citation": "T. de Campos, B. R. Babu, and M. Varma. Character recognition in natural images. VISAPP 2009.",
], "details": "Examples of hand written digits taken from the de Campos et al paper on Character Recognition in Natural Images.",
"size":15922790 "files": [
}, [
"isomap_face_data":{ "characters.npy",
"files":[ "digits.npy"
[ ]
"face_data.mat" ],
] "license": null,
], "size": 2031872,
"license":null, "urls": [
"citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000", "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/decampos_digits/"
"details":"Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.", ]
"urls":[ },
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/" "della_gatta": {
], "citation": "Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008",
"size":24229368 "details": "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
}, "files": [
"boston_housing":{ [
"files":[ "DellaGattadata.mat"
[ ]
"Index", ],
"housing.data", "license": null,
"housing.names" "size": 3729650,
] "urls": [
], "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/"
"license":null, ]
"citation":"Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.", },
"details":"The Boston Housing data relates house values in Boston to a range of input variables.", "epomeo_gpx": {
"urls":[ "citation": "",
"http://archive.ics.uci.edu/ml/machine-learning-databases/housing/" "details": "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
], "files": [
"size":51276 [
}, "endomondo_1.gpx",
"cmu_mocap_full":{ "endomondo_2.gpx",
"files":[ "garmin_watch_via_endomondo.gpx",
[ "viewranger_phone.gpx",
"allasfamc.zip" "viewranger_tablet.gpx"
] ]
], ],
"license":"From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.", "license": null,
"citation":"Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\nThe database was created with funding from NSF EIA-0196217.", "size": 2031872,
"details":"CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.", "urls": [
"urls":[ "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/"
"http://mocap.cs.cmu.edu/subjects" ]
], },
"size":null "football_data": {
}, "citation": "",
"brendan_faces":{ "details": "Results of English football matches since 1993/94 season.",
"files":[ "files": [
[ [
"frey_rawface.mat" "E0.csv",
] "E1.csv",
], "E2.csv",
"license":null, "E3.csv"
"citation":"Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.", ]
"details":"A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.", ],
"urls":[ "license": null,
"http://www.cs.nyu.edu/~roweis/data/" "size": 1,
], "urls": [
"size":1100584 "http://www.football-data.co.uk/mmz4281/"
}, ]
"olympic_marathon_men":{ },
"files":[ "fruitfly_tomancak": {
[ "citation": "",
"olympicMarathonTimes.csv" "details": "",
] "files": [
], [
"license":null, "tomancak_exprs.csv",
"citation":null, "tomancak_se.csv",
"details":"Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data", "tomancak_prctile5.csv",
"urls":[ "tomancak_prctile25.csv",
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/" "tomancak_prctile50.csv",
], "tomancak_prctile75.csv",
"size":584 "tomancak_prctile95.csv"
}, ]
"pumadyn-32nm":{ ],
"files":[ "license": null,
[ "size": 59000000,
"pumadyn-32nm.tar.gz" "urls": [
] "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/fruitfly_tomancak/"
], ]
"license":"Data is made available by the Delve system at the University of Toronto", },
"citation":"Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.", "fruitfly_tomancak_cel_files": {
"details":"Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.", "citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin",
"urls":[ "details": "Gene expression results from blastoderm development in Drosophila Melanogaster.",
"ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/" "files": [
], [
"size":5861646 "embryo_tc_4_1.CEL",
}, "embryo_tc_4_2.CEL",
"ripley_prnn_data":{ "embryo_tc_4_3.CEL",
"files":[ "embryo_tc_4_4.CEL",
[ "embryo_tc_4_5.CEL",
"Cushings.dat", "embryo_tc_4_6.CEL",
"README", "embryo_tc_4_7.CEL",
"crabs.dat", "embryo_tc_4_8.CEL",
"fglass.dat", "embryo_tc_4_9.CEL",
"fglass.grp", "embryo_tc_4_10.CEL",
"pima.te", "embryo_tc_4_11.CEL",
"pima.tr", "embryo_tc_4_12.CEL",
"pima.tr2", "embryo_tc_6_1.CEL",
"synth.te", "embryo_tc_6_2.CEL",
"synth.tr", "embryo_tc_6_3.CEL",
"viruses.dat", "embryo_tc_6_4.CEL",
"virus3.dat" "embryo_tc_6_5.CEL",
] "embryo_tc_6_6.CEL",
], "embryo_tc_6_7.CEL",
"license":null, "embryo_tc_6_8.CEL",
"citation":"Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7", "embryo_tc_6_9.CEL",
"details":"Data sets from Brian Ripley's Pattern Recognition and Neural Networks", "embryo_tc_6_10.CEL",
"urls":[ "embryo_tc_6_11.CEL",
"http://www.stats.ox.ac.uk/pub/PRNN/" "embryo_tc_6_12.CEL",
], "embryo_tc_8_1.CEL",
"size":93565 "embryo_tc_8_2.CEL",
}, "embryo_tc_8_3.CEL",
"three_phase_oil_flow":{ "embryo_tc_8_4.CEL",
"files":[ "embryo_tc_8_5.CEL",
[ "embryo_tc_8_6.CEL",
"DataTrnLbls.txt", "embryo_tc_8_7.CEL",
"DataTrn.txt", "embryo_tc_8_8.CEL",
"DataTst.txt", "embryo_tc_8_9.CEL",
"DataTstLbls.txt", "embryo_tc_8_10.CEL",
"DataVdn.txt", "embryo_tc_8_11.CEL",
"DataVdnLbls.txt" "embryo_tc_8_12.CEL",
] "CG_AffyOligo_Gadfly3_01_13_03",
], "embryo_tc_rma_release2.txt",
"license":null, "embryo_tc_rma_release3.txt",
"citation":"Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593", "na_affy_oligo.dros",
"details":"The three phase oil data used initially for demonstrating the Generative Topographic mapping.", "README.TXT"
"urls":[ ]
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/three_phase_oil_flow/" ],
], "license": null,
"size":712796 "size": 389000000,
}, "urls": [
"robot_wireless":{ "ftp://ftp.fruitfly.org/pub/embryo_tc_array_data/"
"files":[ ]
[ },
"uw-floor.txt" "google_trends": {
] "citation": "",
], "details": "Google trends results.",
"license":null, "files": [
"citation":"WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.", [
"details":"Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.",
"urls":[ ]
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/" ],
], "license": null,
"size":284390 "size": 0,
}, "urls": [
"xw_pen":{ "http://www.google.com/trends/"
"files":[ ]
[ },
"xw_pen_15.csv"
] "hapmap3": {
], "citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.",
"license":null, "details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:\n Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n / 1, iff SNPij==(B1,B1)\n Aij = | 0, iff SNPij==(B1,B2)\n \\\\ -1, iff SNPij==(B2,B2)\n\n The SNP data and the meta information (such as iid, sex and phenotype) are\n stored in the dataframe datadf, index is the Individual ID, \n with following columns for metainfo:\n\n * family_id -> Family ID\n * paternal_id -> Paternal ID\n * maternal_id -> Maternal ID\n * sex -> Sex (1=male; 2=female; other=unknown)\n * phenotype -> Phenotype (-9, or 0 for unknown)\n * population -> Population string (e.g. 'ASW' - 'YRI')\n * rest are SNP rs (ids)\n\n More information is given in infodf:\n\n * Chromosome:\n - autosomal chromosemes -> 1-22\n - X X chromosome -> 23\n - Y Y chromosome -> 24\n - XY Pseudo-autosomal region of X -> 25\n - MT Mitochondrial -> 26\n * Relative Positon (to Chromosome) [base pairs]\n\n ",
"citation":"Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005", "files": [
"details":"Accelerometer pen data used for robust regression by Tipping and Lawrence.", [
"urls":[ "hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2",
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/xw_pen/" "hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2",
], "relationships_w_pops_121708.txt"
"size":3410 ]
}, ],
"swiss_roll":{ "license": "International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)",
"files":[ "size": 3458246739,
[ "urls": [
"swiss_roll_data.mat" "http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/"
] ]
], },
"license":null, "isomap_face_data": {
"citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000", "citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
"details":"Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.", "details": "Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
"urls":[ "files": [
"http://isomap.stanford.edu/" [
], "face_data.mat"
"size":800256 ]
}, ],
"osu_run1":{ "license": null,
"files":[ "size": 24229368,
[ "urls": [
"run1TXT.ZIP" "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/"
], ]
[ },
"connections.txt" "mauna_loa": {
] "citation": "Mauna Loa Data. Dr. Pieter Tans, NOAA/ESRL (www.esrl.noaa.gov/gmd/ccgg/trends/) and Dr. Ralph Keeling, Scripps Institution of Oceanography (scrippsco2.ucsd.edu/).",
], "details": "The 'average' column contains the monthly mean CO2 mole fraction determined from daily averages. The mole fraction of CO2, expressed as parts per million (ppm) is the number of molecules of CO2 in every one million molecules of dried air (water vapor removed). If there are missing days concentrated either early or late in the month, the monthly mean is corrected to the middle of the month using the average seasonal cycle. Missing months are denoted by -99.99. The 'interpolated' column includes average values from the preceding column and interpolated values where data are missing. Interpolated values are computed in two steps. First, we compute for each month the average seasonal cycle in a 7-year window around each monthly value. In this way the seasonal cycle is allowed to change slowly over time. We then determine the 'trend' value for each month by removing the seasonal cycle; this result is shown in the 'trend' column. Trend values are linearly interpolated for missing months. The interpolated monthly mean is then the sum of the average seasonal cycle value and the trend value for the missing month.\n\nNOTE: In general, the data presented for the last year are subject to change, depending on recalibration of the reference gas mixtures used, and other quality control procedures. Occasionally, earlier years may also be changed for the same reasons. Usually these changes are minor.\n\nCO2 expressed as a mole fraction in dry air, micromol/mol, abbreviated as ppm \n\n (-99.99 missing data; -1 no data for daily means in month)",
"license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).", "files": [
"citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.", [
"details":"Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.", "co2_mm_mlo.txt"
"urls":[ ]
"http://accad.osu.edu/research/mocap/data/", ],
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/" "license": "-------------------------------------------------------------------- USE OF NOAA ESRL DATA\n\n These data are made freely available to the public and the scientific community in the belief that their wide dissemination will lead to greater understanding and new scientific insights. The availability of these data does not constitute publication of the data. NOAA relies on the ethics and integrity of the user to insure that ESRL receives fair credit for their work. If the data are obtained for potential use in a publication or presentation, ESRL should be informed at the outset of the nature of this work. If the ESRL data are essential to the work, or if an important result or conclusion depends on the ESRL data, co-authorship may be appropriate. This should be discussed at an early stage in the work. Manuscripts using the ESRL data should be sent to ESRL for review before they are submitted for publication so we can insure that the quality and limitations of the data are accurately represented.\n\n Contact: Pieter Tans (303 497 6678; pieter.tans@noaa.gov)\n\n RECIPROCITY Use of these data implies an agreement to reciprocate. Laboratories making similar measurements agree to make their own data available to the general public and to the scientific community in an equally complete and easily accessible form. Modelers are encouraged to make available to the community, upon request, their own tools used in the interpretation of the ESRL data, namely well documented model code, transport fields, and additional information necessary for other scientists to repeat the work and to run modified versions. Model availability includes collaborative support for new users of the models.\n --------------------------------------------------------------------\n\n See www.esrl.noaa.gov/gmd/ccgg/trends/ for additional details.",
], "size": 46779,
"size":338103 "urls": [
}, "ftp://aftp.cmdl.noaa.gov/products/trends/co2/"
"creep_rupture":{ ]
"files":[ },
[ "olivetti_faces": {
"creeprupt.tar" "citation": "Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994",
] "details": "Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ",
], "files": [
"license":null, [
"citation":"Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.", "att_faces.zip"
"details":"Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.", ],
"urls":[ [
"http://www.msm.cam.ac.uk/map/data/tar/" "olivettifaces.mat"
], ]
"size":602797 ],
}, "license": null,
"olivetti_faces":{ "size": 8561331,
"files":[ "urls": [
[ "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
"att_faces.zip" "http://www.cs.nyu.edu/~roweis/data/"
], ]
[ },
"olivettifaces.mat" "olivetti_glasses": {
] "citation": "Information recorded in olivetti_faces entry. Should be used from there.",
], "details": "Information recorded in olivetti_faces entry. Should be used from there.",
"license":null, "files": [
"citation":"Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994", [
"details":"Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ", "has_glasses.np"
"urls":[ ],
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/", [
"http://www.cs.nyu.edu/~roweis/data/" "olivettifaces.mat"
], ]
"size":8561331 ],
}, "license": null,
"olivetti_glasses":{ "size": 4261047,
"files":[ "urls": [
[ "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
"has_glasses.np" "http://www.cs.nyu.edu/~roweis/data/"
], ]
[ },
"olivettifaces.mat" "olympic_marathon_men": {
] "citation": null,
], "details": "Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data",
"license":null, "files": [
"citation":"Information recorded in olivetti_faces entry. Should be used from there.", [
"details":"Information recorded in olivetti_faces entry. Should be used from there.", "olympicMarathonTimes.csv"
"urls":[ ]
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/", ],
"http://www.cs.nyu.edu/~roweis/data/" "license": null,
], "size": 584,
"size":4261047 "urls": [
}, "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/"
"della_gatta":{ ]
"files":[ },
[ "osu_accad": {
"DellaGattadata.mat" "citation": "The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
] "details": "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
], "files": [
"license":null, [
"citation":"Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008", "swagger1TXT.ZIP",
"details":"The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.", "handspring1TXT.ZIP",
"urls":[ "quickwalkTXT.ZIP",
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/" "run1TXT.ZIP",
], "sprintTXT.ZIP",
"size":3729650 "dogwalkTXT.ZIP",
}, "camper_04TXT.ZIP",
"epomeo_gpx":{ "dance_KB3_TXT.ZIP",
"files":[ "per20_TXT.ZIP",
[ "perTWO07_TXT.ZIP",
"endomondo_1.gpx", "perTWO13_TXT.ZIP",
"endomondo_2.gpx", "perTWO14_TXT.ZIP",
"garmin_watch_via_endomondo.gpx", "perTWO15_TXT.ZIP",
"viewranger_phone.gpx", "perTWO16_TXT.ZIP"
"viewranger_tablet.gpx" ],
] [
], "connections.txt"
"license":null, ]
"citation":"", ],
"details":"Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).", "license": "Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
"urls":[ "size": 15922790,
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/" "urls": [
], "http://accad.osu.edu/research/mocap/data/",
"size":2031872 "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
}, ]
"mauna_loa":{ },
"files":[ "osu_run1": {
[ "citation": "The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
"co2_mm_mlo.txt" "details": "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
] "files": [
], [
"license":"-------------------------------------------------------------------- USE OF NOAA ESRL DATA\n\n These data are made freely available to the public and the scientific community in the belief that their wide dissemination will lead to greater understanding and new scientific insights. The availability of these data does not constitute publication of the data. NOAA relies on the ethics and integrity of the user to insure that ESRL receives fair credit for their work. If the data are obtained for potential use in a publication or presentation, ESRL should be informed at the outset of the nature of this work. If the ESRL data are essential to the work, or if an important result or conclusion depends on the ESRL data, co-authorship may be appropriate. This should be discussed at an early stage in the work. Manuscripts using the ESRL data should be sent to ESRL for review before they are submitted for publication so we can insure that the quality and limitations of the data are accurately represented.\n\n Contact: Pieter Tans (303 497 6678; pieter.tans@noaa.gov)\n\n RECIPROCITY Use of these data implies an agreement to reciprocate. Laboratories making similar measurements agree to make their own data available to the general public and to the scientific community in an equally complete and easily accessible form. Modelers are encouraged to make available to the community, upon request, their own tools used in the interpretation of the ESRL data, namely well documented model code, transport fields, and additional information necessary for other scientists to repeat the work and to run modified versions. Model availability includes collaborative support for new users of the models.\n --------------------------------------------------------------------\n\n See www.esrl.noaa.gov/gmd/ccgg/trends/ for additional details.", "run1TXT.ZIP"
"citation":"Mauna Loa Data. Dr. Pieter Tans, NOAA/ESRL (www.esrl.noaa.gov/gmd/ccgg/trends/) and Dr. Ralph Keeling, Scripps Institution of Oceanography (scrippsco2.ucsd.edu/).", ],
"details":"The 'average' column contains the monthly mean CO2 mole fraction determined from daily averages. The mole fraction of CO2, expressed as parts per million (ppm) is the number of molecules of CO2 in every one million molecules of dried air (water vapor removed). If there are missing days concentrated either early or late in the month, the monthly mean is corrected to the middle of the month using the average seasonal cycle. Missing months are denoted by -99.99. The 'interpolated' column includes average values from the preceding column and interpolated values where data are missing. Interpolated values are computed in two steps. First, we compute for each month the average seasonal cycle in a 7-year window around each monthly value. In this way the seasonal cycle is allowed to change slowly over time. We then determine the 'trend' value for each month by removing the seasonal cycle; this result is shown in the 'trend' column. Trend values are linearly interpolated for missing months. The interpolated monthly mean is then the sum of the average seasonal cycle value and the trend value for the missing month.\n\nNOTE: In general, the data presented for the last year are subject to change, depending on recalibration of the reference gas mixtures used, and other quality control procedures. Occasionally, earlier years may also be changed for the same reasons. Usually these changes are minor.\n\nCO2 expressed as a mole fraction in dry air, micromol/mol, abbreviated as ppm \n\n (-99.99 missing data; -1 no data for daily means in month)", [
"urls":[ "connections.txt"
"ftp://aftp.cmdl.noaa.gov/products/trends/co2/" ]
], ],
"size":46779 "license": "Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
}, "size": 338103,
"boxjenkins_airline":{ "urls": [
"files":[ "http://accad.osu.edu/research/mocap/data/",
[ "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
"boxjenkins_airline.csv" ]
] },
], "pumadyn-32nm": {
"license":"You may copy and redistribute the data. You may make derivative works from the data. You may use the data for commercial purposes. You may not sublicence the data when redistributing it. You may not redistribute the data under a different license. Source attribution on any use of this data: Must refer source.", "citation": "Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.",
"citation":"Box & Jenkins (1976), in file: data/airpass, Description: International airline passengers: monthly totals in thousands. Jan 49 Dec 60", "details": "Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.",
"details":"International airline passengers, monthly totals from January 1949 to December 1960.", "files": [
"urls":[ [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/boxjenkins_airline/" "pumadyn-32nm.tar.gz"
], ]
"size":46779 ],
}, "license": "Data is made available by the Delve system at the University of Toronto",
"size": 5861646,
"decampos_characters":{ "urls": [
"files":[ "ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/"
[ ]
"characters.npy", },
"digits.npy" "ripley_prnn_data": {
] "citation": "Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7",
], "details": "Data sets from Brian Ripley's Pattern Recognition and Neural Networks",
"license":null, "files": [
"citation":"T. de Campos, B. R. Babu, and M. Varma. Character recognition in natural images. VISAPP 2009.", [
"details":"Examples of hand written digits taken from the de Campos et al paper on Character Recognition in Natural Images.", "Cushings.dat",
"urls":[ "README",
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/decampos_digits/" "crabs.dat",
], "fglass.dat",
"size":2031872 "fglass.grp",
} "pima.te",
"pima.tr",
"pima.tr2",
"synth.te",
"synth.tr",
"viruses.dat",
"virus3.dat"
]
],
"license": null,
"size": 93565,
"urls": [
"http://www.stats.ox.ac.uk/pub/PRNN/"
]
},
"robot_wireless": {
"citation": "WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.",
"details": "Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.",
"files": [
[
"uw-floor.txt"
]
],
"license": null,
"size": 284390,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/"
]
},
"rogers_girolami_data": {
"citation": "A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146",
"details": "Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.",
"files": [
[
"firstcoursemldata.tar.gz"
]
],
"license": null,
"size": 21949154,
"suffices": [
[
"?dl=1"
]
],
"urls": [
"https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/"
]
},
"singlecell": {
"citation": "Guoji Guo, Mikael Huss, Guo Qing Tong, Chaoyang Wang, Li Li Sun, Neil D. Clarke, Paul Robson, Resolution of Cell Fate Decisions Revealed by Single-Cell Gene Expression Analysis from Zygote to Blastocyst, Developmental Cell, Volume 18, Issue 4, 20 April 2010, Pages 675-685, ISSN 1534-5807, http://dx.doi.org/10.1016/j.devcel.2010.02.012. (http://www.sciencedirect.com/science/article/pii/S1534580710001103) Keywords: DEVBIO",
"details": "qPCR TaqMan array single cell experiment in mouse. The data is taken from the early stages of development when the Blastocyst is forming. At the 32 cell stage the data is already separated into the trophectoderm (TE) which goes onto form the placenta and the inner cellular mass (ICM). The ICM further differentiates into the epiblast (EPI)---which gives rise to the endoderm, mesoderm and ectoderm---and the primitive endoderm (PE) which develops into the amniotic sack. Guo et al selected 48 genes for expression measurement. They labelled the resulting cells and their labels are included as an aide to visualization.",
"files": [
[
"singlecell.csv"
]
],
"license": "ScienceDirect: http://www.elsevier.com/locate/termsandconditions?utm_source=sciencedirect&utm_medium=link&utm_campaign=terms",
"size": 233.1,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
]
},
"sod1_mouse": {
"citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
"details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",
"files": [
[
"sod1_C57_129_exprs.csv",
"sod1_C57_129_se.csv"
]
],
"license": null,
"size": 0,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/sod1_mouse/"
]
},
"swiss_roll": {
"citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
"details": "Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
"files": [
[
"swiss_roll_data.mat"
]
],
"license": null,
"size": 800256,
"urls": [
"http://isomap.stanford.edu/"
]
},
"three_phase_oil_flow": {
"citation": "Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593",
"details": "The three phase oil data used initially for demonstrating the Generative Topographic mapping.",
"files": [
[
"DataTrnLbls.txt",
"DataTrn.txt",
"DataTst.txt",
"DataTstLbls.txt",
"DataVdn.txt",
"DataVdnLbls.txt"
]
],
"license": null,
"size": 712796,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/three_phase_oil_flow/"
]
},
"xw_pen": {
"citation": "Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005",
"details": "Accelerometer pen data used for robust regression by Tipping and Lawrence.",
"files": [
[
"xw_pen_15.csv"
]
],
"license": null,
"size": 3410,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/xw_pen/"
]
}
} }

View file

@ -12,6 +12,8 @@ import datetime
import json import json
import re import re
from config import *
ipython_available=True ipython_available=True
try: try:
import IPython import IPython
@ -29,7 +31,8 @@ def reporthook(a,b,c):
sys.stdout.flush() sys.stdout.flush()
# Global variables # Global variables
data_path = os.path.join(os.path.dirname(__file__), 'datasets') data_path = os.path.expandvars(config.get('datasets', 'dir'))
#data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000 default_seed = 10000
overide_manual_authorize=False overide_manual_authorize=False
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/' neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
@ -108,7 +111,11 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
raise ValueError('Tried url ' + url + suffix + ' and received server error ' + str(response.code)) raise ValueError('Tried url ' + url + suffix + ' and received server error ' + str(response.code))
with open(save_name, 'wb') as f: with open(save_name, 'wb') as f:
meta = response.info() meta = response.info()
file_size = int(meta.getheaders("Content-Length")[0]) content_length_str = meta.getheaders("Content-Length")
if content_length_str:
file_size = int(content_length_str[0])
else:
file_size = None
status = "" status = ""
file_size_dl = 0 file_size_dl = 0
block_sz = 8192 block_sz = 8192
@ -120,9 +127,15 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
file_size_dl += len(buff) file_size_dl += len(buff)
f.write(buff) f.write(buff)
sys.stdout.write(" "*(len(status)) + "\r") sys.stdout.write(" "*(len(status)) + "\r")
status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1.*1e6), if file_size:
full=file_size/(1.*1e6), ll=line_length, status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.),
full=file_size/(1048576.), ll=line_length,
perc="="*int(line_length*float(file_size_dl)/file_size)) perc="="*int(line_length*float(file_size_dl)/file_size))
else:
status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.),
ll=line_length,
perc="."*int(line_length*float(file_size_dl/(10*1048576.))))
sys.stdout.write(status) sys.stdout.write(status)
sys.stdout.flush() sys.stdout.flush()
sys.stdout.write(" "*(len(status)) + "\r") sys.stdout.write(" "*(len(status)) + "\r")
@ -350,6 +363,34 @@ def football_data(season='1314', data_set='football_data'):
Y = table[:, 4:] Y = table[:, 4:]
return data_details_return({'X': X, 'Y': Y}, data_set) return data_details_return({'X': X, 'Y': Y}, data_set)
def sod1_mouse(data_set='sod1_mouse'):
if not data_available(data_set):
download_data(data_set)
from pandas import read_csv
dirpath = os.path.join(data_path, data_set)
filename = os.path.join(dirpath, 'sod1_C57_129_exprs.csv')
Y = read_csv(filename, header=0, index_col=0)
num_repeats=4
num_time=4
num_cond=4
X = 1
return data_details_return({'X': X, 'Y': Y}, data_set)
def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
if not data_available(data_set):
download_data(data_set)
from pandas import read_csv
dirpath = os.path.join(data_path, data_set)
filename = os.path.join(dirpath, 'tomancak_exprs.csv')
Y = read_csv(filename, header=0, index_col=0).T
num_repeats = 3
num_time = 12
xt = np.linspace(0, num_time-1, num_time)
xr = np.linspace(0, num_repeats-1, num_repeats)
xtime, xrepeat = np.meshgrid(xt, xr)
X = np.vstack((xtime.flatten(), xrepeat.flatten())).T
return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set)
# This will be for downloading google trends data. # This will be for downloading google trends data.
def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'): def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
"""Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations.""" """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations."""
@ -717,7 +758,22 @@ def hapmap3(data_set='hapmap3'):
inandf=inandf, inandf=inandf,
populations=populations) populations=populations)
return hapmap return hapmap
def singlecell(data_set='singlecell'):
if not data_available(data_set):
download_data(data_set)
from pandas import read_csv
dirpath = os.path.join(data_path, data_set)
filename = os.path.join(dirpath, 'singlecell.csv')
Y = read_csv(filename, header=0, index_col=0)
genes = Y.columns
labels = Y.index
# data = np.loadtxt(os.path.join(dirpath, 'singlecell.csv'), delimiter=",", dtype=str)
return data_details_return({'Y': Y, 'info' : "qPCR singlecell experiment in Mouse, measuring 48 gene expressions in 1-64 cell states. The labels have been created as in Guo et al. [2010]",
'genes': genes, 'labels':labels,
}, data_set)
def swiss_roll_1000(): def swiss_roll_1000():
return swiss_roll(num_samples=1000) return swiss_roll(num_samples=1000)

View file

@ -1,168 +0,0 @@
import json
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
cmu_url = 'http://mocap.cs.cmu.edu/subjects/'
data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
'files' : [['ankurDataPoseSilhouette.mat']],
'license' : None,
'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
'files' : [['Index', 'housing.data', 'housing.names']],
'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
'details' : """The Boston Housing data relates house values in Boston to a range of input variables.""",
'license' : None,
'size' : 51276
},
'brendan_faces' : {'urls' : [sam_url],
'files': [['frey_rawface.mat']],
'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
'license': None,
'size' : 1100584},
'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'],
'files' : [['allasfamc.zip']],
'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.'
'The database was created with funding from NSF EIA-0196217.""",
'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
'size' : None},
'creep_rupture' : {'urls' : ['http://www.msm.cam.ac.uk/map/data/tar/'],
'files' : [['creeprupt.tar']],
'citation' : 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.',
'details' : """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""",
'license' : None,
'size' : 602797},
'della_gatta' : {'urls' : [neil_url + 'della_gatta/'],
'files': [['DellaGattadata.mat']],
'citation' : 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008',
'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
'license':None,
'size':3729650},
'epomeo_gpx' : {'urls' : [neil_url + 'epomeo_gpx/'],
'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx','viewranger_phone.gpx','viewranger_tablet.gpx']],
'citation' : '',
'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
'license':None,
'size': 2031872},
'three_phase_oil_flow': {'urls' : [neil_url + 'three_phase_oil_flow/'],
'files' : [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
'citation' : 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
'details' : """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""",
'license' : None,
'size' : 712796},
'rogers_girolami_data' : {'urls' : ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'],
'files' : [['firstcoursemldata.tar.gz']],
'suffices' : [['?dl=1']],
'citation' : 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146',
'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
'license' : None,
'size' : 21949154},
'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url],
'files' : [['att_faces.zip'], ['olivettifaces.mat']],
'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
'license': None,
'size' : 8561331},
'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'],
'files' : [['olympicMarathonTimes.csv']],
'citation' : None,
'details' : """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""",
'license': None,
'size' : 584},
'osu_run1' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
'files': [['run1TXT.ZIP'],['connections.txt']],
'details' : "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
'size': 338103},
'osu_accad' : {'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
'files': [['swagger1TXT.ZIP','handspring1TXT.ZIP','quickwalkTXT.ZIP','run1TXT.ZIP','sprintTXT.ZIP','dogwalkTXT.ZIP','camper_04TXT.ZIP','dance_KB3_TXT.ZIP','per20_TXT.ZIP','perTWO07_TXT.ZIP','perTWO13_TXT.ZIP','perTWO14_TXT.ZIP','perTWO15_TXT.ZIP','perTWO16_TXT.ZIP'],['connections.txt']],
'details' : "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
'citation' : 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
'license' : 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
'size': 15922790},
'pumadyn-32nm' : {'urls' : ['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'],
'files' : [['pumadyn-32nm.tar.gz']],
'details' : """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""",
'citation' : """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""",
'license' : """Data is made available by the Delve system at the University of Toronto""",
'size' : 5861646},
'robot_wireless' : {'urls' : [neil_url + 'robot_wireless/'],
'files' : [['uw-floor.txt']],
'citation' : """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""",
'details' : """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""",
'license' : None,
'size' : 284390},
'swiss_roll' : {'urls' : ['http://isomap.stanford.edu/'],
'files' : [['swiss_roll_data.mat']],
'details' : """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
'license' : None,
'size' : 800256},
'ripley_prnn_data' : {'urls' : ['http://www.stats.ox.ac.uk/pub/PRNN/'],
'files' : [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']],
'details' : """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""",
'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""",
'license' : None,
'size' : 93565},
'isomap_face_data' : {'urls' : [neil_url + 'isomap_face_data/'],
'files' : [['face_data.mat']],
'details' : """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
'license' : None,
'size' : 24229368},
'xw_pen' : {'urls' : [neil_url + 'xw_pen/'],
'files' : [['xw_pen_15.csv']],
'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
'license' : None,
'size' : 3410},
'hapmap3' : {'urls' : ['http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/'],
'files' : [['hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2', 'hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2', 'relationships_w_pops_121708.txt']],
'details' : """
HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations.
The HapMap phase three SNP dataset - 1184 samples out of 11 populations.
See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.
SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:
Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then
/ 1, iff SNPij==(B1,B1)
Aij = | 0, iff SNPij==(B1,B2)
\ -1, iff SNPij==(B2,B2)
The SNP data and the meta information (such as iid, sex and phenotype) are
stored in the dataframe datadf, index is the Individual ID,
with following columns for metainfo:
* family_id -> Family ID
* paternal_id -> Paternal ID
* maternal_id -> Maternal ID
* sex -> Sex (1=male; 2=female; other=unknown)
* phenotype -> Phenotype (-9, or 0 for unknown)
* population -> Population string (e.g. 'ASW' - 'YRI')
* rest are SNP rs (ids)
More information is given in infodf:
* Chromosome:
- autosomal chromosemes -> 1-22
- X X chromosome -> 23
- Y Y chromosome -> 24
- XY Pseudo-autosomal region of X -> 25
- MT Mitochondrial -> 26
* Relative Positon (to Chromosome) [base pairs]
""",
'citation': """Gibbs, Richard A., et al. "The international HapMap project." Nature 426.6968 (2003): 789-796.""",
'license' : """International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)""",
'size' : 2*1729092237 + 62265},
}
with open('data_resources.json', 'w') as f:
print "writing data_resources"
json.dump(data_resources, f)

View file

@ -13,7 +13,11 @@ def initialize_latent(init, input_dim, Y):
p = pca(Y) p = pca(Y)
PC = p.project(Y, min(input_dim, Y.shape[1])) PC = p.project(Y, min(input_dim, Y.shape[1]))
Xr[:PC.shape[0], :PC.shape[1]] = PC Xr[:PC.shape[0], :PC.shape[1]] = PC
var = p.fracs[:input_dim]
else: else:
var = Xr.var(0) var = Xr.var(0)
return Xr, var/var.max()
return Xr, p.fracs[:input_dim] Xr -= Xr.mean(0)
Xr /= Xr.var(0)
return Xr, var/var.max()

View file

@ -130,14 +130,14 @@ def fast_array_equal(A, B):
""" % pragma_string """ % pragma_string
if config.getboolean('parallel', 'openmp'): if config.getboolean('parallel', 'openmp'):
pragma_string = '#include <omp.h>' header_string = '#include <omp.h>'
else: else:
pragma_string = '' header_string = ''
support_code = """ support_code = """
%s %s
#include <math.h> #include <math.h>
""" % pragma_string """ % header_string
weave_options_openmp = {'headers' : ['<omp.h>'], weave_options_openmp = {'headers' : ['<omp.h>'],

View file

@ -2,15 +2,22 @@ import sys
import numpy as np import numpy as np
import sympy as sym import sympy as sym
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
from sympy.matrices import Matrix
######################################## ########################################
## Try to do some matrix functions: problem, you can't do derivatives ## Try to do some matrix functions: problem, you can't do derivatives
## with respect to matrix functions :-( ## with respect to matrix functions :-(
class GPySymMatrix(Matrix):
def __init__(self, indices):
Matrix.__init__(self)
def atoms(self):
return [e2 for e in self for e2 in e.atoms()]
class selector(Function): class selector(Function):
"""A function that returns an element of a Matrix depending on input indices.""" """A function that returns an element of a Matrix depending on input indices."""
nargs = 3 nargs = 3
def fdiff(self, argindex=1):
return selector(*self.args)
@classmethod @classmethod
def eval(cls, X, i, j): def eval(cls, X, i, j):
if i.is_Number and j.is_Number: if i.is_Number and j.is_Number:

View file

@ -10,6 +10,16 @@ A Gaussian processes framework in Python.
Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png) Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png)
Citation
========
@Misc{gpy2014,
author = {The GPy authors},
title = {{GPy}: A Gaussian process framework in python},
howpublished = {\url{http://github.com/SheffieldML/GPy}},
year = {2012--2014}
}
Getting started Getting started
=============== ===============
Installing with pip Installing with pip