merge mu's changes into devel

This commit is contained in:
mu 2014-05-14 11:19:18 +01:00
commit 9171909724
37 changed files with 1177 additions and 878 deletions

View file

@ -72,7 +72,7 @@ class GP(Model):
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False):
def _raw_predict(self, _Xnew, full_cov=False, kern=None):
"""
For making predictions, does not account for normalization or likelihood
@ -87,14 +87,17 @@ class GP(Model):
$$
"""
Kx = self.kern.K(_Xnew, self.X).T
if kern is None:
kern = self.kern
Kx = kern.K(_Xnew, self.X).T
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(_Xnew)
Kxx = kern.K(_Xnew)
var = Kxx - np.dot(Kx.T, WiKx)
else:
Kxx = self.kern.Kdiag(_Xnew)
Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
@ -102,7 +105,7 @@ class GP(Model):
if len(mu.shape)==1: mu = mu[:,None]
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None):
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
"""
Predict the function(s) at the new point(s) Xnew.
@ -111,6 +114,9 @@ class GP(Model):
:param full_cov: whether to return the full covariance matrix, or just
the diagonal
:type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). This is useful for examining e.g. subprocesses.
:returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
:returns: var: posterior variance, a Numpy array, Nnew x 1 if
full_cov=False, Nnew x Nnew otherwise
@ -123,7 +129,7 @@ class GP(Model):
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov)
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)

View file

@ -58,7 +58,9 @@ class ObservablesList(object):
def __repr__(self):
return self._poc.__repr__()
def add(self, priority, observable, callble):
if observable is not None:
ins = 0
for pr, _, _ in self:
if priority > pr:
@ -86,7 +88,6 @@ class ObservablesList(object):
def __iter__(self):
self.flush()
for p, o, c in self._poc:
if o() is not None:
yield p, o(), c
def __len__(self):
@ -94,10 +95,11 @@ class ObservablesList(object):
return self._poc.__len__()
def __deepcopy__(self, memo):
self.flush()
s = ObservablesList()
for p,o,c in self:
import copy
s._poc = copy.deepcopy(self._poc, memo)
s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo))
s.flush()
return s
def __getstate__(self):

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
__updated__ = '2014-04-15'
__updated__ = '2014-05-12'
import numpy as np
from parameter_core import Observable, Pickleable
@ -15,10 +15,10 @@ class ObsAr(np.ndarray, Pickleable, Observable):
"""
__array_priority__ = -1 # Never give back ObsAr
def __new__(cls, input_array, *a, **kw):
# always make a copy of input parameters, as we need it to be in C order:
if not isinstance(input_array, ObsAr):
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
obj = np.atleast_1d(np.require(np.copy(input_array), dtype=np.float64, requirements=['W', 'C'])).view(cls)
else: obj = input_array
#cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
super(ObsAr, obj).__init__(*a, **kw)
return obj

View file

@ -45,7 +45,6 @@ class Param(OptimizationHandlable, ObsAr):
_parameters_ = []
def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
cls.__name__ = "Param"
obj._current_slice_ = (slice(obj.shape[0]),)
obj._realshape_ = obj.shape
obj._realsize_ = obj.size
@ -58,9 +57,9 @@ class Param(OptimizationHandlable, ObsAr):
def build_pydot(self,G):
import pydot
node = pydot.Node(id(self), shape='record', label=self.name)
node = pydot.Node(id(self), shape='trapezium', label=self.name)#, fontcolor='white', color='white')
G.add_node(node)
for o in self.observers.keys():
for _, o, _ in self.observers:
label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node)
@ -90,6 +89,13 @@ class Param(OptimizationHandlable, ObsAr):
def param_array(self):
return self
@property
def values(self):
"""
Return self as numpy array view
"""
return self.view(np.ndarray)
@property
def gradient(self):
"""
@ -100,11 +106,11 @@ class Param(OptimizationHandlable, ObsAr):
"""
if getattr(self, '_gradient_array_', None) is None:
self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64)
return self._gradient_array_[self._current_slice_]
return self._gradient_array_#[self._current_slice_]
@gradient.setter
def gradient(self, val):
self._gradient_array_[self._current_slice_] = val
self._gradient_array_[:] = val
#===========================================================================
# Array operations -> done
@ -112,10 +118,13 @@ class Param(OptimizationHandlable, ObsAr):
def __getitem__(self, s, *args, **kwargs):
if not isinstance(s, tuple):
s = (s,)
if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
s += (Ellipsis,)
#if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
# s += (Ellipsis,)
new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
try:
new_arr._current_slice_ = s
new_arr._gradient_array_ = self.gradient[s]
new_arr._original_ = self.base is new_arr.base
except AttributeError: pass # returning 0d array or float, double etc
return new_arr
@ -156,6 +165,34 @@ class Param(OptimizationHandlable, ObsAr):
def _ensure_fixes(self):
if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
#===========================================================================
# parameterizable
#===========================================================================
def traverse(self, visit, *args, **kwargs):
"""
Traverse the hierarchy performing visit(self, *args, **kwargs) at every node passed by.
See "visitor pattern" in literature. This is implemented in pre-order fashion.
This function will just call visit on self, as Param objects are leaf nodes.
"""
visit(self, *args, **kwargs)
def traverse_parents(self, visit, *args, **kwargs):
"""
Traverse the hierarchy upwards, visiting all parents and their children, except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example:
parents = []
self.traverse_parents(parents.append)
print parents
"""
if self.has_parent():
self.__visited = True
self._parent_._traverse_parents(visit, *args, **kwargs)
self.__visited = False
#===========================================================================
# Convenience
#===========================================================================
@ -316,8 +353,8 @@ class ParamConcatenation(object):
val = val.values()
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
vals = self.values(); vals[s] = val
[numpy.copyto(p, vals[ps], where=ind[ps])
for p, ps in zip(self.params, self._param_slices_)]
for p, ps in zip(self.params, self._param_slices_):
p.flat[ind[ps]] = vals[ps]
if update:
self.update_all_params()
def values(self):

View file

@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED,
import numpy as np
import re
__updated__ = '2014-04-16'
__updated__ = '2014-05-12'
class HierarchyError(Exception):
"""
@ -124,7 +124,7 @@ class Parentable(object):
"""
Disconnect this object from its parent
"""
raise NotImplementedError, "Abstaract superclass"
raise NotImplementedError, "Abstract superclass"
@property
def _highest_parent_(self):
@ -162,7 +162,6 @@ class Pickleable(object):
:param protocol: pickling protocol to use, python-pickle for details.
"""
import cPickle as pickle
import pickle #TODO: cPickle
if isinstance(f, str):
with open(f, 'w') as f:
pickle.dump(self, f, protocol)
@ -177,19 +176,23 @@ class Pickleable(object):
#raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
import copy
memo = {}
memo[id(self._parent_)] = None
memo[id(self.gradient)] = None
memo[id(self.param_array)] = None
memo[id(self._fixes_)] = None
c = copy.deepcopy(self, memo)
# the next part makes sure that we do not include parents in any form:
parents = []
self.traverse_parents(parents.append) # collect parents
for p in parents:
memo[id(p)] = None # set all parents to be None, so they will not be copied
memo[id(self.gradient)] = None # reset the gradient
memo[id(self.param_array)] = None # and param_array
memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
c = copy.deepcopy(self, memo) # and start the copy
c._parent_index_ = None
return c
def __deepcopy__(self, memo):
s = self.__new__(self.__class__)
memo[id(self)] = s
s = self.__new__(self.__class__) # fresh instance
memo[id(self)] = s # be sure to break all cycles --> self is already done
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
return s
def __getstate__(self):
@ -202,9 +205,6 @@ class Pickleable(object):
dc = dict()
for k,v in self.__dict__.iteritems():
if k not in ignore_list:
#if hasattr(v, "__getstate__"):
#dc[k] = v.__getstate__()
#else:
dc[k] = v
return dc
@ -212,12 +212,6 @@ class Pickleable(object):
self.__dict__.update(state)
return self
#def __getstate__(self, memo):
# raise NotImplementedError, "get state must be implemented to be able to pickle objects"
#def __setstate__(self, memo):
# raise NotImplementedError, "set state must be implemented to be able to pickle objects"
class Gradcheckable(Pickleable, Parentable):
"""
Adds the functionality for an object to be gradcheckable.
@ -585,12 +579,6 @@ class OptimizationHandlable(Constrainable):
def __init__(self, name, default_constraint=None, *a, **kw):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
def transform(self):
[np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def untransform(self):
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self.param_array.copy()
@ -604,15 +592,15 @@ class OptimizationHandlable(Constrainable):
return p
def _set_params_transformed(self, p):
if p is self.param_array:
p = p.copy()
if not(p is self.param_array):
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
self.param_array.flat[fixes] = p
elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
else: self.param_array.flat = p
self.untransform()
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True):
@ -626,7 +614,7 @@ class OptimizationHandlable(Constrainable):
def num_params(self):
"""
Return the number of parameters of this parameter_handle.
Param objects will allways return 0.
Param objects will always return 0.
"""
raise NotImplemented, "Abstract, please implement in respective classes"
@ -644,6 +632,7 @@ class OptimizationHandlable(Constrainable):
else: names = [adjust(x.name) for x in self._parameters_]
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
return names
def _get_param_names(self):
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n
@ -710,12 +699,18 @@ class Parameterizable(OptimizationHandlable):
super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.lists_and_dicts import ArrayList
self._parameters_ = ArrayList()
self._param_array_ = None
self.size = 0
self._added_names_ = set()
self.__visited = False # for traversing in reverse order we need to know if we were here already
@property
def param_array(self):
if not hasattr(self, '_param_array_'):
"""
Array representing the parameters of this class.
There is only one copy of all parameters in memory, two during optimization.
"""
if self._param_array_ is None:
self._param_array_ = np.empty(self.size, dtype=np.float64)
return self._param_array_
@ -723,6 +718,52 @@ class Parameterizable(OptimizationHandlable):
def param_array(self, arr):
self._param_array_ = arr
def traverse(self, visit, *args, **kwargs):
"""
Traverse the hierarchy performing visit(self, *args, **kwargs)
at every node passed by downwards. This function includes self!
See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example:
Collect all children:
children = []
self.traverse(children.append)
print children
"""
if not self.__visited:
visit(self, *args, **kwargs)
self.__visited = True
for c in self._parameters_:
c.traverse(visit, *args, **kwargs)
self.__visited = False
def traverse_parents(self, visit, *args, **kwargs):
"""
Traverse the hierarchy upwards, visiting all parents and their children except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example:
parents = []
self.traverse_parents(parents.append)
print parents
"""
if self.has_parent():
self.__visited = True
self._parent_._traverse_parents(visit, *args, **kwargs)
self.__visited = False
def _traverse_parents(self, visit, *args, **kwargs):
if not self.__visited:
self.__visited = True
visit(self, *args, **kwargs)
if self.has_parent():
self._parent_._traverse_parents(visit, *args, **kwargs)
self._parent_.traverse(visit, *args, **kwargs)
self.__visited = False
#=========================================================================
# Gradient handling
#=========================================================================
@ -789,11 +830,10 @@ class Parameterizable(OptimizationHandlable):
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self._parameters_:
if param.has_parent():
parent = param._parent_
while parent is not None:
def visit(parent, self):
if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
parent = parent._parent_
param.traverse_parents(visit, self)
param._parent_.remove_parameter(param)
# make sure the size is set
if index is None:
@ -837,7 +877,7 @@ class Parameterizable(OptimizationHandlable):
:param param: param object to remove from being a parameter of this parameterized object.
"""
if not param in self._parameters_:
raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
start = sum([p.size for p in self._parameters_[:param._parent_index_]])
self._remove_parameter_name(param)

View file

@ -82,15 +82,15 @@ class Parameterized(Parameterizable):
import pydot # @UnresolvedImport
iamroot = False
if G is None:
G = pydot.Dot(graph_type='digraph')
G = pydot.Dot(graph_type='digraph', bgcolor=None)
iamroot=True
node = pydot.Node(id(self), shape='record', label=self.name)
node = pydot.Node(id(self), shape='box', label=self.name)#, color='white')
G.add_node(node)
for child in self._parameters_:
child_node = child.build_pydot(G)
G.add_edge(pydot.Edge(node, child_node))
G.add_edge(pydot.Edge(node, child_node))#, color='white'))
for o in self.observers.keys():
for _, o, _ in self.observers:
label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node)

View file

@ -100,6 +100,9 @@ class VariationalPosterior(Parameterized):
n.__dict__.update(dc)
n._parameters_[dc['mean']._parent_index_] = dc['mean']
n._parameters_[dc['variance']._parent_index_] = dc['variance']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size
n.size = n.mean.size + n.variance.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]

View file

@ -79,29 +79,32 @@ class SparseGP(GP):
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, full_cov=False):
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
Make a prediction for the latent function values
"""
if kern is None: kern = self.kern
if not isinstance(Xnew, VariationalPosterior):
Kx = self.kern.K(self.Z, Xnew)
Kx = kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(Xnew)
Kxx = kern.K(Xnew)
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
#var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = var.squeeze()
else:
Kxx = self.kern.Kdiag(Xnew)
Kxx = kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
else:
Kx = self.kern.psi1(self.Z, Xnew)
Kx = kern.psi1(self.Z, Xnew)
mu = np.dot(Kx, self.posterior.woodbury_vector)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, Xnew)
psi2 = self.kern.psi2(self.Z, Xnew)
Kxx = kern.psi0(self.Z, Xnew)
psi2 = kern.psi2(self.Z, Xnew)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var

View file

@ -107,14 +107,14 @@ class Symbolic_core():
# Do symbolic work to compute derivatives.
for key, func in self.expressions.items():
if func['function'].is_Matrix:
rows = func['function'].shape[0]
cols = func['function'].shape[1]
self.expressions[key]['derivative'] = sym.zeros(rows, cols)
for i in xrange(rows):
for j in xrange(cols):
self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
else:
# if func['function'].is_Matrix:
# rows = func['function'].shape[0]
# cols = func['function'].shape[1]
# self.expressions[key]['derivative'] = sym.zeros(rows, cols)
# for i in xrange(rows):
# for j in xrange(cols):
# self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
# else:
self.expressions[key]['derivative'] = extract_derivative(func['function'], derivative_arguments)
def _set_parameters(self, parameters):

View file

@ -161,6 +161,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
import GPy
from matplotlib import pyplot as plt
from ..util.misc import param_to_array
import numpy as np
_np.random.seed(0)
data = GPy.util.datasets.oil()
@ -174,11 +175,10 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
if plot:
y = m.Y
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish')
plt.close(fig)
@ -408,13 +408,13 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
data = GPy.util.datasets.osu_run1()
# optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000)
if plot:
plt.clf
ax = m.plot_latent()
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, latent_axes=ax)
vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish')
return m
@ -475,24 +475,28 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
from GPy.models import BayesianGPLVM
from matplotlib import pyplot as plt
import numpy as np
import GPy
data = GPy.util.datasets.osu_run1()
Q = 6
kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
m.data = data
m.likelihood.variance = 0.001
# optimize
m.ensure_default_constraints()
if optimize: m.optimize('scg', messages=verbose, max_iters=200, xtol=1e-300, ftol=1e-300)
m._set_params(m._get_params())
if optimize: m.optimize('bfgs', messages=verbose, max_iters=800, xtol=1e-300, ftol=1e-300)
if plot:
plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
plt.sca(latent_axes)
m.plot_latent()
y = m.likelihood.Y[0, :].copy()
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish')
m.plot_latent(ax=latent_axes)
y = m.Y[:1, :].copy()
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
plt.draw()
#raw_input('Press enter to finish')
return m
@ -509,7 +513,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot:
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')

View file

@ -6,6 +6,10 @@
# some platforms, hence this option.
openmp=False
[datasets]
# location for the local data cache
dir=$HOME/tmp/GPy-datasets/
[anaconda]
# if you have an anaconda python installation please specify it here.
installed = False

View file

@ -32,7 +32,7 @@ def print_out(len_maxiters, fnow, current_grad, beta, iteration):
sys.stdout.flush()
def exponents(fnow, current_grad):
exps = [np.abs(fnow), current_grad]
exps = [np.abs(np.float(fnow)), current_grad]
return np.sign(exps) * np.log10(exps).astype(int)
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):

View file

@ -3,7 +3,7 @@ from _src.rbf import RBF
from _src.linear import Linear, LinearFull
from _src.static import Bias, White
from _src.brownian import Brownian
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs, Hierarchical
@ -13,6 +13,8 @@ from _src.ODE_UY import ODE_UY
from _src.ODE_UYC import ODE_UYC
from _src.ODE_st import ODE_st
from _src.ODE_t import ODE_t
from _src.poly import Poly
# TODO: put this in an init file somewhere
#I'm commenting this out because the files were not added. JH. Remember to add the files before committing
try:

View file

@ -170,7 +170,4 @@ class Add(CombinationKernel):
return self
def input_sensitivity(self):
in_sen = np.zeros(self.input_dim)
for i, p in enumerate(self.parts):
in_sen[p.active_dims] += p.input_sensitivity()
return in_sen
return reduce(np.add, [k.input_sensitivity() for k in self.parts])

View file

@ -32,7 +32,7 @@ def index_to_slices(index):
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
class IndependentOutputs(Kern):
class IndependentOutputs(CombinationKernel):
"""
A kernel which can represent several independent functions. This kernel
'switches off' parts of the matrix where the output indexes are different.
@ -180,6 +180,9 @@ class Hierarchical(CombinationKernel):
def Kdiag(self,X):
return np.diag(self.K(X))
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def update_gradients_full(self,dL_dK,X,X2=None):
slices = [index_to_slices(X[:,i]) for i in self.extra_dims]
if X2 is None:

View file

@ -34,36 +34,24 @@ class Kern(Parameterized):
is the active_dimensions of inputs X we will work on.
All kernels will get sliced Xes as inputs, if active_dims is not None
Only positive integers are allowed in active_dims!
if active_dims is None, slicing is switched off and all X will be passed through as given.
:param int input_dim: the number of input dimensions to the function
:param array-like|slice|None active_dims: list of indices on which dimensions this kernel works on, or none if no slicing
:param array-like|None active_dims: list of indices on which dimensions this kernel works on, or none if no slicing
Do not instantiate.
"""
super(Kern, self).__init__(name=name, *a, **kw)
try:
self.input_dim = int(input_dim)
self.active_dims = active_dims# if active_dims is not None else slice(0, input_dim, 1)
except TypeError:
# input_dim is something else then an integer
self.input_dim = input_dim
if active_dims is not None:
print "WARNING: given input_dim={} is not an integer and active_dims={} is given, switching off slicing"
self.active_dims = None
if self.active_dims is not None and self.input_dim is not None:
assert isinstance(self.active_dims, (slice, list, tuple, np.ndarray)), 'active_dims needs to be an array-like or slice object over dimensions, {} given'.format(self.active_dims.__class__)
if isinstance(self.active_dims, slice):
self.active_dims = slice(self.active_dims.start or 0, self.active_dims.stop or self.input_dim, self.active_dims.step or 1)
active_dim_size = int(np.round((self.active_dims.stop-self.active_dims.start)/self.active_dims.step))
elif isinstance(self.active_dims, np.ndarray):
#assert np.all(self.active_dims >= 0), 'active dimensions need to be positive. negative indexing is not allowed'
assert self.active_dims.ndim == 1, 'only flat indices allowed, given active_dims.shape={}, provide only indexes to the dimensions (columns) of the input'.format(self.active_dims.shape)
active_dim_size = self.active_dims.size
else:
active_dim_size = len(self.active_dims)
assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
if active_dims is None:
active_dims = np.arange(input_dim)
self.active_dims = np.array(active_dims, dtype=int)
assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, self.active_dims.size, self.active_dims)
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
@ -176,8 +164,8 @@ class Kern(Parameterized):
"""
Shortcut for tensor `prod`.
"""
assert self.active_dims == range(self.input_dim), "Can only use kernels, which have their input_dims defined from 0"
assert other.active_dims == range(other.input_dim), "Can only use kernels, which have their input_dims defined from 0"
assert np.all(self.active_dims == range(self.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
assert np.all(other.active_dims == range(other.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
other.active_dims += self.input_dim
return self.prod(other)
@ -202,10 +190,10 @@ class Kern(Parameterized):
return Prod([self, other], name)
def _check_input_dim(self, X):
assert X.shape[1] == self.input_dim, "You did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(X.shape[1], self.input_dim)
assert X.shape[1] == self.input_dim, "{} did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)
def _check_active_dims(self, X):
assert X.shape[1] >= len(np.r_[self.active_dims]), "At least {} dimensional X needed, X.shape={!s}".format(len(np.r_[self.active_dims]), X.shape)
assert X.shape[1] >= len(self.active_dims), "At least {} dimensional X needed, X.shape={!s}".format(len(self.active_dims), X.shape)
class CombinationKernel(Kern):
@ -222,9 +210,10 @@ class CombinationKernel(Kern):
:param list kernels: List of kernels to combine (can be only one element)
:param str name: name of the combination kernel
:param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
:param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
"""
assert all([isinstance(k, Kern) for k in kernels])
extra_dims = np.array(extra_dims, dtype=int)
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
# initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
@ -238,16 +227,18 @@ class CombinationKernel(Kern):
def get_input_dim_active_dims(self, kernels, extra_dims = None):
#active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
#active_dims = np.array(np.concatenate((active_dims, extra_dims if extra_dims is not None else [])), dtype=int)
input_dim = np.array([k.input_dim for k in kernels])
if np.all(input_dim[0]==input_dim):
input_dim = input_dim[0]
active_dims = None
input_dim = reduce(max, (k.active_dims.max() for k in kernels)) + 1
if extra_dims is not None:
input_dim += extra_dims.size
active_dims = np.arange(input_dim)
return input_dim, active_dims
def input_sensitivity(self):
raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...")
def _check_input_dim(self, X):
def _check_active_dims(self, X):
return
def _check_input_dim(self, X):

View file

@ -12,6 +12,7 @@ from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this
from ...core.parameterization import variational
from psi_comp import linear_psi_comp
from ...util.config import *
class Linear(Kern):
"""
@ -224,12 +225,23 @@ class Linear(Kern):
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2.
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(m,mm,q,qq,factor,tmp)'
header_string = '#include <omp.h>'
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
else:
pragma_string = ''
header_string = ''
weave_options = {'extra_compile_args': ['-O3']}
#Using weave, we can exploit the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
%s
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
@ -253,26 +265,36 @@ class Linear(Kern):
}
}
}
"""
""" % pragma_string
support_code = """
#include <omp.h>
%s
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
""" % header_string
mu = vp.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
weave.inline(code, support_code=support_code,
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
AZA = self.variances*self._ZAinner(vp, Z)
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(n,mm,q)'
header_string = '#include <omp.h>'
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
else:
pragma_string = ''
header_string = ''
weave_options = {'extra_compile_args': ['-O3']}
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)
%s
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
@ -282,18 +304,15 @@ class Linear(Kern):
}
}
}
"""
""" % pragma_string
support_code = """
#include <omp.h>
%s
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
""" % header_string
N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
mu = param_to_array(vp.mean)
weave.inline(code, support_code=support_code, libraries=['gomp'],
weave.inline(code, support_code=support_code,
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)

42
GPy/kern/_src/poly.py Normal file
View file

@ -0,0 +1,42 @@
# Copyright (c) 2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...util.misc import param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Poly(Kern):
    """
    Polynomial kernel.

    The covariance computed by :meth:`K` is

        k(x, y) = variance * (x . y + 1) ** order

    Only ``variance`` is registered as a parameter (with a ``Logexp``
    transformation); ``order`` is stored as a plain attribute.
    """

    def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
        """
        :param input_dim: forwarded to the ``Kern`` constructor
        :param variance: initial value of the ``variance`` parameter
        :param order: exponent applied to ``(x . y + 1)``
        :param active_dims: forwarded to the ``Kern`` constructor
        :param name: forwarded to the ``Kern`` constructor
        """
        super(Poly, self).__init__(input_dim, active_dims, name)
        self.variance = Param('variance', variance, Logexp())
        self.add_parameter(self.variance)
        self.order = order

    def K(self, X, X2=None):
        """Covariance between the rows of X and the rows of X2 (X itself when X2 is None)."""
        base = self._dot_product(X, X2) + 1.
        return base**self.order * self.variance

    def _dot_product(self, X, X2=None):
        """Matrix of inner products between rows of X and rows of X2 (or X when X2 is None)."""
        other = X if X2 is None else X2
        return np.dot(X, other.T)

    def Kdiag(self, X):
        """Diagonal of K(X, X), computed from the squared row norms of X."""
        squared_norms = np.square(X).sum(1)
        return self.variance*(squared_norms + 1.)**self.order

    def update_gradients_full(self, dL_dK, X, X2=None):
        """Set the gradient of ``variance`` from dL_dK; K is linear in ``variance``."""
        dK_dvariance = (self._dot_product(X, X2) + 1.)**self.order
        self.variance.gradient = np.sum(dL_dK * dK_dvariance)

    def update_gradients_diag(self, dL_dKdiag, X):
        """Not implemented for this kernel."""
        raise NotImplementedError

    def gradients_X(self, dL_dK, X, X2=None):
        """Not implemented for this kernel."""
        raise NotImplementedError

    def gradients_X_diag(self, dL_dKdiag, X):
        """Not implemented for this kernel."""
        raise NotImplementedError

View file

@ -10,6 +10,7 @@ from GPy.util.caching import Cache_this
from ...core.parameterization import variational
from psi_comp import ssrbf_psi_comp
from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF
from ...util.config import *
class RBF(Stationary):
"""
@ -231,6 +232,16 @@ class RBF(Stationary):
@Cache_this(limit=1)
def _psi2computations(self, Z, vp):
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(tmp, exponent_tmp)'
header_string = '#include <omp.h>'
libraries = ['gomp']
else:
pragma_string = ''
header_string = ''
libraries = []
mu, S = vp.mean, vp.variance
N, Q = mu.shape
@ -253,8 +264,7 @@ class RBF(Stationary):
variance_sq = float(np.square(self.variance))
code = """
double tmp, exponent_tmp;
#pragma omp parallel for private(tmp, exponent_tmp)
%s
for (int n=0; n<N; n++)
{
for (int m=0; m<M; m++)
@ -278,20 +288,20 @@ class RBF(Stationary):
tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
exponent_tmp += tmp;
}
//compute psi2 by exponontiating
//compute psi2 by exponentiating
psi2(n,m,mm) = variance_sq * exp(exponent_tmp);
psi2(n,mm,m) = psi2(n,m,mm);
}
}
}
"""
""" % pragma_string
support_code = """
#include <omp.h>
%s
#include <math.h>
"""
""" % header_string
mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'],
weave.inline(code, support_code=support_code, libraries=libraries,
arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
@ -303,12 +313,20 @@ class RBF(Stationary):
#return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale
result = np.zeros(self.input_dim)
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for reduction(+:tmp)'
header_string = '#include <omp.h>'
libraries = ['gomp']
else:
pragma_string = ''
header_string = ''
libraries = []
code = """
double tmp;
for(int q=0; q<Q; q++)
{
tmp = 0.0;
#pragma omp parallel for reduction(+:tmp)
%s
for(int n=0; n<N; n++)
{
for(int m=0; m<M; m++)
@ -326,16 +344,16 @@ class RBF(Stationary):
result(q) = tmp;
}
"""
""" % pragma_string
support_code = """
#include <omp.h>
%s
#include <math.h>
"""
""" % header_string
N,Q = S.shape
M = psi2.shape[-1]
S = param_to_array(S)
weave.inline(code, support_code=support_code, libraries=['gomp'],
weave.inline(code, support_code=support_code, libraries=libraries,
arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
type_converters=weave.converters.blitz, **self.weave_options)

View file

@ -192,6 +192,27 @@ class Exponential(Stationary):
def dK_dr(self, r):
return -0.5*self.K_of_r(r)
class OU(Stationary):
    r"""
    OU kernel:

    .. math::

       k(r) = \sigma^2 \exp(- r) \ \ \ \  \text{ where  } r = \sqrt{\sum_{i=1}^{input_dim} \frac{(x_i-y_i)^2}{\ell_i^2} }

    """

    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='OU'):
        """All arguments are forwarded unchanged to the ``Stationary`` constructor."""
        super(OU, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        """Covariance as a function of the scaled distance r: variance * exp(-r)."""
        return self.variance * np.exp(-r)

    def dK_dr(self, r):
        """Derivative of K_of_r with respect to r, which is simply -K_of_r(r)."""
        # d/dr [variance * exp(-r)] = -variance * exp(-r)
        return -self.K_of_r(r)
class Matern32(Stationary):
"""
Matern 3/2 kernel:

View file

@ -8,16 +8,16 @@ from likelihood import Likelihood
from mixed_noise import MixedNoise
#TODO need to fix this in a config file.
#TODO need to add the files to the git repo!
#try:
#import sympy as sym
#sympy_available=True
#except ImportError:
#sympy_available=False
#if sympy_available:
## These are likelihoods that rely on symbolic.
#from symbolic import Symbolic
#from sstudent_t import SstudentT
#from negative_binomial import Negative_binomial
##from skew_normal import Skew_normal
#from skew_exponential import Skew_exponential
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
#These are likelihoods that rely on symbolic.
from symbolic import Symbolic
from sstudent_t import SstudentT
from negative_binomial import Negative_binomial
from skew_normal import Skew_normal
from skew_exponential import Skew_exponential
# from null_category import Null_category

View file

@ -0,0 +1,48 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from symbolic import Symbolic
from scipy import stats
class Ordinal(Symbolic):
    """
    Ordinal likelihood assembled symbolically, one log-probability expression per category.

    With ``f`` the latent function value, the entries of ``log_pdf`` are built as:

    * category 0: ``normcdfln(-f)``
    * last category (more than two categories): ``normcdfln(sum(w) + f)``
    * intermediate category ``i``:
      ``log(normcdf(sum(w[0:i-1]) + f) - normcdf(sum(w[0:i]) - f))``
    * category 1 in the two-category case: ``normcdfln(f)``

    where ``w`` is the 1 x ``categories`` object returned by
    ``create_matrix('w', 1, categories)``.

    .. Note::
        Y is represented by a non-negative integer symbol which is used as the
        index variable of ``log_pdf``.
        The default link function is the identity.

    .. See also::
        symbolic.py, for the parent class
    """

    def __init__(self, categories=3, gp_link=None):
        """
        :param categories: number of ordinal categories (length of ``log_pdf``)
        :param gp_link: link function; ``link_functions.Identity()`` when None
        """
        if gp_link is None:
            gp_link = link_functions.Identity()

        y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
        f_0 = sym.Symbol('f_0', positive=True, real=True)
        log_pdf = create_matrix('log_pdf', 1, categories)
        log_pdf[0] = normcdfln(-f_0)
        if categories>2:
            w = create_matrix('w', 1, categories)
            log_pdf[categories-1] = normcdfln(w.sum() + f_0)
            for i in range(1, categories-1):
                # NOTE(review): the two normcdf arguments use opposite signs of f_0
                # and the shorter partial sum comes first -- confirm this is the
                # intended expression for the intermediate categories.
                log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) )
        else:
            log_pdf[1] = normcdfln(f_0)
        # y_0 selects which entry of log_pdf applies to an observation.
        log_pdf.index_var = y_0
        super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal')

        # TODO: Check this.
        self.log_concave = True

View file

@ -42,7 +42,7 @@ class BayesianGPLVM(SparseGP):
assert Z.shape[1] == X.shape[1]
if kernel is None:
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)
if likelihood is None:
likelihood = Gaussian()

View file

@ -97,7 +97,7 @@ def plot_latent(model, labels=None, which_indices=None,
elif type(ul) is np.int64:
this_label = 'class %i' % ul
else:
this_label = 'class %i' % i
this_label = unicode(ul)
m = marker.next()
index = np.nonzero(labels == ul)[0]

View file

@ -14,7 +14,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None):
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
for d in which_data_ycols:
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs

View file

@ -74,13 +74,16 @@ class vector_show(matplotlib_show):
"""
def __init__(self, vals, axes=None):
matplotlib_show.__init__(self, vals, axes)
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)
#assert vals.ndim == 2, "Please give a vector in [n x 1] to plot"
#assert vals.shape[1] == 1, "only showing a vector in one dimension"
self.size = vals.size
self.handle = self.axes.plot(np.arange(0, vals.size)[:, None], vals)[0]
def modify(self, vals):
self.vals = vals.copy()
for handle, vals in zip(self.handle, self.vals.T):
xdata, ydata = handle.get_data()
handle.set_data(xdata, vals)
xdata, ydata = self.handle.get_data()
assert vals.size == self.size, "values passed into modify changed size! vals.size:{} != in.size:{}".format(vals.size, self.size)
self.handle.set_data(xdata, self.vals)
self.axes.figure.canvas.draw()
@ -94,13 +97,12 @@ class lvm(matplotlib_show):
:type data_visualize: visualize.data_show type.
:param latent_axes: the axes where the latent visualization should be plotted.
"""
if vals == None:
if vals is None:
if isinstance(model.X, VariationalPosterior):
vals = param_to_array(model.X.mean)
else:
vals = param_to_array(model.X)
vals = param_to_array(vals)
matplotlib_show.__init__(self, vals, axes=latent_axes)
if isinstance(latent_axes,mpl.axes.Axes):
@ -273,7 +275,7 @@ class image_show(matplotlib_show):
:type preset_mean: double
:param preset_std: the preset standard deviation of a scaled image.
:type preset_std: double"""
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean = 0., preset_std = -1., select_image=0):
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean=0., preset_std=1., select_image=0):
matplotlib_show.__init__(self, vals, axes)
self.dimensions = dimensions
self.transpose = transpose
@ -323,13 +325,12 @@ class image_show(matplotlib_show):
self.vals = -self.vals
# un-normalizing, for visualisation purposes:
if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255)
self.vals = self.vals*self.preset_std + self.preset_mean
# Clipping the values:
self.vals[self.vals < 0] = 0
self.vals[self.vals > 255] = 255
else:
self.vals = 255*(self.vals - self.vals.min())/(self.vals.max() - self.vals.min())
#self.vals[self.vals < 0] = 0
#self.vals[self.vals > 255] = 255
#else:
#self.vals = 255*(self.vals - self.vals.min())/(self.vals.max() - self.vals.min())
if not self.palette == []: # applying using an image palette (e.g. if the image has been quantized)
from PIL import Image
self.vals = Image.fromarray(self.vals.astype('uint8'))

View file

@ -304,23 +304,13 @@ class KernelTestsMiscellaneous(unittest.TestCase):
def setUp(self):
N, D = 100, 10
self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.random.uniform(-10,10,D)
self.rbf = GPy.kern.RBF(2, active_dims=slice(0,4,2))
self.rbf = GPy.kern.RBF(2, active_dims=np.arange(0,4,2))
self.linear = GPy.kern.Linear(2, active_dims=(3,9))
self.matern = GPy.kern.Matern32(3, active_dims=np.array([1,7,9]))
self.sumkern = self.rbf + self.linear
self.sumkern += self.matern
self.sumkern.randomize()
def test_active_dims(self):
# test the automatic dim detection expression for slices:
start, stop = 0, 277
for i in range(start,stop,7):
for j in range(1,4):
GPy.kern.Kern(int(np.round((i+1)/j)), slice(0, i+1, j), "testkern")
# test the ability to have only one dim
sk = GPy.kern.RBF(2) + GPy.kern.Matern32(2)
self.assertEqual(sk.input_dim, 2)
def test_which_parts(self):
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X)))
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X)))
@ -344,10 +334,15 @@ class KernelTestsNonContinuous(unittest.TestCase):
self.X2[(N0*2):, -1] = 1
def test_IndependentOutputs(self):
k = GPy.kern.RBF(self.D)
k = GPy.kern.RBF(self.D, active_dims=range(self.D))
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, active_dims=range(self.D), name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
def test_Hierarchical(self):
    """Gradient-check a kernel combining two RBF parts that both act on input dims 0 and 2."""
    # NOTE(review): despite the test name, the parts are wrapped in
    # IndependentOutputs (named 'ind_split') -- confirm whether a hierarchical
    # kernel was meant to be exercised here.
    parts = [GPy.kern.RBF(2, active_dims=[0,2], name=label) for label in ('rbf1', 'rbf2')]
    kern = GPy.kern.IndependentOutputs(parts, -1, name='ind_split')
    gradients_ok = check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)
    self.assertTrue(gradients_ok)

View file

@ -27,11 +27,11 @@ class ArrayCoreTest(unittest.TestCase):
class ParameterizedTest(unittest.TestCase):
def setUp(self):
self.rbf = GPy.kern.RBF(1)
self.rbf = GPy.kern.RBF(20)
self.white = GPy.kern.White(1)
from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logistic
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
self.param = Param('param', np.random.uniform(0,1,(25,2)), Logistic(0, 1))
self.test1 = GPy.core.Parameterized("test model")
self.test1.param = self.param
@ -142,6 +142,8 @@ class ParameterizedTest(unittest.TestCase):
self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_regular_expression_misc(self):
self.testmodel.kern.lengthscale.fix()
val = float(self.testmodel.kern.lengthscale)

View file

@ -132,6 +132,9 @@ class Test(ListDictTestCase):
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0))
pcopy.optimize('bfgs')
par.optimize('bfgs')
np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=.001)
with tempfile.TemporaryFile('w+b') as f:
par.pickle(f)
f.seek(0)

View file

@ -1,84 +1,108 @@
from ..core.parameterization.parameter_core import Observable
import itertools
import itertools, collections, weakref
class Cacher(object):
"""
"""
def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
"""
Parameters:
***********
:param callable operation: function to cache
:param int limit: depth of cacher
:param [int] ignore_args: list of indices, pointing at arguments to ignore in *args of operation(*args). This includes self!
:param [str] force_kwargs: list of kwarg names (strings). If a kwarg with that name is given, the cacher will force recompute and wont cache anything.
"""
self.limit = int(limit)
self.ignore_args = ignore_args
self.force_kwargs = force_kwargs
self.operation=operation
self.cached_inputs = []
self.cached_outputs = []
self.inputs_changed = []
self.order = collections.deque()
self.cached_inputs = {} # point from cache_ids to a list of [ind_ids], which where used in cache cache_id
#=======================================================================
# point from each ind_id to [ref(obj), cache_ids]
# 0: a weak reference to the object itself
# 1: the cache_ids in which this ind_id is used (len will be how many times we have seen this ind_id)
self.cached_input_ids = {}
#=======================================================================
self.cached_outputs = {} # point from cache_ids to outputs
self.inputs_changed = {} # point from cache_ids to bools
def combine_args_kw(self, args, kw):
    """
    Merge positional and keyword arguments into a single tuple.

    The keyword values are appended to ``args`` in the order of their sorted
    keyword names, so the order in which kwargs were given does not lead to
    a recompute.
    """
    ordered_kw_values = tuple(kw[name] for name in sorted(kw))
    return args + ordered_kw_values
def preprocess(self, combined_args_kw, ignore_args):
    """
    Build the cache id for one call.

    The id is the comma-separated string of ``id()`` values of the arguments,
    in order, skipping every position listed in ``ignore_args``.

    The separator matters: without it two different id sequences could
    concatenate to the same string (e.g. "12" + "3" and "1" + "23") and
    would wrongly share one cache entry.
    """
    return ",".join(str(id(a)) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
def ensure_cache_length(self, cache_id):
    """
    Ensures the cache is within its limits and has one place free.

    When the number of cached ids equals ``self.limit`` the oldest id is
    dropped: its bookkeeping entries are removed and this cacher stops
    observing every input that was only used by that id.

    :param cache_id: not read; the id that gets evicted is taken from ``self.order``.
    """
    if len(self.order) != self.limit:
        return

    # we have reached the limit, so release the oldest element
    evicted = self.order.popleft()
    for arg in self.cached_inputs[evicted]:
        key = id(arg)
        weak, users = self.cached_input_ids[key]
        target = weak()
        if len(users) == 1 and target is not None:
            # the evicted id was the only user of this input
            target.remove_observer(self, self.on_cache_changed)
            del self.cached_input_ids[key]
        else:
            users.remove(evicted)
            self.cached_input_ids[key] = [weak, users]
    for table in (self.cached_outputs, self.inputs_changed, self.cached_inputs):
        del table[evicted]
def add_to_cache(self, cache_id, combined_args_kw, output):
    """
    Store ``output`` under ``cache_id`` and register the inputs it depends on.

    For every input the cache id is recorded in ``self.cached_input_ids``;
    an input seen for the first time is additionally observed through
    ``self.on_cache_changed``.
    """
    self.inputs_changed[cache_id] = False
    self.cached_outputs[cache_id] = output
    self.order.append(cache_id)
    self.cached_inputs[cache_id] = combined_args_kw
    for arg in combined_args_kw:
        key = id(arg)
        try:
            entry = self.cached_input_ids[key]
        except KeyError:
            # first sighting: [weak reference to the input, cache ids using it]
            entry = [weakref.ref(arg), []]
        users = entry[1]
        users.append(cache_id)
        if len(users) == 1:
            arg.add_observer(self, self.on_cache_changed)
        self.cached_input_ids[key] = entry
def __call__(self, *args, **kw):
"""
A wrapper function for self.operation,
"""
#ensure that specified arguments are ignored
items = sorted(kw.items(), key=lambda x: x[0])
oa_all = args + tuple(a for _,a in items)
if len(self.ignore_args) != 0:
oa = [a for i,a in itertools.chain(enumerate(args), items) if i not in self.ignore_args and i not in self.force_kwargs]
else:
oa = oa_all
# this makes sure we only add an observer once, and that None can be in args
observable_args = []
for a in oa:
if (not any(a is ai for ai in observable_args)) and a is not None:
observable_args.append(a)
#make sure that all the found argument really are observable:
#otherswise don't cache anything, pass args straight though
if not all([isinstance(arg, Observable) for arg in observable_args]):
return self.operation(*args, **kw)
# 1: Check whether we have forced recompute arguments:
if len(self.force_kwargs) != 0:
# check if there are force args, which force reloading
for k in self.force_kwargs:
if k in kw and kw[k] is not None:
return self.operation(*args, **kw)
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
# return self.operation(*args, **kw)
#if the result is cached, return the cached computation
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
# 2: preprocess and get the unique id string for this call
combined_args_kw = self.combine_args_kw(args, kw)
cache_id = self.preprocess(combined_args_kw, self.ignore_args)
# 2: if anything is not cachable, we will just return the operation, without caching
if reduce(lambda a,b: a or (not isinstance(b, Observable)), combined_args_kw, False):
return self.operation(*args, **kw)
# 3&4: check whether this cache_id has been cached, then has it changed?
try:
if any(state):
i = state.index(True)
if self.inputs_changed[i]:
#(elements of) the args have changed since we last computed: update
self.cached_outputs[i] = self.operation(*args, **kw)
self.inputs_changed[i] = False
return self.cached_outputs[i]
else:
#first time we've seen these arguments: compute
#first make sure the depth limit isn't exceeded
if len(self.cached_inputs) == self.limit:
args_ = self.cached_inputs.pop(0)
args_ = [a for i,a in enumerate(args_) if i not in self.ignore_args and i not in self.force_kwargs]
[a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
self.inputs_changed.pop(0)
self.cached_outputs.pop(0)
#compute
self.cached_inputs.append(oa_all)
self.cached_outputs.append(self.operation(*args, **kw))
self.inputs_changed.append(False)
[a.add_observer(self, self.on_cache_changed) for a in observable_args]
return self.cached_outputs[-1]#return
if(self.inputs_changed[cache_id]):
# 4: This happens, when one element has changed for this cache id
self.inputs_changed[cache_id] = False
self.cached_outputs[cache_id] = self.operation(*args, **kw)
except KeyError:
# 3: This is when we never saw this cache_id:
self.ensure_cache_length(cache_id)
self.add_to_cache(cache_id, combined_args_kw, self.operation(*args, **kw))
except:
self.reset()
raise
# 5: We have seen this cache_id and it is cached:
return self.cached_outputs[cache_id]
def on_cache_changed(self, direct, which=None):
"""
@ -86,17 +110,19 @@ class Cacher(object):
this function gets 'hooked up' to the inputs when we cache them, and upon their elements being changed we update here.
"""
self.inputs_changed = [any([a is direct or a is which for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
for ind_id in [id(direct), id(which)]:
_, cache_ids = self.cached_input_ids.get(ind_id, [None, []])
for cache_id in cache_ids:
self.inputs_changed[cache_id] = True
def reset(self):
"""
Totally reset the cache
"""
[[a.remove_observer(self, self.on_cache_changed) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
[[a.remove_observer(self, self.reset) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
self.cached_inputs = []
self.cached_outputs = []
self.inputs_changed = []
[a().remove_observer(self, self.on_cache_changed) if (a() is not None) else None for [a, _] in self.cached_input_ids.values()]
self.cached_input_ids = {}
self.cached_outputs = {}
self.inputs_changed = {}
def __deepcopy__(self, memo=None):
    """
    Return a new Cacher built from this one's constructor arguments.

    Only the configuration (operation, limit, ignore_args, force_kwargs) is
    passed on; the copy is created through the constructor and does not
    receive this instance's cache dictionaries.
    """
    return Cacher(operation=self.operation,
                  limit=self.limit,
                  ignore_args=self.ignore_args,
                  force_kwargs=self.force_kwargs)

View file

@ -1,65 +1,340 @@
{
"rogers_girolami_data":{
"files":[
[
"firstcoursemldata.tar.gz"
]
],
"license":null,
"citation":"A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146",
"details":"Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.",
"urls":[
"https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/"
],
"suffices":[
[
"?dl=1"
]
],
"size":21949154
},
"ankur_pose_data": {
"citation": "3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.",
"details": "Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more than the other, disambiguating the pose as to which way the individual is facing.",
"files": [
[
"ankurDataPoseSilhouette.mat"
]
],
"citation":"3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.",
"license": null,
"size": 1,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/ankur_pose_data/"
]
},
"boston_housing": {
"citation": "Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.",
"details": "The Boston Housing data relates house values in Boston to a range of input variables.",
"files": [
[
"Index",
"housing.data",
"housing.names"
]
],
"details":"Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing.",
"size":1
"license": null,
"size": 51276,
"urls": [
"http://archive.ics.uci.edu/ml/machine-learning-databases/housing/"
]
},
"boxjenkins_airline": {
"citation": "Box & Jenkins (1976), in file: data/airpass, Description: International airline passengers: monthly totals in thousands. Jan 49 \\u2013 Dec 60",
"details": "International airline passengers, monthly totals from January 1949 to December 1960.",
"files": [
[
"boxjenkins_airline.csv"
]
],
"license": "You may copy and redistribute the data. You may make derivative works from the data. You may use the data for commercial purposes. You may not sublicence the data when redistributing it. You may not redistribute the data under a different license. Source attribution on any use of this data: Must refer source.",
"size": 46779,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/boxjenkins_airline/"
]
},
"brendan_faces": {
"citation": "Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.",
"details": "A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.",
"files": [
[
"frey_rawface.mat"
]
],
"license": null,
"size": 1100584,
"urls": [
"http://www.cs.nyu.edu/~roweis/data/"
]
},
"cmu_mocap_full": {
"citation": "Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\\nThe database was created with funding from NSF EIA-0196217.",
"details": "CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
"files": [
[
"allasfamc.zip"
]
],
"license": "From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.",
"size": null,
"urls": [
"http://mocap.cs.cmu.edu/subjects"
]
},
"creep_rupture": {
"citation": "Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.",
"details": "Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.",
"files": [
[
"creeprupt.tar"
]
],
"license": null,
"size": 602797,
"urls": [
"http://www.msm.cam.ac.uk/map/data/tar/"
]
},
"decampos_characters": {
"citation": "T. de Campos, B. R. Babu, and M. Varma. Character recognition in natural images. VISAPP 2009.",
"details": "Examples of hand written digits taken from the de Campos et al paper on Character Recognition in Natural Images.",
"files": [
[
"characters.npy",
"digits.npy"
]
],
"license": null,
"size": 2031872,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/decampos_digits/"
]
},
"della_gatta": {
"citation": "Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008",
"details": "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
"files": [
[
"DellaGattadata.mat"
]
],
"license": null,
"size": 3729650,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/"
]
},
"epomeo_gpx": {
"citation": "",
"details": "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
"files": [
[
"endomondo_1.gpx",
"endomondo_2.gpx",
"garmin_watch_via_endomondo.gpx",
"viewranger_phone.gpx",
"viewranger_tablet.gpx"
]
],
"license": null,
"size": 2031872,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/"
]
},
"football_data": {
"citation": "",
"details": "Results of English football matches since 1993/94 season.",
"files": [
[
"E0.csv", "E1.csv", "E2.csv", "E3.csv"
"E0.csv",
"E1.csv",
"E2.csv",
"E3.csv"
]
],
"citation":"",
"license": null,
"size": 1,
"urls": [
"http://www.football-data.co.uk/mmz4281/"
],
"details":"Results of English football matches since 1993/94 season.",
"size":1
]
},
"google_trends":{
"fruitfly_tomancak": {
"citation": "",
"details": "",
"files": [
[
"tomancak_exprs.csv",
"tomancak_se.csv",
"tomancak_prctile5.csv",
"tomancak_prctile25.csv",
"tomancak_prctile50.csv",
"tomancak_prctile75.csv",
"tomancak_prctile95.csv"
]
],
"citation":"",
"license": null,
"size": 59000000,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/fruitfly_tomancak/"
]
},
"fruitfly_tomancak_cel_files": {
"citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin",
"details": "Gene expression results from blastoderm development in Drosophila Melanogaster.",
"files": [
[
"embryo_tc_4_1.CEL",
"embryo_tc_4_2.CEL",
"embryo_tc_4_3.CEL",
"embryo_tc_4_4.CEL",
"embryo_tc_4_5.CEL",
"embryo_tc_4_6.CEL",
"embryo_tc_4_7.CEL",
"embryo_tc_4_8.CEL",
"embryo_tc_4_9.CEL",
"embryo_tc_4_10.CEL",
"embryo_tc_4_11.CEL",
"embryo_tc_4_12.CEL",
"embryo_tc_6_1.CEL",
"embryo_tc_6_2.CEL",
"embryo_tc_6_3.CEL",
"embryo_tc_6_4.CEL",
"embryo_tc_6_5.CEL",
"embryo_tc_6_6.CEL",
"embryo_tc_6_7.CEL",
"embryo_tc_6_8.CEL",
"embryo_tc_6_9.CEL",
"embryo_tc_6_10.CEL",
"embryo_tc_6_11.CEL",
"embryo_tc_6_12.CEL",
"embryo_tc_8_1.CEL",
"embryo_tc_8_2.CEL",
"embryo_tc_8_3.CEL",
"embryo_tc_8_4.CEL",
"embryo_tc_8_5.CEL",
"embryo_tc_8_6.CEL",
"embryo_tc_8_7.CEL",
"embryo_tc_8_8.CEL",
"embryo_tc_8_9.CEL",
"embryo_tc_8_10.CEL",
"embryo_tc_8_11.CEL",
"embryo_tc_8_12.CEL",
"CG_AffyOligo_Gadfly3_01_13_03",
"embryo_tc_rma_release2.txt",
"embryo_tc_rma_release3.txt",
"na_affy_oligo.dros",
"README.TXT"
]
],
"license": null,
"size": 389000000,
"urls": [
"ftp://ftp.fruitfly.org/pub/embryo_tc_array_data/"
]
},
"google_trends": {
"citation": "",
"details": "Google trends results.",
"files": [
[
]
],
"license": null,
"size": 0,
"urls": [
"http://www.google.com/trends/"
]
},
"hapmap3": {
"citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.",
"details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n SNP_matrix (A) encoding [see Paschou et al. 2007 (PCA-Correlated SNPs...)]:\n Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n / 1, iff SNPij==(B1,B1)\n Aij = | 0, iff SNPij==(B1,B2)\n \\\\ -1, iff SNPij==(B2,B2)\n\n The SNP data and the meta information (such as iid, sex and phenotype) are\n stored in the dataframe datadf, index is the Individual ID, \n with following columns for metainfo:\n\n * family_id -> Family ID\n * paternal_id -> Paternal ID\n * maternal_id -> Maternal ID\n * sex -> Sex (1=male; 2=female; other=unknown)\n * phenotype -> Phenotype (-9, or 0 for unknown)\n * population -> Population string (e.g. 'ASW' - 'YRI')\n * rest are SNP rs (ids)\n\n More information is given in infodf:\n\n * Chromosome:\n - autosomal chromosomes -> 1-22\n - X X chromosome -> 23\n - Y Y chromosome -> 24\n - XY Pseudo-autosomal region of X -> 25\n - MT Mitochondrial -> 26\n * Relative Position (to Chromosome) [base pairs]\n\n ",
"files": [
[
"hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2",
"hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2",
"relationships_w_pops_121708.txt"
]
],
"details":"Google trends results.",
"size":0
"license": "International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)",
"size": 3458246739,
"urls": [
"http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/"
]
},
"isomap_face_data": {
"citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
"details": "Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
"files": [
[
"face_data.mat"
]
],
"license": null,
"size": 24229368,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/"
]
},
"mauna_loa": {
"citation": "Mauna Loa Data. Dr. Pieter Tans, NOAA/ESRL (www.esrl.noaa.gov/gmd/ccgg/trends/) and Dr. Ralph Keeling, Scripps Institution of Oceanography (scrippsco2.ucsd.edu/).",
"details": "The 'average' column contains the monthly mean CO2 mole fraction determined from daily averages. The mole fraction of CO2, expressed as parts per million (ppm) is the number of molecules of CO2 in every one million molecules of dried air (water vapor removed). If there are missing days concentrated either early or late in the month, the monthly mean is corrected to the middle of the month using the average seasonal cycle. Missing months are denoted by -99.99. The 'interpolated' column includes average values from the preceding column and interpolated values where data are missing. Interpolated values are computed in two steps. First, we compute for each month the average seasonal cycle in a 7-year window around each monthly value. In this way the seasonal cycle is allowed to change slowly over time. We then determine the 'trend' value for each month by removing the seasonal cycle; this result is shown in the 'trend' column. Trend values are linearly interpolated for missing months. The interpolated monthly mean is then the sum of the average seasonal cycle value and the trend value for the missing month.\n\nNOTE: In general, the data presented for the last year are subject to change, depending on recalibration of the reference gas mixtures used, and other quality control procedures. Occasionally, earlier years may also be changed for the same reasons. Usually these changes are minor.\n\nCO2 expressed as a mole fraction in dry air, micromol/mol, abbreviated as ppm \n\n (-99.99 missing data; -1 no data for daily means in month)",
"files": [
[
"co2_mm_mlo.txt"
]
],
"license": "-------------------------------------------------------------------- USE OF NOAA ESRL DATA\n\n These data are made freely available to the public and the scientific community in the belief that their wide dissemination will lead to greater understanding and new scientific insights. The availability of these data does not constitute publication of the data. NOAA relies on the ethics and integrity of the user to insure that ESRL receives fair credit for their work. If the data are obtained for potential use in a publication or presentation, ESRL should be informed at the outset of the nature of this work. If the ESRL data are essential to the work, or if an important result or conclusion depends on the ESRL data, co-authorship may be appropriate. This should be discussed at an early stage in the work. Manuscripts using the ESRL data should be sent to ESRL for review before they are submitted for publication so we can insure that the quality and limitations of the data are accurately represented.\n\n Contact: Pieter Tans (303 497 6678; pieter.tans@noaa.gov)\n\n RECIPROCITY Use of these data implies an agreement to reciprocate. Laboratories making similar measurements agree to make their own data available to the general public and to the scientific community in an equally complete and easily accessible form. Modelers are encouraged to make available to the community, upon request, their own tools used in the interpretation of the ESRL data, namely well documented model code, transport fields, and additional information necessary for other scientists to repeat the work and to run modified versions. Model availability includes collaborative support for new users of the models.\n --------------------------------------------------------------------\n\n See www.esrl.noaa.gov/gmd/ccgg/trends/ for additional details.",
"size": 46779,
"urls": [
"ftp://aftp.cmdl.noaa.gov/products/trends/co2/"
]
},
"olivetti_faces": {
"citation": "Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994",
"details": "Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ",
"files": [
[
"att_faces.zip"
],
[
"olivettifaces.mat"
]
],
"license": null,
"size": 8561331,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
"http://www.cs.nyu.edu/~roweis/data/"
]
},
"olivetti_glasses": {
"citation": "Information recorded in olivetti_faces entry. Should be used from there.",
"details": "Information recorded in olivetti_faces entry. Should be used from there.",
"files": [
[
"has_glasses.np"
],
[
"olivettifaces.mat"
]
],
"license": null,
"size": 4261047,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
"http://www.cs.nyu.edu/~roweis/data/"
]
},
"olympic_marathon_men": {
"citation": null,
"details": "Olympic men's marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia; we are not responsible for errors in the data.",
"files": [
[
"olympicMarathonTimes.csv"
]
],
"license": null,
"size": 584,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/"
]
},
"osu_accad": {
"citation": "The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
"details": "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
"files": [
[
"swagger1TXT.ZIP",
@ -82,101 +357,47 @@
]
],
"license": "Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
"citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
"details":"Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
"size": 15922790,
"urls": [
"http://accad.osu.edu/research/mocap/data/",
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
],
"size":15922790
]
},
"isomap_face_data":{
"osu_run1": {
"citation": "The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
"details": "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
"files": [
[
"face_data.mat"
]
"run1TXT.ZIP"
],
"license":null,
"citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
"details":"Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/isomap_face_data/"
],
"size":24229368
},
"boston_housing":{
"files":[
[
"Index",
"housing.data",
"housing.names"
"connections.txt"
]
],
"license":null,
"citation":"Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.",
"details":"The Boston Housing data relates house values in Boston to a range of input variables.",
"license": "Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
"size": 338103,
"urls": [
"http://archive.ics.uci.edu/ml/machine-learning-databases/housing/"
],
"size":51276
},
"cmu_mocap_full":{
"files":[
[
"allasfamc.zip"
"http://accad.osu.edu/research/mocap/data/",
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
]
],
"license":"From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.",
"citation":"Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\nThe database was created with funding from NSF EIA-0196217.",
"details":"CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
"urls":[
"http://mocap.cs.cmu.edu/subjects"
],
"size":null
},
"brendan_faces":{
"files":[
[
"frey_rawface.mat"
]
],
"license":null,
"citation":"Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.",
"details":"A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.",
"urls":[
"http://www.cs.nyu.edu/~roweis/data/"
],
"size":1100584
},
"olympic_marathon_men":{
"files":[
[
"olympicMarathonTimes.csv"
]
],
"license":null,
"citation":null,
"details":"Olympic men's marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia; we are not responsible for errors in the data.",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olympic_marathon_men/"
],
"size":584
},
"pumadyn-32nm": {
"citation": "Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.",
"details": "Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.",
"files": [
[
"pumadyn-32nm.tar.gz"
]
],
"license": "Data is made available by the Delve system at the University of Toronto",
"citation":"Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.",
"details":"Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.",
"size": 5861646,
"urls": [
"ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/"
],
"size":5861646
]
},
"ripley_prnn_data": {
"citation": "Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7",
"details": "Data sets from Brian Ripley's Pattern Recognition and Neural Networks",
"files": [
[
"Cushings.dat",
@ -194,14 +415,90 @@
]
],
"license": null,
"citation":"Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7",
"details":"Data sets from Brian Ripley's Pattern Recognition and Neural Networks",
"size": 93565,
"urls": [
"http://www.stats.ox.ac.uk/pub/PRNN/"
]
},
"robot_wireless": {
"citation": "WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.",
"details": "Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.",
"files": [
[
"uw-floor.txt"
]
],
"size":93565
"license": null,
"size": 284390,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/"
]
},
"rogers_girolami_data": {
"citation": "A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146",
"details": "Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.",
"files": [
[
"firstcoursemldata.tar.gz"
]
],
"license": null,
"size": 21949154,
"suffices": [
[
"?dl=1"
]
],
"urls": [
"https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/"
]
},
"singlecell": {
"citation": "Guoji Guo, Mikael Huss, Guo Qing Tong, Chaoyang Wang, Li Li Sun, Neil D. Clarke, Paul Robson, Resolution of Cell Fate Decisions Revealed by Single-Cell Gene Expression Analysis from Zygote to Blastocyst, Developmental Cell, Volume 18, Issue 4, 20 April 2010, Pages 675-685, ISSN 1534-5807, http://dx.doi.org/10.1016/j.devcel.2010.02.012. (http://www.sciencedirect.com/science/article/pii/S1534580710001103) Keywords: DEVBIO",
"details": "qPCR TaqMan array single cell experiment in mouse. The data is taken from the early stages of development when the Blastocyst is forming. At the 32 cell stage the data is already separated into the trophectoderm (TE) which goes onto form the placenta and the inner cellular mass (ICM). The ICM further differentiates into the epiblast (EPI)---which gives rise to the endoderm, mesoderm and ectoderm---and the primitive endoderm (PE) which develops into the amniotic sack. Guo et al selected 48 genes for expression measurement. They labelled the resulting cells and their labels are included as an aide to visualization.",
"files": [
[
"singlecell.csv"
]
],
"license": "ScienceDirect: http://www.elsevier.com/locate/termsandconditions?utm_source=sciencedirect&utm_medium=link&utm_campaign=terms",
"size": 233.1,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
]
},
"sod1_mouse": {
"citation": "'Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C. Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
"details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",
"files": [
[
"sod1_C57_129_exprs.csv",
"sod1_C57_129_se.csv"
]
],
"license": null,
"size": 0,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/sod1_mouse/"
]
},
"swiss_roll": {
"citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
"details": "Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
"files": [
[
"swiss_roll_data.mat"
]
],
"license": null,
"size": 800256,
"urls": [
"http://isomap.stanford.edu/"
]
},
"three_phase_oil_flow": {
"citation": "Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593",
"details": "The three phase oil data used initially for demonstrating the Generative Topographic mapping.",
"files": [
[
"DataTrnLbls.txt",
@ -213,197 +510,23 @@
]
],
"license": null,
"citation":"Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593",
"details":"The three phase oil data used initially for demonstrating the Generative Topographic mapping.",
"size": 712796,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/three_phase_oil_flow/"
],
"size":712796
},
"robot_wireless":{
"files":[
[
"uw-floor.txt"
]
],
"license":null,
"citation":"WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.",
"details":"Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/robot_wireless/"
],
"size":284390
},
"xw_pen": {
"citation": "Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005",
"details": "Accelerometer pen data used for robust regression by Tipping and Lawrence.",
"files": [
[
"xw_pen_15.csv"
]
],
"license": null,
"citation":"Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005",
"details":"Accelerometer pen data used for robust regression by Tipping and Lawrence.",
"size": 3410,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/xw_pen/"
],
"size":3410
},
"swiss_roll":{
"files":[
[
"swiss_roll_data.mat"
]
],
"license":null,
"citation":"A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
"details":"Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
"urls":[
"http://isomap.stanford.edu/"
],
"size":800256
},
"osu_run1":{
"files":[
[
"run1TXT.ZIP"
],
[
"connections.txt"
]
],
"license":"Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).",
"citation":"The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.",
"details":"Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
"urls":[
"http://accad.osu.edu/research/mocap/data/",
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/stick/"
],
"size":338103
},
"creep_rupture":{
"files":[
[
"creeprupt.tar"
]
],
"license":null,
"citation":"Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.",
"details":"Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.",
"urls":[
"http://www.msm.cam.ac.uk/map/data/tar/"
],
"size":602797
},
"olivetti_faces":{
"files":[
[
"att_faces.zip"
],
[
"olivettifaces.mat"
]
],
"license":null,
"citation":"Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994",
"details":"Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. ",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
"http://www.cs.nyu.edu/~roweis/data/"
],
"size":8561331
},
"olivetti_glasses":{
"files":[
[
"has_glasses.np"
],
[
"olivettifaces.mat"
]
],
"license":null,
"citation":"Information recorded in olivetti_faces entry. Should be used from there.",
"details":"Information recorded in olivetti_faces entry. Should be used from there.",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/olivetti_faces/",
"http://www.cs.nyu.edu/~roweis/data/"
],
"size":4261047
},
"della_gatta":{
"files":[
[
"DellaGattadata.mat"
]
],
"license":null,
"citation":"Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008",
"details":"The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/della_gatta/"
],
"size":3729650
},
"epomeo_gpx":{
"files":[
[
"endomondo_1.gpx",
"endomondo_2.gpx",
"garmin_watch_via_endomondo.gpx",
"viewranger_phone.gpx",
"viewranger_tablet.gpx"
]
],
"license":null,
"citation":"",
"details":"Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/epomeo_gpx/"
],
"size":2031872
},
"mauna_loa":{
"files":[
[
"co2_mm_mlo.txt"
]
],
"license":"-------------------------------------------------------------------- USE OF NOAA ESRL DATA\n\n These data are made freely available to the public and the scientific community in the belief that their wide dissemination will lead to greater understanding and new scientific insights. The availability of these data does not constitute publication of the data. NOAA relies on the ethics and integrity of the user to insure that ESRL receives fair credit for their work. If the data are obtained for potential use in a publication or presentation, ESRL should be informed at the outset of the nature of this work. If the ESRL data are essential to the work, or if an important result or conclusion depends on the ESRL data, co-authorship may be appropriate. This should be discussed at an early stage in the work. Manuscripts using the ESRL data should be sent to ESRL for review before they are submitted for publication so we can insure that the quality and limitations of the data are accurately represented.\n\n Contact: Pieter Tans (303 497 6678; pieter.tans@noaa.gov)\n\n RECIPROCITY Use of these data implies an agreement to reciprocate. Laboratories making similar measurements agree to make their own data available to the general public and to the scientific community in an equally complete and easily accessible form. Modelers are encouraged to make available to the community, upon request, their own tools used in the interpretation of the ESRL data, namely well documented model code, transport fields, and additional information necessary for other scientists to repeat the work and to run modified versions. Model availability includes collaborative support for new users of the models.\n --------------------------------------------------------------------\n\n See www.esrl.noaa.gov/gmd/ccgg/trends/ for additional details.",
"citation":"Mauna Loa Data. Dr. Pieter Tans, NOAA/ESRL (www.esrl.noaa.gov/gmd/ccgg/trends/) and Dr. Ralph Keeling, Scripps Institution of Oceanography (scrippsco2.ucsd.edu/).",
"details":"The 'average' column contains the monthly mean CO2 mole fraction determined from daily averages. The mole fraction of CO2, expressed as parts per million (ppm) is the number of molecules of CO2 in every one million molecules of dried air (water vapor removed). If there are missing days concentrated either early or late in the month, the monthly mean is corrected to the middle of the month using the average seasonal cycle. Missing months are denoted by -99.99. The 'interpolated' column includes average values from the preceding column and interpolated values where data are missing. Interpolated values are computed in two steps. First, we compute for each month the average seasonal cycle in a 7-year window around each monthly value. In this way the seasonal cycle is allowed to change slowly over time. We then determine the 'trend' value for each month by removing the seasonal cycle; this result is shown in the 'trend' column. Trend values are linearly interpolated for missing months. The interpolated monthly mean is then the sum of the average seasonal cycle value and the trend value for the missing month.\n\nNOTE: In general, the data presented for the last year are subject to change, depending on recalibration of the reference gas mixtures used, and other quality control procedures. Occasionally, earlier years may also be changed for the same reasons. Usually these changes are minor.\n\nCO2 expressed as a mole fraction in dry air, micromol/mol, abbreviated as ppm \n\n (-99.99 missing data; -1 no data for daily means in month)",
"urls":[
"ftp://aftp.cmdl.noaa.gov/products/trends/co2/"
],
"size":46779
},
"boxjenkins_airline":{
"files":[
[
"boxjenkins_airline.csv"
]
],
"license":"You may copy and redistribute the data. You may make derivative works from the data. You may use the data for commercial purposes. You may not sublicence the data when redistributing it. You may not redistribute the data under a different license. Source attribution on any use of this data: Must refer source.",
"citation":"Box & Jenkins (1976), in file: data/airpass, Description: International airline passengers: monthly totals in thousands. Jan 49 Dec 60",
"details":"International airline passengers, monthly totals from January 1949 to December 1960.",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/boxjenkins_airline/"
],
"size":46779
},
"decampos_characters":{
"files":[
[
"characters.npy",
"digits.npy"
]
],
"license":null,
"citation":"T. de Campos, B. R. Babu, and M. Varma. Character recognition in natural images. VISAPP 2009.",
"details":"Examples of hand written digits taken from the de Campos et al paper on Character Recognition in Natural Images.",
"urls":[
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/decampos_digits/"
],
"size":2031872
}
}

View file

@ -12,6 +12,8 @@ import datetime
import json
import re
from config import *
ipython_available=True
try:
import IPython
@ -29,7 +31,8 @@ def reporthook(a,b,c):
sys.stdout.flush()
# Global variables
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
data_path = os.path.expandvars(config.get('datasets', 'dir'))
#data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
overide_manual_authorize=False
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
@ -108,7 +111,11 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
raise ValueError('Tried url ' + url + suffix + ' and received server error ' + str(response.code))
with open(save_name, 'wb') as f:
meta = response.info()
file_size = int(meta.getheaders("Content-Length")[0])
content_length_str = meta.getheaders("Content-Length")
if content_length_str:
file_size = int(content_length_str[0])
else:
file_size = None
status = ""
file_size_dl = 0
block_sz = 8192
@ -120,9 +127,15 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
file_size_dl += len(buff)
f.write(buff)
sys.stdout.write(" "*(len(status)) + "\r")
status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1.*1e6),
full=file_size/(1.*1e6), ll=line_length,
if file_size:
status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.),
full=file_size/(1048576.), ll=line_length,
perc="="*int(line_length*float(file_size_dl)/file_size))
else:
status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.),
ll=line_length,
perc="."*int(line_length*float(file_size_dl/(10*1048576.))))
sys.stdout.write(status)
sys.stdout.flush()
sys.stdout.write(" "*(len(status)) + "\r")
@ -350,6 +363,34 @@ def football_data(season='1314', data_set='football_data'):
Y = table[:, 4:]
return data_details_return({'X': X, 'Y': Y}, data_set)
def sod1_mouse(data_set='sod1_mouse'):
    """Load the SOD1 mouse expression table, downloading it first if needed.

    The expression values are read from ``sod1_C57_129_exprs.csv`` inside the
    data set's directory under ``data_path``, using the first row as the
    header and the first column as the index.

    :param data_set: name of the data set; used both for the availability
        check/download and as the sub-directory of ``data_path``.
    :type data_set: str
    :returns: the result of ``data_details_return`` for a dictionary with
        keys ``'X'`` and ``'Y'``.
    """
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv
    dirpath = os.path.join(data_path, data_set)
    filename = os.path.join(dirpath, 'sod1_C57_129_exprs.csv')
    Y = read_csv(filename, header=0, index_col=0)
    # NOTE(review): X is a placeholder, not a real design matrix.  An earlier
    # draft assigned num_repeats = num_time = num_cond = 4 here but never used
    # them to build X; those dead locals were removed.  TODO: construct X from
    # the experimental design once it is confirmed.
    X = 1
    return data_details_return({'X': X, 'Y': Y}, data_set)
def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
    """Load the Tomancak fruit fly expression table, downloading it if needed.

    ``Y`` is the transposed contents of ``tomancak_exprs.csv`` (first row as
    header, first column as index).  ``X`` is a two-column array pairing every
    time index (0..11) with every repeat index (0..2): time varies fastest,
    the repeat index changes once per full sweep of the time points.

    :param data_set: name of the data set; used for the availability
        check/download and as the sub-directory of ``data_path``.
    :type data_set: str
    :param gene_number: passed through unchanged under the ``'gene_number'``
        key of the returned dictionary; it is not used to select data here.
    :returns: the result of ``data_details_return`` for a dictionary with
        keys ``'X'``, ``'Y'`` and ``'gene_number'``.
    """
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv

    csv_path = os.path.join(data_path, data_set, 'tomancak_exprs.csv')
    expression = read_csv(csv_path, header=0, index_col=0).T

    num_repeats = 3
    num_time = 12
    time_axis = np.arange(num_time, dtype=float)
    repeat_axis = np.arange(num_repeats, dtype=float)
    # Row k of X is (time_axis[k % num_time], repeat_axis[k // num_time]).
    time_column = np.tile(time_axis, num_repeats)
    repeat_column = np.repeat(repeat_axis, num_time)
    X = np.vstack((time_column, repeat_column)).T

    result = {'X': X, 'Y': expression, 'gene_number': gene_number}
    return data_details_return(result, data_set)
# This will be for downloading google trends data.
def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
"""Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations."""
@ -718,6 +759,21 @@ def hapmap3(data_set='hapmap3'):
populations=populations)
return hapmap
def singlecell(data_set='singlecell'):
    """Load the single-cell qPCR expression table, downloading it if needed.

    The table is read from ``singlecell.csv`` in the data set's directory
    under ``data_path`` (first row as header, first column as index).  Its
    column labels are returned as ``'genes'`` and its row index as
    ``'labels'``.

    :param data_set: name of the data set; used for the availability
        check/download and as the sub-directory of ``data_path``.
    :type data_set: str
    :returns: the result of ``data_details_return`` for a dictionary with
        keys ``'Y'``, ``'info'``, ``'genes'`` and ``'labels'``.
    """
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv

    csv_path = os.path.join(data_path, data_set, 'singlecell.csv')
    expression = read_csv(csv_path, header=0, index_col=0)

    payload = {
        'Y': expression,
        'info': "qPCR singlecell experiment in Mouse, measuring 48 gene expressions in 1-64 cell states. The labels have been created as in Guo et al. [2010]",
        'genes': expression.columns,
        'labels': expression.index,
    }
    return data_details_return(payload, data_set)
def swiss_roll_1000():
    """Convenience wrapper: call ``swiss_roll`` with ``num_samples=1000``."""
    sample_count = 1000
    return swiss_roll(num_samples=sample_count)

View file

@ -1,168 +0,0 @@
# Generator script for ``data_resources.json``.
#
# Builds the ``data_resources`` dictionary (one entry per data set, giving its
# download URLs, file lists, citation, description, license and size) and
# dumps it as JSON into ``data_resources.json`` in the current working
# directory.  The write happens at import/run time, as in the original script.
import json

# Base URLs shared by several entries below.
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
sam_url = 'http://www.cs.nyu.edu/~roweis/data/'
cmu_url = 'http://mocap.cs.cmu.edu/subjects/'

# Each entry pairs 'urls' with 'files' positionally: files[i] is the list of
# file names listed for urls[i].  'suffices', where present, has the same
# nesting as 'files'.
data_resources = {
    'ankur_pose_data': {
        'urls': [neil_url + 'ankur_pose_data/'],
        'files': [['ankurDataPoseSilhouette.mat']],
        'license': None,
        'citation': """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
        'details': """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},

    'boston_housing': {
        'urls': ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
        'files': [['Index', 'housing.data', 'housing.names']],
        'citation': """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
        'details': """The Boston Housing data relates house values in Boston to a range of input variables.""",
        'license': None,
        'size': 51276
    },
    'brendan_faces': {
        'urls': [sam_url],
        'files': [['frey_rawface.mat']],
        'citation': 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.',
        'details': """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""",
        'license': None,
        'size': 1100584},
    'cmu_mocap_full': {
        'urls': ['http://mocap.cs.cmu.edu'],
        'files': [['allasfamc.zip']],
        # The two sentences used to be separated by stray quote characters and
        # a line break (left over from an implicit string concatenation); they
        # are now joined into the single intended acknowledgement.
        'citation': """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
        'details': """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
        'license': """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
        'size': None},
    'creep_rupture': {
        'urls': ['http://www.msm.cam.ac.uk/map/data/tar/'],
        'files': [['creeprupt.tar']],
        'citation': 'Materials Algorithms Project Data Library: MAP_DATA_CREEP_RUPTURE. F. Brun and T. Yoshida.',
        'details': """Provides 2066 creep rupture test results of steels (mainly of two kinds of steels: 2.25Cr and 9-12 wt% Cr ferritic steels). See http://www.msm.cam.ac.uk/map/data/materials/creeprupt-b.html.""",
        'license': None,
        'size': 602797},
    'della_gatta': {
        'urls': [neil_url + 'della_gatta/'],
        'files': [['DellaGattadata.mat']],
        'citation': 'Direct targets of the TRP63 transcription factor revealed by a combination of gene expression profiling and reverse engineering. Giusy Della Gatta, Mukesh Bansal, Alberto Ambesi-Impiombato, Dario Antonini, Caterina Missero, and Diego di Bernardo, Genome Research 2008',
        'details': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA.",
        'license': None,
        'size': 3729650},
    'epomeo_gpx': {
        'urls': [neil_url + 'epomeo_gpx/'],
        'files': [['endomondo_1.gpx', 'endomondo_2.gpx', 'garmin_watch_via_endomondo.gpx', 'viewranger_phone.gpx', 'viewranger_tablet.gpx']],
        'citation': '',
        'details': "Five different GPS traces of the same run up Mount Epomeo in Ischia. The traces are from different sources. endomondo_1 and endomondo_2 are traces from the mobile phone app Endomondo, with a split in the middle. garmin_watch_via_endomondo is the trace from a Garmin watch, with a segment missing about 4 kilometers in. viewranger_phone and viewranger_tablet are traces from a phone and a tablet through the viewranger app. The viewranger_phone data comes from the same mobile phone as the Endomondo data (i.e. there are 3 GPS devices, but one device recorded two traces).",
        'license': None,
        'size': 2031872},
    'three_phase_oil_flow': {
        'urls': [neil_url + 'three_phase_oil_flow/'],
        'files': [['DataTrnLbls.txt', 'DataTrn.txt', 'DataTst.txt', 'DataTstLbls.txt', 'DataVdn.txt', 'DataVdnLbls.txt']],
        'citation': 'Bishop, C. M. and G. D. James (1993). Analysis of multiphase flows using dual-energy gamma densitometry and neural networks. Nuclear Instruments and Methods in Physics Research A327, 580-593',
        'details': """The three phase oil data used initially for demonstrating the Generative Topographic mapping.""",
        'license': None,
        'size': 712796},
    'rogers_girolami_data': {
        'urls': ['https://www.dropbox.com/sh/7p6tu1t29idgliq/_XqlH_3nt9/'],
        'files': [['firstcoursemldata.tar.gz']],
        'suffices': [['?dl=1']],
        'citation': 'A First Course in Machine Learning. Simon Rogers and Mark Girolami: Chapman & Hall/CRC, ISBN-13: 978-1439824146',
        'details': """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""",
        'license': None,
        'size': 21949154},
    'olivetti_faces': {
        'urls': [neil_url + 'olivetti_faces/', sam_url],
        'files': [['att_faces.zip'], ['olivettifaces.mat']],
        'citation': 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994',
        'details': """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """,
        'license': None,
        'size': 8561331},
    'olympic_marathon_men': {
        'urls': [neil_url + 'olympic_marathon_men/'],
        'files': [['olympicMarathonTimes.csv']],
        'citation': None,
        'details': """Olympic mens' marathon gold medal winning times from 1896 to 2012. Time given in pace (minutes per kilometer). Data is originally downloaded and collated from Wikipedia, we are not responsible for errors in the data""",
        'license': None,
        'size': 584},
    'osu_run1': {
        'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
        'files': [['run1TXT.ZIP'], ['connections.txt']],
        'details': "Motion capture data of a stick man running from the Open Motion Data Project at Ohio State University.",
        'citation': 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
        'license': 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
        'size': 338103},
    'osu_accad': {
        'urls': ['http://accad.osu.edu/research/mocap/data/', neil_url + 'stick/'],
        'files': [['swagger1TXT.ZIP', 'handspring1TXT.ZIP', 'quickwalkTXT.ZIP', 'run1TXT.ZIP', 'sprintTXT.ZIP', 'dogwalkTXT.ZIP', 'camper_04TXT.ZIP', 'dance_KB3_TXT.ZIP', 'per20_TXT.ZIP', 'perTWO07_TXT.ZIP', 'perTWO13_TXT.ZIP', 'perTWO14_TXT.ZIP', 'perTWO15_TXT.ZIP', 'perTWO16_TXT.ZIP'], ['connections.txt']],
        'details': "Motion capture data of different motions from the Open Motion Data Project at Ohio State University.",
        'citation': 'The Open Motion Data Project by The Ohio State University Advanced Computing Center for the Arts and Design, http://accad.osu.edu/research/mocap/mocap_data.htm.',
        'license': 'Data is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-nc-sa/3.0/).',
        'size': 15922790},
    'pumadyn-32nm': {
        'urls': ['ftp://ftp.cs.toronto.edu/pub/neuron/delve/data/tarfiles/pumadyn-family/'],
        'files': [['pumadyn-32nm.tar.gz']],
        'details': """Pumadyn non linear 32 input data set with moderate noise. See http://www.cs.utoronto.ca/~delve/data/pumadyn/desc.html for details.""",
        'citation': """Created by Zoubin Ghahramani using the Matlab Robotics Toolbox of Peter Corke. Corke, P. I. (1996). A Robotics Toolbox for MATLAB. IEEE Robotics and Automation Magazine, 3 (1): 24-32.""",
        'license': """Data is made available by the Delve system at the University of Toronto""",
        'size': 5861646},
    'robot_wireless': {
        'urls': [neil_url + 'robot_wireless/'],
        'files': [['uw-floor.txt']],
        'citation': """WiFi-SLAM using Gaussian Process Latent Variable Models by Brian Ferris, Dieter Fox and Neil Lawrence in IJCAI'07 Proceedings pages 2480-2485. Data used in A Unifying Probabilistic Perspective for Spectral Dimensionality Reduction: Insights and New Models by Neil D. Lawrence, JMLR 13 pg 1609--1638, 2012.""",
        'details': """Data created by Brian Ferris and Dieter Fox. Consists of WiFi access point strengths taken during a circuit of the Paul Allen building at the University of Washington.""",
        'license': None,
        'size': 284390},
    'swiss_roll': {
        'urls': ['http://isomap.stanford.edu/'],
        'files': [['swiss_roll_data.mat']],
        'details': """Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
        'citation': 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
        'license': None,
        'size': 800256},
    'ripley_prnn_data': {
        'urls': ['http://www.stats.ox.ac.uk/pub/PRNN/'],
        'files': [['Cushings.dat', 'README', 'crabs.dat', 'fglass.dat', 'fglass.grp', 'pima.te', 'pima.tr', 'pima.tr2', 'synth.te', 'synth.tr', 'viruses.dat', 'virus3.dat']],
        'details': """Data sets from Brian Ripley's Pattern Recognition and Neural Networks""",
        'citation': """Pattern Recognition and Neural Networks by B.D. Ripley (1996) Cambridge University Press ISBN 0 521 46986 7""",
        'license': None,
        'size': 93565},
    'isomap_face_data': {
        'urls': [neil_url + 'isomap_face_data/'],
        'files': [['face_data.mat']],
        'details': """Face data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.""",
        'citation': 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000',
        'license': None,
        'size': 24229368},
    'xw_pen': {
        'urls': [neil_url + 'xw_pen/'],
        'files': [['xw_pen_15.csv']],
        'details': """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
        'citation': 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
        'license': None,
        'size': 3410},
    'hapmap3': {
        'urls': ['http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/'],
        'files': [['hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2', 'hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2', 'relationships_w_pops_121708.txt']],
        # The backslash in the encoding table is written as '\\' so that it is
        # a literal backslash rather than an invalid escape sequence; the
        # resulting text is unchanged.
        'details': """
HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations.
The HapMap phase three SNP dataset - 1184 samples out of 11 populations.
See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.
SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:
Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then
/ 1, iff SNPij==(B1,B1)
Aij = | 0, iff SNPij==(B1,B2)
\\ -1, iff SNPij==(B2,B2)
The SNP data and the meta information (such as iid, sex and phenotype) are
stored in the dataframe datadf, index is the Individual ID,
with following columns for metainfo:
* family_id -> Family ID
* paternal_id -> Paternal ID
* maternal_id -> Maternal ID
* sex -> Sex (1=male; 2=female; other=unknown)
* phenotype -> Phenotype (-9, or 0 for unknown)
* population -> Population string (e.g. 'ASW' - 'YRI')
* rest are SNP rs (ids)
More information is given in infodf:
* Chromosome:
- autosomal chromosemes -> 1-22
- X X chromosome -> 23
- Y Y chromosome -> 24
- XY Pseudo-autosomal region of X -> 25
- MT Mitochondrial -> 26
* Relative Positon (to Chromosome) [base pairs]
""",
        'citation': """Gibbs, Richard A., et al. "The international HapMap project." Nature 426.6968 (2003): 789-796.""",
        'license': """International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)""",
        'size': 2*1729092237 + 62265},
}

# Serialise the table next to wherever the script is run from.
with open('data_resources.json', 'w') as f:
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("writing data_resources")
    json.dump(data_resources, f)

View file

@ -13,7 +13,11 @@ def initialize_latent(init, input_dim, Y):
p = pca(Y)
PC = p.project(Y, min(input_dim, Y.shape[1]))
Xr[:PC.shape[0], :PC.shape[1]] = PC
var = p.fracs[:input_dim]
else:
var = Xr.var(0)
Xr -= Xr.mean(0)
Xr /= Xr.var(0)
return Xr, var/var.max()
return Xr, p.fracs[:input_dim]

View file

@ -130,14 +130,14 @@ def fast_array_equal(A, B):
""" % pragma_string
if config.getboolean('parallel', 'openmp'):
pragma_string = '#include <omp.h>'
header_string = '#include <omp.h>'
else:
pragma_string = ''
header_string = ''
support_code = """
%s
#include <math.h>
""" % pragma_string
""" % header_string
weave_options_openmp = {'headers' : ['<omp.h>'],

View file

@ -2,15 +2,22 @@ import sys
import numpy as np
import sympy as sym
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
from sympy.matrices import Matrix
########################################
## Try to do some matrix functions: problem, you can't do derivatives
## with respect to matrix functions :-(
class GPySymMatrix(Matrix):
    """Experimental sympy ``Matrix`` subclass that exposes the atoms of its entries."""

    def __init__(self, indices):
        # NOTE(review): ``indices`` is accepted but never used or stored here;
        # the base initialiser is called without arguments.
        Matrix.__init__(self)

    def atoms(self):
        """Return a flat list of the atoms of every entry.

        Entries are visited in the matrix's own iteration order and the result
        of each entry's ``atoms()`` call is appended in turn; duplicates across
        entries are not removed.
        """
        collected = []
        for entry in self:
            collected.extend(entry.atoms())
        return collected
class selector(Function):
"""A function that returns an element of a Matrix depending on input indices."""
nargs = 3
def fdiff(self, argindex=1):
return selector(*self.args)
@classmethod
def eval(cls, X, i, j):
if i.is_Number and j.is_Number:

View file

@ -10,6 +10,16 @@ A Gaussian processes framework in Python.
Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png)
Citation
========
@Misc{gpy2014,
author = {The GPy authors},
title = {{GPy}: A Gaussian process framework in python},
howpublished = {\url{http://github.com/SheffieldML/GPy}},
year = {2012--2014}
}
Getting started
===============
Installing with pip