Add ordinal and attempt to fix downloads

This commit is contained in:
Neil Lawrence 2014-05-13 12:17:42 +01:00
parent c138ee868f
commit db644408ea
5 changed files with 87 additions and 3 deletions

View file

@ -6,6 +6,10 @@
# some platforms, hence this option.
openmp=False
[datasets]
# location for the local data cache
dir=$HOME/tmp/GPy-datasets/
[anaconda]
# if you have an anaconda python installation please specify it here.
installed = False

View file

@ -0,0 +1,48 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sympy as sym
from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from symbolic import Symbolic
from scipy import stats
class Ordinal(Symbolic):
"""
Ordinal
.. math::
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
.. Note::
Y takes non zero integer values..
link function should have a positive domain, e.g. log (default).
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, categories=3, gp_link=None):
if gp_link is None:
gp_link = link_functions.Identity()
dispersion = sym.Symbol('width', positive=True, real=True)
y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
f_0 = sym.Symbol('f_0', positive=True, real=True)
log_pdf = create_matrix('log_pdf', 1, categories)
log_pdf[0] = normcdfln(-f_0)
if categories>2:
w = create_matrix('w', 1, categories)
log_pdf[categories-1] = normcdfln(w.sum() + f_0)
for i in range(1, categories-1):
log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) )
else:
log_pdf[1] = normcdfln(f_0)
log_pdf.index_var = y_0
super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal')
# TODO: Check this.
self.log_concave = True

View file

@ -97,7 +97,7 @@ def plot_latent(model, labels=None, which_indices=None,
elif type(ul) is np.int64:
this_label = 'class %i' % ul
else:
this_label = unicode(i)
this_label = unicode(ul)
m = marker.next()
index = np.nonzero(labels == ul)[0]

View file

@ -467,6 +467,21 @@
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
]
},
"sod1_mouse": {
"citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
"details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",
"files": [
[
"sod1_C59_129_exprs.csv",
"sod1_C59_129_se.csv"
]
],
"license": null,
"size": 0,
"urls": [
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/sod1_mouse/"
]
},
"swiss_roll": {
"citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
"details": "Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",

View file

@ -12,6 +12,8 @@ import datetime
import json
import re
from config import *
ipython_available=True
try:
import IPython
@ -29,7 +31,8 @@ def reporthook(a,b,c):
sys.stdout.flush()
# Global variables
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
data_path = os.path.expandvar(config.get('datasets', 'dir'))
#data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
overide_manual_authorize=False
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
@ -360,11 +363,25 @@ def football_data(season='1314', data_set='football_data'):
Y = table[:, 4:]
return data_details_return({'X': X, 'Y': Y}, data_set)
def sod1_mouse(data_set='sod1_mouse'):
if not data_available(data_set):
download_data(data_set)
from pandas import read_csv
dirpath = os.path.join(data_path, data_set)
filename = os.path.join(dirpath, 'sod1_C57_129_exprs.csv')
Y = read_csv(filename, header=0, index_col=0).T
num_repeats=4
num_time=4
num_cond=4
X = 1
return data_details_return({'X': X, 'Y': Y}, data_set)
def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
if not data_available(data_set):
download_data(data_set)
from pandas import read_csv
filename = os.path.join(data_path, 'tomancak_expr.csv')
dirpath = os.path.join(data_path, data_set)
filename = os.path.join(dirpath, 'tomancak_expr.csv')
Y = read_csv(filename, header=0, index_col=0).T
num_repeats = 3
num_time = 12