mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-02 14:45:15 +02:00
Add ordinal and attempt to fix downloads
This commit is contained in:
parent
c138ee868f
commit
db644408ea
5 changed files with 87 additions and 3 deletions
|
|
@ -6,6 +6,10 @@
|
|||
# some platforms, hence this option.
|
||||
openmp=False
|
||||
|
||||
[datasets]
|
||||
# location for the local data cache
|
||||
dir=$HOME/tmp/GPy-datasets/
|
||||
|
||||
[anaconda]
|
||||
# if you have an anaconda python installation please specify it here.
|
||||
installed = False
|
||||
|
|
|
|||
48
GPy/likelihoods/ordinal.py
Normal file
48
GPy/likelihoods/ordinal.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import sympy as sym
|
||||
from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
|
||||
import numpy as np
|
||||
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||
import link_functions
|
||||
from symbolic import Symbolic
|
||||
from scipy import stats
|
||||
|
||||
class Ordinal(Symbolic):
    r"""
    Ordinal likelihood over a fixed number of ordered categories.

    The log probability of every category is assembled symbolically into a
    1 x ``categories`` matrix named ``log_pdf``, and the symbol ``y_0`` is
    registered as that matrix's ``index_var``:

    * category 0 uses ``normcdfln(-f_0)``;
    * with two categories, category 1 uses ``normcdfln(f_0)``;
    * with more than two categories, the last category uses
      ``normcdfln(w.sum() + f_0)`` and each intermediate category uses the
      log of a difference of two ``normcdf`` terms built from partial sums
      of the symbolic 1 x ``categories`` matrix ``w``.

    :param categories: number of ordered categories (default 3).
    :param gp_link: link function to use; when ``None`` (the default)
        ``link_functions.Identity()`` is used.

    .. Note::
        Y takes non-negative integer values (``y_0`` is declared as a
        nonnegative integer symbol).

    .. See also::
        symbolic.py, for the parent class
    """
    def __init__(self, categories=3, gp_link=None):
        if gp_link is None:
            gp_link = link_functions.Identity()

        # Symbols for the observed category and the latent function value.
        y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
        f_0 = sym.Symbol('f_0', positive=True, real=True)

        # One symbolic log-probability entry per category.
        log_pdf = create_matrix('log_pdf', 1, categories)
        log_pdf[0] = normcdfln(-f_0)
        if categories>2:
            w = create_matrix('w', 1, categories)
            log_pdf[categories-1] = normcdfln(w.sum() + f_0)
            for i in range(1, categories-1):
                # NOTE(review): the two normcdf terms use opposite signs of
                # f_0 and the first partial sum slices 0:i-1 (empty for
                # i == 1) -- confirm this matches the intended model.
                log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) )
        else:
            log_pdf[1] = normcdfln(f_0)
        log_pdf.index_var = y_0
        super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal')

        # TODO: Check this.
        self.log_concave = True
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ def plot_latent(model, labels=None, which_indices=None,
|
|||
elif type(ul) is np.int64:
|
||||
this_label = 'class %i' % ul
|
||||
else:
|
||||
this_label = unicode(i)
|
||||
this_label = unicode(ul)
|
||||
m = marker.next()
|
||||
|
||||
index = np.nonzero(labels == ul)[0]
|
||||
|
|
|
|||
|
|
@ -467,6 +467,21 @@
|
|||
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
|
||||
]
|
||||
},
|
||||
"sod1_mouse": {
|
||||
"citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
|
||||
"details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",
|
||||
"files": [
|
||||
[
|
||||
"sod1_C57_129_exprs.csv",
|
||||
"sod1_C57_129_se.csv"
|
||||
]
|
||||
],
|
||||
"license": null,
|
||||
"size": 0,
|
||||
"urls": [
|
||||
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/sod1_mouse/"
|
||||
]
|
||||
},
|
||||
"swiss_roll": {
|
||||
"citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
|
||||
"details": "Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ import datetime
|
|||
import json
|
||||
import re
|
||||
|
||||
from config import *
|
||||
|
||||
ipython_available=True
|
||||
try:
|
||||
import IPython
|
||||
|
|
@ -29,7 +31,8 @@ def reporthook(a,b,c):
|
|||
sys.stdout.flush()
|
||||
|
||||
# Global variables
|
||||
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
|
||||
# Expand environment variables such as $HOME in the configured cache directory.
data_path = os.path.expandvars(config.get('datasets', 'dir'))
|
||||
#data_path = os.path.join(os.path.dirname(__file__), 'datasets')
|
||||
default_seed = 10000
|
||||
overide_manual_authorize=False
|
||||
neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
|
||||
|
|
@ -360,11 +363,25 @@ def football_data(season='1314', data_set='football_data'):
|
|||
Y = table[:, 4:]
|
||||
return data_details_return({'X': X, 'Y': Y}, data_set)
|
||||
|
||||
def sod1_mouse(data_set='sod1_mouse'):
    """Load the SOD1 mouse gene expression data set.

    If ``data_available`` reports the data set as missing it is fetched
    with ``download_data`` first. The file ``sod1_C57_129_exprs.csv`` is
    then read from the ``data_set`` sub-directory of ``data_path`` with
    pandas (first row as header, first column as index) and transposed.

    :param data_set: name of the data set; also the sub-directory of
        ``data_path`` the file is read from.
    :returns: the result of ``data_details_return`` for a dictionary with
        keys ``'X'`` and ``'Y'``.
    """
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv
    dirpath = os.path.join(data_path, data_set)
    filename = os.path.join(dirpath, 'sod1_C57_129_exprs.csv')
    Y = read_csv(filename, header=0, index_col=0).T
    # NOTE(review): X is a scalar placeholder, not a real input array --
    # confirm what covariates callers expect here.
    X = 1
    return data_details_return({'X': X, 'Y': Y}, data_set)
|
||||
|
||||
def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
|
||||
if not data_available(data_set):
|
||||
download_data(data_set)
|
||||
from pandas import read_csv
|
||||
filename = os.path.join(data_path, 'tomancak_expr.csv')
|
||||
dirpath = os.path.join(data_path, data_set)
|
||||
filename = os.path.join(dirpath, 'tomancak_expr.csv')
|
||||
Y = read_csv(filename, header=0, index_col=0).T
|
||||
num_repeats = 3
|
||||
num_time = 12
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue