Add ordinal and attempt to fix downloads

2026-07-17 16:41:04 +02:00 · 2014-05-13 12:17:42 +01:00 · 2014-05-13 12:17:42 +01:00 · db644408ea
commit db644408ea
parent c138ee868f
5 changed files with 87 additions and 3 deletions
--- a/GPy/gpy_config.cfg
+++ b/GPy/gpy_config.cfg
@ -6,6 +6,10 @@
 # some platforms, hence this option.
 openmp=False

+[datasets]
+# location for the local data cache
+dir=$HOME/tmp/GPy-datasets/
+
 [anaconda]
 # if you have an anaconda python installation please specify it here.
 installed = False
--- a/GPy/likelihoods/ordinal.py
+++ b/GPy/likelihoods/ordinal.py
@ -0,0 +1,48 @@
+# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import sympy as sym
+from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
+import numpy as np
+from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
+import link_functions
+from symbolic import Symbolic
+from scipy import stats
+
+class Ordinal(Symbolic):
+    """
+    Ordinal likelihood whose log-pdf is assembled symbolically, one entry per category.
+
+    ``__init__`` fills ``log_pdf`` (from ``create_matrix('log_pdf', 1, categories)``)
+    with expressions in the symbol ``f_0`` and, for more than two categories, the
+    entries of ``w``; the symbol ``y_0`` is set as the matrix's ``index_var``.
+
+    :param categories: size passed to ``create_matrix`` for ``log_pdf`` and ``w`` (default 3).
+    :param gp_link: link function; ``link_functions.Identity()`` is used when None.
+
+    .. Note::
+        NOTE(review): this docstring previously carried a negative-binomial
+        formula and a "log (default)" link remark, neither of which matches
+        the code; the intended ordinal formula still needs to be written
+        down and checked against ``__init__``.
+
+    .. See also::
+        symbolic.py, for the parent class
+    """
+    def __init__(self, categories=3, gp_link=None):
+        if gp_link is None:
+            gp_link = link_functions.Identity()
+
+        # NOTE(review): 'dispersion' is not referenced again in this method.
+        dispersion = sym.Symbol('width', positive=True, real=True)
+        y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
+        # NOTE(review): f_0 is declared positive although the default link is Identity -- confirm.
+        f_0 = sym.Symbol('f_0', positive=True, real=True) 
+        log_pdf = create_matrix('log_pdf', 1, categories)
+        # First category.
+        log_pdf[0] = normcdfln(-f_0)
+        if categories>2:
+            w = create_matrix('w', 1, categories)
+            # Last category uses the sum of every entry of w.
+            log_pdf[categories-1] = normcdfln(w.sum() + f_0)
+            # Intermediate categories: log of a difference of two normcdf terms.
+            # NOTE(review): the two terms use opposite signs of f_0 (+f_0 vs -f_0), and
+            # for i == 1 the first slice w[0, 0:0] is empty -- confirm the intended thresholds.
+            for i in range(1, categories-1):
+                log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) )
+        else:
+            # categories <= 2: only a second entry is filled and w is not created.
+            log_pdf[1] = normcdfln(f_0)
+        # The observation symbol selects which entry of log_pdf applies.
+        log_pdf.index_var = y_0
+        super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal')
+
+        # TODO: Check this; log-concavity is asserted here, not verified.
+        self.log_concave = True
+
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@ -97,7 +97,7 @@ def plot_latent(model, labels=None, which_indices=None,
        elif type(ul) is np.int64:
            this_label = 'class %i' % ul
        else:
-            this_label = unicode(i)
+            this_label = unicode(ul)
        m = marker.next()

        index = np.nonzero(labels == ul)[0]
--- a/GPy/util/data_resources.json
+++ b/GPy/util/data_resources.json
@ -467,6 +467,21 @@
            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
        ]
    },
+    "sod1_mouse": {
+        "citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis, Nardo G, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C. Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
+        "details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",
+        "files": [
+            [
+                "sod1_C57_129_exprs.csv",
+                "sod1_C57_129_se.csv"
+            ]
+        ],
+        "license": null,
+        "size": 0,
+        "urls": [
+            "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/sod1_mouse/"
+        ]
+    },
    "swiss_roll": {
        "citation": "A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000",
        "details": "Swiss roll data made available by Tenenbaum, de Silva and Langford to demonstrate isomap, available from http://isomap.stanford.edu/datasets.html.",
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -12,6 +12,8 @@ import datetime
 import json
 import re

+from config import *
+
 ipython_available=True
 try:
    import IPython
@ -29,7 +31,8 @@ def reporthook(a,b,c):
    sys.stdout.flush()

 # Global variables
-data_path = os.path.join(os.path.dirname(__file__), 'datasets')
+# Local data cache; $VARS in the configured [datasets] dir are expanded.
+data_path = os.path.expandvars(config.get('datasets', 'dir'))
 default_seed = 10000
 overide_manual_authorize=False
 neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
@ -360,11 +363,25 @@ def football_data(season='1314', data_set='football_data'):
        Y = table[:, 4:]
    return data_details_return({'X': X, 'Y': Y}, data_set)

+def sod1_mouse(data_set='sod1_mouse'):
+    """Load the SOD1 mouse expression table, downloading it if not cached.
+
+    :param data_set: name of the data set; also the sub-directory of ``data_path`` that is read.
+    :returns: result of ``data_details_return`` for ``{'X': X, 'Y': Y}``, where Y is the
+        transposed contents of ``sod1_C57_129_exprs.csv``.
+    """
+    if not data_available(data_set):
+        download_data(data_set)
+    from pandas import read_csv
+    expression_file = os.path.join(data_path, data_set, 'sod1_C57_129_exprs.csv')
+    Y = read_csv(expression_file, header=0, index_col=0).T
+    # No inputs are built for this data set yet; X is a scalar placeholder.
+    X = 1
+    return data_details_return({'X': X, 'Y': Y}, data_set)
+    
 def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None):
    if not data_available(data_set):
        download_data(data_set)
    from pandas import read_csv
-    filename = os.path.join(data_path, 'tomancak_expr.csv')
+    dirpath = os.path.join(data_path, data_set)
+    filename = os.path.join(dirpath, 'tomancak_expr.csv')
    Y = read_csv(filename, header=0, index_col=0).T
    num_repeats = 3
    num_time = 12