[param_to_array] Deprecated and removed param_to_array from the code base; use param.values instead

Max Zwiessele 2014-10-06 08:59:24 +01:00
parent c1d998e272
commit 6a260409fa
16 changed files with 349 additions and 231 deletions
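
The change is mechanical throughout the diff: call sites that unwrapped a Param with param_to_array now read the underlying numpy array directly, via .values or an explicit ndarray view. A minimal sketch of the new call-site style, assuming a small GPRegression model purely for illustration (the model and data are not from the diff):

```python
import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = np.sin(3 * X) + 0.05 * np.random.randn(50, 1)
m = GPy.models.GPRegression(X, Y)  # any model exposing Param attributes will do

# Before this commit:
#     from GPy.util.misc import param_to_array
#     ls = param_to_array(m.kern.lengthscale)
# After this commit, read the plain array directly:
ls = m.kern.lengthscale.values                 # numpy array with the current value(s)
ls_view = m.kern.lengthscale.view(np.ndarray)  # equivalent ndarray view, as used in several files below
assert np.allclose(ls, ls_view)
```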

View file

@@ -23,9 +23,6 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim)
k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
-##+ GPy.kern.white(input_dim, 0.01)
-#)
-#k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
@@ -159,7 +156,6 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy
from matplotlib import pyplot as plt
-from ..util.misc import param_to_array
import numpy as np
_np.random.seed(0)
@@ -177,7 +173,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
-lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
+lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
plt.close(fig)
@@ -186,8 +182,6 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy
from matplotlib import pyplot as plt
-from ..util.misc import param_to_array
-import numpy as np
_np.random.seed(0)
data = GPy.util.datasets.oil()
@@ -204,7 +198,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
-lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
+lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1,:], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
plt.close(fig)
@@ -228,10 +222,10 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
Ylist = [Y1, Y2, Y3]
if plot_sim:
-import pylab
+from matplotlib import pyplot as plt
import matplotlib.cm as cm
import itertools
-fig = pylab.figure("MRD Simulation Data", figsize=(8, 6))
+fig = plt.figure("MRD Simulation Data", figsize=(8, 6))
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = slist_names
@@ -242,29 +236,11 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1))
-pylab.draw()
-pylab.tight_layout()
+plt.draw()
+plt.tight_layout()
return slist, [S1, S2, S3], Ylist
-def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS):
-S1 = _np.hstack([s1, sS])
-S2 = _np.hstack([s2, s3, sS])
-S3 = _np.hstack([s3, sS])
-Y1 = S1.dot(_np.random.randn(S1.shape[1], D1))
-Y2 = S2.dot(_np.random.randn(S2.shape[1], D2))
-Y3 = S3.dot(_np.random.randn(S3.shape[1], D3))
-Y1 += .3 * _np.random.randn(*Y1.shape)
-Y2 += .2 * _np.random.randn(*Y2.shape)
-Y3 += .25 * _np.random.randn(*Y3.shape)
-Y1 -= Y1.mean(0)
-Y2 -= Y2.mean(0)
-Y3 -= Y3.mean(0)
-Y1 /= Y1.std(0)
-Y2 /= Y2.std(0)
-Y3 /= Y3.std(0)
-return Y1, Y2, Y3, S1, S2, S3
def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
_np.random.seed(1234)
@@ -291,10 +267,10 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
Ylist = [Y1, Y2, Y3]
if plot_sim:
-import pylab
+from matplotlib import pyplot as plt
import matplotlib.cm as cm
import itertools
-fig = pylab.figure("MRD Simulation Data", figsize=(8, 6))
+fig = plt.figure("MRD Simulation Data", figsize=(8, 6))
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = slist_names
@@ -305,28 +281,28 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1))
-pylab.draw()
-pylab.tight_layout()
+plt.draw()
+plt.tight_layout()
return slist, [S1, S2, S3], Ylist
-# def bgplvm_simulation_matlab_compare():
-# from GPy.util.datasets import simulation_BGPLVM
-# from GPy import kern
-# from GPy.models import BayesianGPLVM
-#
-# sim_data = simulation_BGPLVM()
-# Y = sim_data['Y']
-# mu = sim_data['mu']
-# num_inducing, [_, Q] = 3, mu.shape
-#
-# k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2))
-# m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k,
-# _debug=False)
-# m.auto_scale_factor = True
-# m['noise'] = Y.var() / 100.
-# m['linear_variance'] = .01
-# return m
+def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS):
+S1 = _np.hstack([s1, sS])
+S2 = _np.hstack([s2, s3, sS])
+S3 = _np.hstack([s3, sS])
+Y1 = S1.dot(_np.random.randn(S1.shape[1], D1))
+Y2 = S2.dot(_np.random.randn(S2.shape[1], D2))
+Y3 = S3.dot(_np.random.randn(S3.shape[1], D3))
+Y1 += .3 * _np.random.randn(*Y1.shape)
+Y2 += .2 * _np.random.randn(*Y2.shape)
+Y3 += .25 * _np.random.randn(*Y3.shape)
+Y1 -= Y1.mean(0)
+Y2 -= Y2.mean(0)
+Y3 -= Y3.mean(0)
+Y1 /= Y1.std(0)
+Y2 /= Y2.std(0)
+Y3 /= Y3.std(0)
+return Y1, Y2, Y3, S1, S2, S3
def bgplvm_simulation(optimize=True, verbose=1,
plot=True, plot_sim=False,

View file

@@ -1,7 +1,6 @@
import numpy as np
from ...util import diag
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
-from ...util.misc import param_to_array
from ...core.parameterization.variational import VariationalPosterior
from . import LatentFunctionInference
from posterior import Posterior
@@ -23,7 +22,7 @@ class EPDTC(LatentFunctionInference):
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y):
-return param_to_array(np.sum(np.square(Y)))
+return np.sum(np.square(Y))
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
@@ -44,7 +43,7 @@ class EPDTC(LatentFunctionInference):
"""
N, D = Y.shape
if (N>=D):
-return param_to_array(Y)
+return Y
else:
return jitchol(tdot(Y))
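
Only the product Y·Yᵀ (and its trace) enters these computations, so the factor helper can hand back Y itself when N ≥ D and an N×N factor of Y·Yᵀ when D > N, which is the smaller object. A standalone numpy sketch of the identities this relies on (plain numpy with made-up sizes stands in for GPy's tdot and jitchol):

```python
import numpy as np

np.random.seed(0)
N, D = 5, 50                    # the D > N branch
Y = np.random.randn(N, D)

# trace(Y Y^T) equals the sum of squared entries, so _get_trYYT needs no conversion
assert np.allclose(np.trace(Y.dot(Y.T)), np.sum(np.square(Y)))

# An N x N Cholesky factor L with L L^T = Y Y^T carries the same information
# as Y for any term that only involves Y Y^T, but is much smaller when D >> N.
L = np.linalg.cholesky(Y.dot(Y.T) + 1e-10 * np.eye(N))  # small jitter, as jitchol would add
assert np.allclose(L.dot(L.T), Y.dot(Y.T))
```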

View file

@@ -12,7 +12,6 @@
import numpy as np
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv
-from ...util.misc import param_to_array
from posterior import Posterior
import warnings
from scipy import optimize
@@ -39,9 +38,6 @@ class Laplace(LatentFunctionInference):
Returns a Posterior class containing essential quantities of the posterior
"""
-#make Y a normal array!
-Y = param_to_array(Y)
# Compute K
K = kern.K(X)

View file

@@ -6,7 +6,6 @@ from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrt
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
import numpy as np
-from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
import logging, itertools
@@ -35,7 +34,7 @@ class VarDTC(LatentFunctionInference):
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y):
-return param_to_array(np.sum(np.square(Y)))
+return np.sum(np.square(Y))
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
@@ -56,7 +55,7 @@ class VarDTC(LatentFunctionInference):
"""
N, D = Y.shape
if (N>=D):
-return param_to_array(Y)
+return Y.view(np.ndarray)
else:
return jitchol(tdot(Y))

View file

@@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
import numpy as np
-from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
@@ -117,7 +116,7 @@ class VarDTC_GPU(LatentFunctionInference):
"""
N, D = Y.shape
if (N>=D):
-return param_to_array(Y)
+return Y.view(np.ndarray)
else:
return jitchol(tdot(Y))

View file

@@ -6,7 +6,6 @@ from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdi
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
import numpy as np
-from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
@@ -60,7 +59,7 @@ class VarDTC_minibatch(LatentFunctionInference):
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y):
-return param_to_array(np.sum(np.square(Y)))
+return np.sum(np.square(Y))
def _get_YYTfactor(self, Y):
"""
@@ -70,7 +69,7 @@ class VarDTC_minibatch(LatentFunctionInference):
"""
N, D = Y.shape
if (N>=D):
-return param_to_array(Y)
+return Y.view(np.ndarray)
else:
return jitchol(tdot(Y))

View file

@@ -3,7 +3,6 @@
import numpy as np
from kern import Kern
-from ...util.misc import param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Poly(Kern):

View file

@@ -7,7 +7,6 @@ from ..core import SparseGP
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference import VarDTC
-from ..util.misc import param_to_array
from ..core.parameterization.variational import NormalPosterior
class SparseGPRegression(SparseGP):
@@ -40,7 +39,7 @@ class SparseGPRegression(SparseGP):
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
-Z = param_to_array(X)[i].copy()
+Z = X.view(np.ndarray)[i].copy()
else:
assert Z.shape[1] == input_dim
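
The inducing inputs Z default to a copy of a random subset of the inputs, taken through the raw ndarray view so the copy is detached from the parameterized X. A minimal sketch of that initialisation, with a plain array and made-up shapes standing in for the model's X:

```python
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(500, 2)          # training inputs; in GPy this may be a Param-like array object
num_inducing = 10

i = rng.permutation(X.shape[0])[:min(num_inducing, X.shape[0])]
Z = X.view(np.ndarray)[i].copy()   # strips any Param wrapper; a no-op for plain arrays
print(Z.shape)                     # (10, 2)
```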

View file

@@ -1,7 +1,6 @@
import numpy as np
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
-from ...util.misc import param_to_array
from ...core.parameterization.variational import VariationalPosterior
from .base_plots import x_frame2D
import itertools
@@ -55,9 +54,9 @@ def plot_latent(model, labels=None, which_indices=None,
#fethch the data points X that we'd like to plot
X = model.X
if isinstance(X, VariationalPosterior):
-X = param_to_array(X.mean)
+X = X.mean
else:
-X = param_to_array(X)
+X = X
if X.shape[0] > 1000:
@@ -175,7 +174,7 @@ def plot_latent(model, labels=None, which_indices=None,
ax.set_aspect('auto') # set a nice aspect ratio
if plot_inducing:
-Z = param_to_array(model.Z)
+Z = model.Z
ax.plot(Z[:, input_1], Z[:, input_2], '^w')
ax.set_xlim((xmin, xmax))

View file

@@ -35,8 +35,7 @@ def add_bar_labels(fig, ax, bars, bottom=0):
def plot_bars(fig, ax, x, ard_params, color, name, bottom=0):
-from ...util.misc import param_to_array
-return ax.bar(left=x, height=param_to_array(ard_params), width=.8,
+return ax.bar(left=x, height=ard_params.view(np.ndarray), width=.8,
bottom=bottom, align='center',
color=color, edgecolor='k', linewidth=1.2,
label=name.replace("_"," "))

View file

@@ -8,7 +8,6 @@ except:
pass
import numpy as np
from base_plots import gpplot, x_frame1D, x_frame2D
-from ...util.misc import param_to_array
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
@@ -67,7 +66,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
X_variance = model.X.variance
else:
X = model.X
-#X, Y = param_to_array(X, model.Y)
Y = model.Y
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)

View file

@@ -1,5 +1,4 @@
import pylab as pb, numpy as np
-from ...util.misc import param_to_array
def plot(parameterized, fignum=None, ax=None, colors=None):
"""
@@ -21,7 +20,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
else:
colors = iter(colors)
plots = []
-means, variances = param_to_array(parameterized.mean, parameterized.variance)
+means, variances = parameterized.mean, parameterized.variance
x = np.arange(means.shape[0])
for i in range(means.shape[1]):
if ax is None:
@@ -68,7 +67,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid
else:
colors = iter(colors)
plots = []
-means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob)
+means, variances, gamma = parameterized.mean, parameterized.variance, parameterized.binary_prob
x = np.arange(means.shape[0])
for i in range(means.shape[1]):
if side_by_side:

View file

@@ -4,7 +4,6 @@ import GPy
import numpy as np
import matplotlib as mpl
import time
-from ...util.misc import param_to_array
from GPy.core.parameterization.variational import VariationalPosterior
try:
import visual

View file

@@ -523,6 +523,23 @@
"http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/"
]
},
+"singlecell_islam": {
+"citation": "Single-Cell RNA-Seq Reveals Dynamic, Random Monoallelic Gene Expression in Mammalian Cells Qiaolin Deng, Daniel Ramskoeld, Bjoern Reinius, and Rickard Sandberg Science 10 January 2014: 343 (6167), 193-196. [DOI:10.1126/science.1245316]",
+"details" : "92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)",
+"files" : [["GSE29087_L139_expression_tab.txt.gz"], ["GSE29087_family.soft.gz"]],
+"license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html",
+"size" : 1159449,
+"urls" : ["ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/suppl/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE29nnn/GSE29087/soft/"]
+},
+"singlecell_deng": {
+"citation": "Deng Q, Ramsköld D, Reinius B, Sandberg R. Single-cell RNA-seq reveals dynamic, random monoallelic gene expression in mammalian cells. Science 2014 Jan 10;343(6167):193-6. PMID: 24408435",
+"details" : "First generation mouse strain crosses were used to study monoallelic expression on the single cell level",
+"files" : [["?acc=GSE45719&format=file"], ["GSE45719_series_matrix.txt.gz"]],
+"license" : "Gene Expression Omnibus: http://www.ncbi.nlm.nih.gov/geo/info/disclaimer.html",
+"size" : 1159449,
+"save_names": [["GSE45719_Raw.tar"], [null]],
+"urls" : ["http://www.ncbi.nlm.nih.gov/geo/download/", "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE45nnn/GSE45719/matrix/"]
+},
"sod1_mouse": {
"citation": "Transcriptomic indices of fast and slow disease progression in two mouse models of amyotrophic lateral sclerosis' Nardo G1, Iennaco R, Fusi N, Heath PR, Marino M, Trolese MC, Ferraiuolo L, Lawrence N, Shaw PJ, Bendotti C Brain. 2013 Nov;136(Pt 11):3305-32. doi: 10.1093/brain/awt250. Epub 2013 Sep 24.",
"details": "Gene expression data from two separate strains of mice: C57 and 129Sv in wild type and SOD1 mutant strains.",

View file

@@ -82,9 +82,16 @@ def prompt_user(prompt):
def data_available(dataset_name=None):
"""Check if the data set is available on the local machine already."""
-for file_list in data_resources[dataset_name]['files']:
-for file in file_list:
-if not os.path.exists(os.path.join(data_path, dataset_name, file)):
+from itertools import izip_longest
+dr = data_resources[dataset_name]
+zip_urls = (dr['files'], )
+if dr.has_key('save_names'): zip_urls += (dr['save_names'], )
+else: zip_urls += ([],)
+for file_list, save_list in izip_longest(*zip_urls, fillvalue=[]):
+for f, s in izip_longest(file_list, save_list, fillvalue=None):
+if s is not None: f=s # If there is a save_name given, use that one
+if not os.path.exists(os.path.join(data_path, dataset_name, f)):
return False
return True
@@ -94,8 +101,13 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
file = url[i+1:]
print file
dir_name = os.path.join(data_path, store_directory)
-save_name = os.path.join(dir_name, file)
-print "Downloading ", url, "->", os.path.join(store_directory, file)
+if save_name is None: save_name = os.path.join(dir_name, file)
+else: save_name = os.path.join(dir_name, save_name)
+if suffix is None: suffix=''
+print "Downloading ", url, "->", save_name
if not os.path.exists(dir_name):
os.makedirs(dir_name)
try:
@@ -178,19 +190,24 @@ def authorize_download(dataset_name=None):
def download_data(dataset_name=None):
"""Check with the user that the are happy with terms and conditions for the data set, then download it."""
+import itertools
dr = data_resources[dataset_name]
if not authorize_download(dataset_name):
raise Exception("Permission to download data set denied.")
-if dr.has_key('suffices'):
-for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']):
-for file, suffix in zip(files, suffices):
-download_url(os.path.join(url,file), dataset_name, dataset_name, suffix=suffix)
-else:
-for url, files in zip(dr['urls'], dr['files']):
-for file in files:
-download_url(os.path.join(url,file), dataset_name, dataset_name)
+zip_urls = (dr['urls'], dr['files'])
+if dr.has_key('save_names'): zip_urls += (dr['save_names'], )
+else: zip_urls += ([],)
+if dr.has_key('suffices'): zip_urls += (dr['suffices'], )
+else: zip_urls += ([],)
+for url, files, save_names, suffices in itertools.izip_longest(*zip_urls, fillvalue=[]):
+for f, save_name, suffix in itertools.izip_longest(files, save_names, suffices, fillvalue=None):
+download_url(os.path.join(url,f), dataset_name, save_name, suffix=suffix)
return True
def data_details_return(data, data_set):
@@ -895,6 +912,128 @@ def singlecell(data_set='singlecell'):
'genes': genes, 'labels':labels,
}, data_set)
def singlecell_rna_seq_islam(dataset='singlecell_islam'):
if not data_available(dataset):
download_data(dataset)
from pandas import read_csv, DataFrame, concat
dir_path = os.path.join(data_path, dataset)
filename = os.path.join(dir_path, 'GSE29087_L139_expression_tab.txt.gz')
data = read_csv(filename, sep='\t', skiprows=6, compression='gzip', header=None)
header1 = read_csv(filename, sep='\t', header=None, skiprows=5, nrows=1, compression='gzip')
header2 = read_csv(filename, sep='\t', header=None, skiprows=3, nrows=1, compression='gzip')
data.columns = np.concatenate((header1.ix[0, :], header2.ix[0, 7:]))
Y = data.set_index("Feature").ix[8:, 6:-4].T.astype(float)
# read the info .soft
filename = os.path.join(dir_path, 'GSE29087_family.soft.gz')
info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None)
# split at ' = '
info = DataFrame(info.ix[:,0].str.split(' = ').tolist())
# only take samples:
info = info[info[0].str.contains("!Sample")]
info[0] = info[0].apply(lambda row: row[len("!Sample_"):])
groups = info.groupby(0).groups
# remove 'GGG' from barcodes
barcode = info[1][groups['barcode']].apply(lambda row: row[:-3])
title = info[1][groups['title']]
title.index = barcode
title.name = 'title'
geo_accession = info[1][groups['geo_accession']]
geo_accession.index = barcode
geo_accession.name = 'geo_accession'
case_id = info[1][groups['source_name_ch1']]
case_id.index = barcode
case_id.name = 'source_name_ch1'
info = concat([title, geo_accession, case_id], axis=1)
labels = info.join(Y).source_name_ch1[:-4]
labels[labels=='Embryonic stem cell'] = "ES"
labels[labels=='Embryonic fibroblast'] = "MEF"
return data_details_return({'Y': Y,
'info': '92 single cells (48 mouse ES cells, 44 mouse embryonic fibroblasts and 4 negative controls) were analyzed by single-cell tagged reverse transcription (STRT)',
'genes': Y.columns,
'labels': labels,
'datadf': data,
'infodf': info}, dataset)
def singlecell_rna_seq_deng(dataset='singlecell_deng'):
if not data_available(dataset):
download_data(dataset)
from pandas import read_csv
dir_path = os.path.join(data_path, dataset)
# read the info .soft
filename = os.path.join(dir_path, 'GSE45719_series_matrix.txt.gz')
info = read_csv(filename, sep='\t', skiprows=0, compression='gzip', header=None, nrows=29, index_col=0)
summary = info.loc['!Series_summary'][1]
design = info.loc['!Series_overall_design']
# only take samples:
sample_info = read_csv(filename, sep='\t', skiprows=30, compression='gzip', header=0, index_col=0).T
sample_info.columns = sample_info.columns.to_series().apply(lambda row: row[len("!Sample_"):])
sample_info.columns.name = sample_info.columns.name[len("!Sample_"):]
sample_info = sample_info[['geo_accession', 'characteristics_ch1', 'description']]
sample_info = sample_info.ix[:, np.r_[0:3, 5:sample_info.shape[1]]]
c = sample_info.columns.to_series()
c[1:4] = ['strain', 'cross', 'developmental_stage']
sample_info.columns = c
# Extract the tar file
filename = os.path.join(dir_path, 'GSE45719_Raw.tar')
with tarfile.open(filename, 'r') as files:
data = None
gene_info = None
message = ''
members = files.getmembers()
overall = len(members)
for i, file_info in enumerate(members):
f = files.extractfile(file_info)
inner = read_csv(f, sep='\t', header=0, compression='gzip', index_col=0)
sys.stdout.write(' '*(len(message)+1) + '\r')
sys.stdout.flush()
message = "{: >7.2%}: Extracting: {}".format(float(i+1)/overall, file_info.name[:20]+"...txt.gz")
sys.stdout.write(message)
if data is None:
data = inner.RPKM.to_frame()
data.columns = [file_info.name[:-18]]
gene_info = inner.Refseq_IDs.to_frame()
gene_info.columns = [file_info.name[:-18]]
else:
data[file_info.name[:-18]] = inner.RPKM
gene_info[file_info.name[:-18]] = inner.Refseq_IDs
# Strip GSM number off data index
rep = re.compile('GSM\d+_')
data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():])
data = data.T
# make sure the same index gets used
sample_info.index = data.index
# get the labels from the description
rep = re.compile('fibroblast|\d+-cell|embryo|liver|blastocyst|blastomere|zygote', re.IGNORECASE)
labels = sample_info.developmental_stage.apply(lambda row: " ".join(rep.findall(row)))
sys.stdout.write(' '*len(message) + '\r')
sys.stdout.flush()
print "Read Archive {}".format(files.name)
return data_details_return({'Y': data,
'series_info': info,
'sample_info': sample_info,
'gene_info': gene_info,
'summary': summary,
'design': design,
'genes': data.columns,
'labels': labels,
}, dataset)
def swiss_roll_1000():
return swiss_roll(num_samples=1000)
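
The download helpers above now pair each data set's files with optional per-file save_names and suffices, padding with empty lists and None where an entry does not define them. A small Python 2 sketch (hypothetical URLs and file names) of how izip_longest performs that padding:

```python
from itertools import izip_longest  # Python 2, matching the codebase

urls = ['http://example.org/a/', 'http://example.org/b/']   # hypothetical entries
files = [['x.tar'], ['y.txt.gz']]
save_names = [['x_saved.tar']]                               # only given for the first url

for url, file_list, save_list in izip_longest(urls, files, save_names, fillvalue=[]):
    for f, s in izip_longest(file_list, save_list, fillvalue=None):
        target = s if s is not None else f    # fall back to the remote file name
        print url + f, '->', target
```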

View file

@@ -90,6 +90,8 @@ Convert an arbitrary number of parameters to :class:ndarray class objects. This
converting parameter objects to numpy arrays, when using scipy.weave.inline routine.
In scipy.weave.blitz there is no automatic array detection (even when the array inherits
from :class:ndarray)"""
+import warnings
+warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning)
assert len(param) > 0, "At least one parameter needed"
if len(param) == 1:
return param[0].view(np.ndarray)
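
With the warning in place, existing callers keep working but are pointed at the replacement. A quick check of that behaviour, using a Param built directly (the parameter name and values here are illustrative only):

```python
import warnings
import numpy as np
from GPy.core.parameterization import Param
from GPy.util.misc import param_to_array

p = Param('lengthscale', np.array([1.5, 0.5, 2.0]))

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    arr = param_to_array(p)              # still works, but now emits a DeprecationWarning
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# The replacement recommended by the warning and the commit message:
assert np.allclose(arr, p.values)
```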