diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index f9dacf02..3f696431 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -12,6 +12,7 @@ from ...core.parameterization.transformations import Logexp from ...util.caching import Cache_this from ...core.parameterization import variational from psi_comp import linear_psi_comp +from ...util.config import * class Linear(Kern): """ @@ -224,12 +225,23 @@ class Linear(Kern): AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :] AZZA = AZZA + AZZA.swapaxes(1, 2) AZZA_2 = AZZA/2. + if config.getboolean('parallel', 'openmp'): + pragma_string = '#pragma omp parallel for private(m,mm,q,qq,factor,tmp)' + header_string = '#include ' + weave_options = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], + 'extra_link_args' : ['-lgomp'], + 'libraries': ['gomp']} + else: + pragma_string = '' + header_string = '' + weave_options = {'extra_compile_args': ['-O3']} #Using weave, we can exploit the symmetry of this problem: code = """ int n, m, mm,q,qq; double factor,tmp; - #pragma omp parallel for private(m,mm,q,qq,factor,tmp) + %s for(n=0;n + %s #include - """ - weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], - 'extra_link_args' : ['-lgomp']} + """ % header_string mu = vp.mean N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) - weave.inline(code, support_code=support_code, libraries=['gomp'], + weave.inline(code, support_code=support_code, arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], type_converters=weave.converters.blitz,**weave_options) def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target): AZA = self.variances*self._ZAinner(vp, Z) + + if config.getboolean('parallel', 'openmp'): + pragma_string = '#pragma omp parallel for private(n,mm,q)' + header_string = '#include ' + weave_options = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], + 'extra_link_args' 
: ['-lgomp'], + 'libraries': ['gomp']} + else: + pragma_string = '' + header_string = '' + weave_options = {'extra_compile_args': ['-O3']} + code=""" int n,m,mm,q; - #pragma omp parallel for private(n,mm,q) + %s for(m=0;m + %s #include - """ - weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], - 'extra_link_args' : ['-lgomp']} + """ % header_string N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1] mu = param_to_array(vp.mean) - weave.inline(code, support_code=support_code, libraries=['gomp'], + weave.inline(code, support_code=support_code, arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], type_converters=weave.converters.blitz,**weave_options) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index e0071fb9..5bc80871 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -10,6 +10,7 @@ from GPy.util.caching import Cache_this from ...core.parameterization import variational from psi_comp import ssrbf_psi_comp from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF +from ...util.config import * class RBF(Stationary): """ @@ -231,6 +232,16 @@ class RBF(Stationary): @Cache_this(limit=1) def _psi2computations(self, Z, vp): + + if config.getboolean('parallel', 'openmp'): + pragma_string = '#pragma omp parallel for private(tmp, exponent_tmp)' + header_string = '#include ' + libraries = ['gomp'] + else: + pragma_string = '' + header_string = '' + libraries = [] + mu, S = vp.mean, vp.variance N, Q = mu.shape @@ -253,8 +264,7 @@ class RBF(Stationary): variance_sq = float(np.square(self.variance)) code = """ double tmp, exponent_tmp; - - #pragma omp parallel for private(tmp, exponent_tmp) + %s for (int n=0; n + %s #include - """ + """ % header_string mu = param_to_array(mu) - weave.inline(code, support_code=support_code, libraries=['gomp'], + weave.inline(code, support_code=support_code, libraries=libraries, arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 
'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) @@ -303,12 +313,20 @@ class RBF(Stationary): #return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale result = np.zeros(self.input_dim) + if config.getboolean('parallel', 'openmp'): + pragma_string = '#pragma omp parallel for reduction(+:tmp)' + header_string = '#include ' + libraries = ['gomp'] + else: + pragma_string = '' + header_string = '' + libraries = [] code = """ double tmp; for(int q=0; q + %s #include - """ + """ % header_string N,Q = S.shape M = psi2.shape[-1] S = param_to_array(S) - weave.inline(code, support_code=support_code, libraries=['gomp'], + weave.inline(code, support_code=support_code, libraries=libraries, arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'], type_converters=weave.converters.blitz, **self.weave_options) diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index ca2c890f..71e08c6b 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -97,7 +97,7 @@ def plot_latent(model, labels=None, which_indices=None, elif type(ul) is np.int64: this_label = 'class %i' % ul else: - this_label = 'class %i' % i + this_label = unicode(i) m = marker.next() index = np.nonzero(labels == ul)[0] diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 57b64ae5..84747d05 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -14,7 +14,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=False, - 
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None): + linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'): """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -97,7 +97,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', for d in which_data_ycols: plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) - plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) + plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) #optionally plot some samples if samples: #NOTE not tested with fixed_inputs diff --git a/GPy/util/data_resources.json b/GPy/util/data_resources.json index 51070650..6cc692e8 100644 --- a/GPy/util/data_resources.json +++ b/GPy/util/data_resources.json @@ -150,6 +150,26 @@ ] }, "fruitfly_tomancak": { + "citation": "", + "details": "", + "files": [ + [ + "tomancak_exprs.csv", + "tomancak_se.csv", + "tomancak_prctile5.csv", + "tomancak_prctile25.csv", + "tomancak_prctile50.csv", + "tomancak_prctile75.csv", + "tomancak_prctile95.csv" + ] + ], + "license": null, + "size": 59000000, + "urls": [ + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/fruitfly_tomancak/" + ] + }, + "fruitfly_tomancak_cel_files": { "citation": "'Systematic determination of patterns of gene expression during Drosophila embryogenesis' Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Celniker, and Gerald M Rubin", "details": "Gene expression results from blastoderm development in Drosophila Melanogaster.", "files": [ @@ -198,7 +218,7 @@ ] ], "license": null, - "size": 1, + "size": 389000000, "urls": [ 
"ftp://ftp.fruitfly.org/pub/embryo_tc_array_data/" ] @@ -217,6 +237,7 @@ "http://www.google.com/trends/" ] }, + "hapmap3": { "citation": "Gibbs, Richard A., et al. 'The international HapMap project.' Nature 426.6968 (2003): 789-796.", "details": "HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations. \n The HapMap phase three SNP dataset - 1184 samples out of 11 populations.\n See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.\n\n SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:\n Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then\n\n / 1, iff SNPij==(B1,B1)\n Aij = | 0, iff SNPij==(B1,B2)\n \\\\ -1, iff SNPij==(B2,B2)\n\n The SNP data and the meta information (such as iid, sex and phenotype) are\n stored in the dataframe datadf, index is the Individual ID, \n with following columns for metainfo:\n\n * family_id -> Family ID\n * paternal_id -> Paternal ID\n * maternal_id -> Maternal ID\n * sex -> Sex (1=male; 2=female; other=unknown)\n * phenotype -> Phenotype (-9, or 0 for unknown)\n * population -> Population string (e.g. 'ASW' - 'YRI')\n * rest are SNP rs (ids)\n\n More information is given in infodf:\n\n * Chromosome:\n - autosomal chromosemes -> 1-22\n - X X chromosome -> 23\n - Y Y chromosome -> 24\n - XY Pseudo-autosomal region of X -> 25\n - MT Mitochondrial -> 26\n * Relative Positon (to Chromosome) [base pairs]\n\n ", @@ -434,7 +455,7 @@ }, "singlecell": { "citation": "Guoji Guo, Mikael Huss, Guo Qing Tong, Chaoyang Wang, Li Li Sun, Neil D. Clarke, Paul Robson, Resolution of Cell Fate Decisions Revealed by Single-Cell Gene Expression Analysis from Zygote to Blastocyst, Developmental Cell, Volume 18, Issue 4, 20 April 2010, Pages 675-685, ISSN 1534-5807, http://dx.doi.org/10.1016/j.devcel.2010.02.012. 
(http://www.sciencedirect.com/science/article/pii/S1534580710001103) Keywords: DEVBIO", - "details": "qPCR Singlecell experiment in Mouse, measuring 48 gene expressions in 1-64 cell states. The labels have been created as in Guo et al. [2010]", + "details": "qPCR TaqMan array single cell experiment in mouse. The data is taken from the early stages of development when the Blastocyst is forming. At the 32 cell stage the data is already separated into the trophectoderm (TE) which goes on to form the placenta and the inner cellular mass (ICM). The ICM further differentiates into the epiblast (EPI)---which gives rise to the endoderm, mesoderm and ectoderm---and the primitive endoderm (PE) which develops into the amniotic sac. Guo et al. selected 48 genes for expression measurement. They labelled the resulting cells and their labels are included as an aid to visualization.", "files": [ [ "singlecell.csv" @@ -443,7 +464,7 @@ "license": "ScienceDirect: http://www.elsevier.com/locate/termsandconditions?utm_source=sciencedirect&utm_medium=link&utm_campaign=terms", "size": 233.1, "urls": [ - "http://staffwww.dcs.sheffield.ac.uk/people/M.Zwiessele/data/singlecell/" + "http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/singlecell/" ] }, "swiss_roll": { diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index bdd55066..c18431ef 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -112,7 +112,7 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix if content_length_str: file_size = int(content_length_str[0]) else: - file_size = 1e10 + file_size = None status = "" file_size_dl = 0 block_sz = 8192 @@ -124,9 +124,15 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix file_size_dl += len(buff) f.write(buff) sys.stdout.write(" "*(len(status)) + "\r") - status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1.*1e6), - full=file_size/(1.*1e6), ll=line_length, + if file_size: + 
status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1048576.), + full=file_size/(1048576.), ll=line_length, perc="="*int(line_length*float(file_size_dl)/file_size)) + else: + status = r"[{perc: <{ll}}] {dl:7.3f}MB".format(dl=file_size_dl/(1048576.), + ll=line_length, + perc="."*int(line_length*float(file_size_dl/(10*1048576.)))) + sys.stdout.write(status) sys.stdout.flush() sys.stdout.write(" "*(len(status)) + "\r") @@ -357,8 +363,15 @@ def football_data(season='1314', data_set='football_data'): def fruitfly_tomancak(data_set='fruitfly_tomancak', gene_number=None): if not data_available(data_set): download_data(data_set) - X = None - Y = None + from pandas import read_csv + filename = os.path.join(data_path, 'tomancak_expr.csv') + Y = read_csv(filename, header=0, index_col=0).T + num_repeats = 3 + num_time = 12 + xt = np.linspace(0, num_time-1, num_time) + xr = np.linspace(0, num_repeats-1, num_repeats) + xtime, xrepeat = np.meshgrid(xt, xr) + X = np.vstack((xtime.flatten(), xrepeat.flatten())).T return data_details_return({'X': X, 'Y': Y, 'gene_number' : gene_number}, data_set) # This will be for downloading google trends data. @@ -732,13 +745,16 @@ def hapmap3(data_set='hapmap3'): def singlecell(data_set='singlecell'): if not data_available(data_set): download_data(data_set) + + from pandas import read_csv dirpath = os.path.join(data_path, data_set) - data = np.loadtxt(os.path.join(dirpath, 'singlecell.csv'), delimiter=",", dtype=str) - genes = data[0, 1:] - labels = data[1:, 0] - Y = np.array(data[1:, 1:], dtype=float) - return data_details_return({'Y': Y, 'info' : "qPCR Singlecell experiment in Mouse, measuring 48 gene expressions in 1-64 cell states. The labels have been created as in Guo et al. 
[2010]", - 'genes':genes, 'labels':labels, + filename = os.path.join(dirpath, 'singlecell.csv') + Y = read_csv(filename, header=0, index_col=0) + genes = Y.columns + labels = Y.index + # data = np.loadtxt(os.path.join(dirpath, 'singlecell.csv'), delimiter=",", dtype=str) + return data_details_return({'Y': Y, 'info' : "qPCR singlecell experiment in Mouse, measuring 48 gene expressions in 1-64 cell states. The labels have been created as in Guo et al. [2010]", + 'genes': genes, 'labels':labels, }, data_set) def swiss_roll_1000(): diff --git a/GPy/util/misc.py b/GPy/util/misc.py index dc327324..fa9bb24c 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -130,14 +130,14 @@ def fast_array_equal(A, B): """ % pragma_string if config.getboolean('parallel', 'openmp'): - pragma_string = '#include ' + header_string = '#include ' else: - pragma_string = '' + header_string = '' support_code = """ %s #include - """ % pragma_string + """ % header_string weave_options_openmp = {'headers' : [''],