def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
    """Download Google Trends time series for some query terms and plot them.

    Inspired by this notebook:
    http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb

    :param query_terms: search terms to request. Each term is URL-quoted
        before it is placed in the query string; the sequence itself is
        left unmodified.
    :param data_set: name of the data set. Not used by this function at
        present.
    :returns: the pandas DataFrame built from the response table, indexed
        by its 'Date' column. The frame is also plotted as a side effect.
    """
    import pandas as pd

    # Quote into a fresh list. Writing the quoted terms back into
    # query_terms would modify the caller's list, and also the shared default
    # list, so a second call with the defaults would quote the
    # already-quoted terms again.
    quoted_terms = [urllib2.quote(term) for term in query_terms]
    query = 'http://www.google.com/trends/fetchComponent?q=%s&cid=TIMESERIES_GRAPH_0&export=3' % ",".join(quoted_terms)
    data = urllib2.urlopen(query).read()

    # Data cleaning: remove the Javascript header and footer so that only
    # the payload handed to json.loads remains. The footer is taken to be
    # the final two characters of the response.
    header = """// Data table response\ngoogle.visualization.Query.setResponse("""
    data = data[len(header):-2]

    def _iso_date(match):
        # Translate new Date(Y,M,D) into a quoted "YYYY-MM-DD" string. One is
        # added to the month because Javascript Date months are zero-based.
        return '"%s-%02d-%02d"' % (match.group(1), 1 + int(match.group(2)), int(match.group(3)))

    # Raw string so the backslashes reach the regex engine untouched.
    data = re.sub(r'new Date\((\d+),(\d+),(\d+)\)', _iso_date, data)
    timeseries = json.loads(data)

    columns = [col['label'] for col in timeseries['table']['cols']]
    # A list of lists (rather than map) behaves the same on Python 2 and 3.
    rows = [[cell['v'] for cell in row['c']] for row in timeseries['table']['rows']]
    df = pd.DataFrame(rows, columns=columns)
    df.set_index('Date', inplace=True)
    df.plot(figsize=(16, 8))
    return df