def global_average_temperature(data_set='global_temperature', num_train=1000, refresh_data=False):
    """Load the global average temperature series and split it into train/test sets.

    The data set is downloaded with ``download_data`` unless a cached copy is
    reported by ``data_available`` and ``refresh_data`` is False. The file
    ``GLBTS.long.data`` is then read from the data set directory.

    :param data_set: name of the data set; also the sub-directory of
        ``data_path`` that holds ``GLBTS.long.data``.
    :param num_train: number of leading valid observations used for training;
        every remaining valid observation goes to the test set.
    :param refresh_data: when True, download the data again even if a cached
        copy is available.
    :returns: the result of ``data_details_return`` applied to a dictionary
        with the keys ``'X'``, ``'Y'``, ``'Xtest'``, ``'Ytest'`` and ``'info'``.
    """
    if data_available(data_set) and not refresh_data:
        print('Using cached version of the data set, to use latest version set refresh_data to True')
    else:
        download_data(data_set)

    data = np.loadtxt(os.path.join(data_path, data_set, 'GLBTS.long.data'))

    # Column 0 is the year and column 1 the month of each observation.
    # Spacing inside the message is kept as-is so the console output is unchanged.
    print('Most recent data observation from month  %s  in year  %s' % (data[-1, 1], data[-1, 0]))

    # Rows whose fourth column equals -99.99 are treated as missing and dropped.
    valid = data[:, 3] != -99.99
    # Column 2 is the input (presumably a decimal date -- TODO confirm against
    # the data file) and column 3 the observed value; 2:3 / 3:4 keep them 2-D.
    allX = data[valid, 2:3]
    allY = data[valid, 3:4]

    X = allX[:num_train, 0:1]
    Xtest = allX[num_train:, 0:1]
    Y = allY[:num_train, 0:1]
    Ytest = allY[num_train:, 0:1]

    info = "Global average temperature data with " + str(num_train) + " values used as training points."
    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': info}, data_set)