Changes to datasets.py

This commit is contained in:
Neil Lawrence 2014-07-24 20:55:18 +01:00
parent fdf8f984c1
commit ec5e9443ce

View file

@ -659,6 +659,22 @@ def ripley_synth(data_set='ripley_prnn_data'):
ytest = test[:, 2:3]
return data_details_return({'X': X, 'Y': y, 'Xtest': Xtest, 'Ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
def global_average_temperature(data_set='global_temperature', num_train=1000, refresh_data=False):
path = os.path.join(data_path, data_set)
if data_available(data_set) and not refresh_data:
print 'Using cached version of the data set, to use latest version set refresh_data to True'
else:
download_data(data_set)
data = np.loadtxt(os.path.join(data_path, data_set, 'GLBTS.long.data'))
print 'Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0]
allX = data[data[:, 3]!=-99.99, 2:3]
allY = data[data[:, 3]!=-99.99, 3:4]
X = allX[:num_train, 0:1]
Xtest = allX[num_train:, 0:1]
Y = allY[:num_train, 0:1]
Ytest = allY[num_train:, 0:1]
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set)
def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False):
path = os.path.join(data_path, data_set)
if data_available(data_set) and not refresh_data: