mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-04-24 20:36:23 +02:00
add the regression benchmark
This commit is contained in:
parent
f221a3b1fa
commit
60aa865631
7 changed files with 310 additions and 2 deletions
|
|
@ -5,3 +5,4 @@ Nicolas Durrande
|
|||
Alan Saul
|
||||
Max Zwiessele
|
||||
Neil D. Lawrence
|
||||
Zhenwen Dai
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
import numpy as np
|
||||
from ...core import Model
|
||||
from ...core.parameterization import variational
|
||||
from GPy.core.parameterization.variational import VariationalPosterior
|
||||
|
||||
def infer_newX(model, Y_new, optimize=True, init='L2'):
|
||||
"""
|
||||
|
|
@ -60,7 +61,8 @@ class InferenceX(Model):
|
|||
# self.kern.GPU(True)
|
||||
from copy import deepcopy
|
||||
self.posterior = deepcopy(model.posterior)
|
||||
if hasattr(model, 'variational_prior'):
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
self.uncertain_input = True
|
||||
from ...models.ss_gplvm import IBPPrior
|
||||
from ...models.ss_mrd import IBPPrior_SSMRD
|
||||
|
|
@ -71,7 +73,7 @@ class InferenceX(Model):
|
|||
self.variational_prior = model.variational_prior.copy()
|
||||
else:
|
||||
self.uncertain_input = False
|
||||
if hasattr(model, 'inducing_inputs'):
|
||||
if hasattr(model, 'Z'):
|
||||
self.sparse_gp = True
|
||||
self.Z = model.Z.copy()
|
||||
else:
|
||||
|
|
|
|||
21
benchmarks/regression/evaluation.py
Normal file
21
benchmarks/regression/evaluation.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# Copyright (c) 2015, Zhenwen Dai
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import abc
|
||||
import numpy as np
|
||||
|
||||
class Evaluation(object):
    """Abstract base class for performance measures on regression predictions."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def evaluate(self, gt, pred):
        """Compute a scalar assessing the prediction performance.

        :param gt: ground-truth values
        :param pred: predicted values (compared element-wise against gt)
        :returns: a scalar score
        """
        return None
|
||||
|
||||
class RMSE(Evaluation):
    """Root Mean Square Error."""
    name = 'RMSE'

    def evaluate(self, gt, pred):
        """Return sqrt(mean((gt - pred)**2)).

        :param gt: ground-truth values
        :param pred: predicted values (broadcastable against gt)
        """
        # `np.float` was a deprecated alias of the builtin `float` and was
        # removed in NumPy 1.20; use `float` directly (identical semantics).
        return np.sqrt(np.square(gt - pred).astype(float).mean())
|
||||
|
||||
81
benchmarks/regression/methods.py
Normal file
81
benchmarks/regression/methods.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
# Copyright (c) 2015, Zhenwen Dai
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import abc
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
class RegressionMethod(object):
    """Abstract base class for regression benchmark methods.

    Subclasses implement ``_fit`` and ``_predict``; this base class takes
    care of optional zero-mean/unit-variance normalization of both the
    inputs and the labels.
    """
    __metaclass__ = abc.ABCMeta

    def __init__(self):
        # Normalization is on by default; subclasses may switch it off.
        self.preprocess = True

    def _preprocess(self, data, train):
        """Zero-mean, unit-variance normalization by default.

        In training mode *data* is an ``(inputs, labels)`` pair whose
        statistics are recorded on ``self``; otherwise *data* is an input
        array normalized with the stored training statistics.
        """
        if not train:
            return (data - self.data_mean) / self.data_std
        X, Y = data
        self.data_mean = X.mean(axis=0)
        self.data_std = X.std(axis=0)
        self.labels_mean = Y.mean(axis=0)
        self.labels_std = Y.std(axis=0)
        X_norm = (X - self.data_mean) / self.data_std
        Y_norm = (Y - self.labels_mean) / self.labels_std
        return (X_norm, Y_norm)

    def _reverse_trans_labels(self, labels):
        """Map normalized labels back onto the original label scale."""
        return labels * self.labels_std + self.labels_mean

    def fit(self, train_data):
        """Normalize *train_data* if enabled, then delegate to ``_fit``."""
        data = self._preprocess(train_data, True) if self.preprocess else train_data
        return self._fit(data)

    def predict(self, test_data):
        """Predict labels for *test_data*, undoing normalization if enabled."""
        inputs = self._preprocess(test_data, False) if self.preprocess else test_data
        out = self._predict(inputs)
        if self.preprocess:
            out = self._reverse_trans_labels(out)
        return out

    @abc.abstractmethod
    def _fit(self, train_data):
        """Fit the model. Return True if successful"""
        return True

    @abc.abstractmethod
    def _predict(self, test_data):
        """Predict on test data"""
        return None
|
||||
|
||||
class GP_RBF(RegressionMethod):
    """Full GP regression with an ARD RBF + linear + white kernel."""
    name = 'GP_RBF'

    def _fit(self, train_data):
        """Build and optimize a GPRegression model on the training pair."""
        X, Y = train_data
        kernel = (GPy.kern.RBF(X.shape[-1], ARD=True)
                  + GPy.kern.Linear(X.shape[1], ARD=True)
                  + GPy.kern.White(X.shape[1], 0.01))
        self.model = GPy.models.GPRegression(X, Y, kernel=kernel)
        # Initialize the noise variance at 1% of the label variance.
        self.model.likelihood.variance[:] = Y.var() * 0.01
        self.model.optimize()
        return True

    def _predict(self, test_data):
        """Return the predictive mean at the test inputs."""
        return self.model.predict(test_data)[0]
|
||||
|
||||
class SVIGP_RBF(RegressionMethod):
    """Sparse variational GP with an ARD RBF + linear + white kernel.

    Uses up to 100 randomly chosen inducing points and a Student-t
    likelihood (deg_free=3) — presumably for robustness to label
    outliers; confirm against the benchmark's intent.
    """
    name = 'SVIGP_RBF'

    def _fit(self, train_data):
        """Build and optimize an SVGP model on the training pair."""
        X, Y = train_data

        # Random subset of (up to) 100 training inputs as inducing points.
        Z = X[np.random.permutation(X.shape[0])[:100]]
        k = GPy.kern.RBF(X.shape[1], ARD=True) + GPy.kern.Linear(X.shape[1], ARD=True) + GPy.kern.White(X.shape[1], 0.01)

        lik = GPy.likelihoods.StudentT(deg_free=3.)
        self.model = GPy.core.SVGP(X, Y, Z=Z, kernel=k, likelihood=lik)
        # Ten short SCG runs to reach a good basin, then a long BFGS polish.
        # (Plain loop: the original side-effect list comprehension built a
        # throwaway list of None.)
        for _ in range(10):
            self.model.optimize('scg', max_iters=40, gtol=0, messages=0, xtol=0, ftol=0)
        self.model.optimize('bfgs', max_iters=1000, gtol=0, messages=0)
        return True

    def _predict(self, test_data):
        """Return the predictive mean at the test inputs."""
        return self.model.predict(test_data)[0]
|
||||
64
benchmarks/regression/outputs.py
Normal file
64
benchmarks/regression/outputs.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# Copyright (c) 2015, Zhenwen Dai
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from __future__ import print_function
|
||||
import abc
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
class Output(object):
    """Abstract base class for reporting benchmark results."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def output(self, config, results):
        """Report the benchmark *results* array gathered under *config*.

        Implementations print or write the results; the return value is
        unused (None).
        """
        return None
|
||||
|
||||
class ScreenOutput(Output):
    """Print the benchmark result table to stdout, tab-separated."""

    def output(self, config, results):
        """Print a header row, then one row of mean(std) cells per task."""
        print('=' * 10 + 'Report' + '=' * 10)
        eval_names = [a.name for a in config['evaluations']] + ['time']
        header = [' '] + [m.name + '(' + e + ')' for m in config['methods'] for e in eval_names]
        print('\t'.join(header))
        for task_i, task in enumerate(config['tasks']):
            print(task.name + '\t', end='')

            cells = []
            for method_i in range(len(config['methods'])):
                for ei in range(len(config['evaluations']) + 1):
                    vals = results[task_i, method_i, ei]
                    cells.append('%e(%e)' % (vals.mean(), vals.std()))
            print('\t'.join(cells))
|
||||
|
||||
class CSVOutput(Output):
    """Write the benchmark result table to a CSV file."""

    def __init__(self, outpath, prjname):
        # Results land in <outpath>/<prjname>.csv
        self.fname = os.path.join(outpath, prjname + '.csv')

    def output(self, config, results):
        """Write a header row, then one row of mean (std) cells per task."""
        eval_names = [a.name for a in config['evaluations']] + ['time']
        # The with-statement closes the file; the explicit f.close() the
        # original called inside the block was redundant and is removed.
        with open(self.fname, 'w') as f:
            f.write(','.join([' '] + [m.name + '(' + e + ')' for m in config['methods'] for e in eval_names]) + '\n')
            for task_i in range(len(config['tasks'])):
                f.write(config['tasks'][task_i].name + ',')

                outputs = []
                for method_i in range(len(config['methods'])):
                    for ei in range(len(config['evaluations']) + 1):
                        vals = results[task_i, method_i, ei]
                        outputs.append('%e (%e)' % (vals.mean(), vals.std()))
                f.write(','.join(outputs) + '\n')
|
||||
|
||||
class H5Output(Output):
    """Write the raw results array to an HDF5 file (requires h5py)."""

    def __init__(self, outpath, prjname):
        # Results land in <outpath>/<prjname>.h5
        self.fname = os.path.join(outpath, prjname + '.h5')

    def output(self, config, results):
        """Store *results* in a dataset named 'results'.

        :raises RuntimeError: if h5py is unavailable or the file cannot
            be written.
        """
        try:
            import h5py
            f = h5py.File(self.fname, 'w')
            d = f.create_dataset('results', results.shape, dtype=results.dtype)
            d[:] = results
            f.close()
        except Exception as e:
            # The original `raise '...'` raised a string literal, which is
            # itself a TypeError on Python 3 and hid the real cause; raise a
            # proper exception carrying the underlying error instead.
            raise RuntimeError('Fails to write the parameters into a HDF5 file! (%s)' % e)
|
||||
53
benchmarks/regression/run.py
Normal file
53
benchmarks/regression/run.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Copyright (c) 2015, Zhenwen Dai
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from __future__ import print_function
|
||||
from evaluation import RMSE
|
||||
from methods import GP_RBF, SVIGP_RBF
|
||||
from tasks import Housing, WineQuality
|
||||
from outputs import ScreenOutput, CSVOutput, H5Output
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
# Benchmark configuration: which evaluation measures, regression methods,
# datasets, how many repeats per method, and where results are reported.
outpath = '.'
prjname = 'regression'
config = {
    'evaluations': [RMSE],
    'methods': [GP_RBF, SVIGP_RBF],
    'tasks': [WineQuality, Housing],
    'repeats': 2,
    'outputs': [ScreenOutput(), CSVOutput(outpath, prjname), H5Output(outpath, prjname)]
}

if __name__ == '__main__':
    # results[task, method, evaluation, repeat]; the extra last evaluation
    # slot stores the wall-clock time of fit + predict.
    results = np.zeros((len(config['tasks']), len(config['methods']), len(config['evaluations']) + 1, config['repeats']))

    for task_i in range(len(config['tasks'])):
        dataset = config['tasks'][task_i]()
        print('Benchmarking on ' + dataset.name)
        res = dataset.load_data()
        if not res:
            # One statement per line (the original chained these with ';').
            print('Fail to load ' + config['tasks'][task_i].name)
            continue
        train = dataset.get_training_data()
        test = dataset.get_test_data()

        for method_i in range(len(config['methods'])):
            method = config['methods'][method_i]
            print('With the method ' + method.name, end='')
            for ri in range(config['repeats']):
                m = method()
                t_st = time.time()
                m.fit(train)
                pred = m.predict(test[0])
                t_pd = time.time() - t_st
                for ei in range(len(config['evaluations'])):
                    evalu = config['evaluations'][ei]()
                    results[task_i, method_i, ei, ri] = evalu.evaluate(test[1], pred)
                results[task_i, method_i, -1, ri] = t_pd
                print('.', end='')
            print()

    # Report through every configured output (plain loop instead of the
    # original side-effect list comprehension).
    for out in config['outputs']:
        out.output(config, results)
|
||||
|
||||
|
||||
|
||||
|
||||
86
benchmarks/regression/tasks.py
Normal file
86
benchmarks/regression/tasks.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
# Copyright (c) 2015, Zhenwen Dai
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import abc
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
class RegressionTask(object):
    """Abstract base class for a regression benchmark dataset."""
    __metaclass__ = abc.ABCMeta

    def __init__(self, datapath='./'):
        # Directory (joined under the GPy data path by subclasses) where
        # the dataset files are cached.
        self.datapath = datapath

    @abc.abstractmethod
    def load_data(self):
        """Download the dataset if not exist. Return True if successful"""
        return True

    @abc.abstractmethod
    def get_training_data(self):
        """Return the training data: training inputs and labels"""
        return None

    @abc.abstractmethod
    def get_test_data(self):
        """Return the test data: test inputs and labels"""
        return None
|
||||
|
||||
class Housing(RegressionTask):
    """UCI Housing dataset; last column is the regression target."""

    name = 'Housing'
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
    filename = 'housing.data'

    def load_data(self):
        """Download (if needed) and split the data. Return True on success."""
        from GPy.util.datasets import download_url, data_path
        if not os.path.exists(os.path.join(data_path, self.datapath, self.filename)):
            # Use self.url (not Housing.url) so a subclass overriding `url`
            # works — consistent with WineQuality.load_data.
            download_url(self.url, self.datapath, messages=True)
            if not os.path.exists(os.path.join(data_path, self.datapath, self.filename)):
                return False

        data = np.loadtxt(os.path.join(data_path, self.datapath, self.filename))
        self.data = data
        # First 250 rows train, remainder test; last column is the target.
        data_train = data[:250, :-1]
        label_train = data[:250, -1:]
        self.train = (data_train, label_train)
        data_test = data[250:, :-1]
        label_test = data[250:, -1:]
        self.test = (data_test, label_test)
        return True

    def get_training_data(self):
        return self.train

    def get_test_data(self):
        return self.test
|
||||
|
||||
class WineQuality(RegressionTask):
    """UCI red wine quality dataset; last column is the quality score."""

    name = 'WineQuality'
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    filename = 'winequality-red.csv'

    def load_data(self):
        """Download (if needed) and split the data. Return True on success."""
        from GPy.util.datasets import download_url, data_path
        local = os.path.join(data_path, self.datapath, self.filename)
        if not os.path.exists(local):
            download_url(self.url, self.datapath, messages=True)
            if not os.path.exists(local):
                return False

        # Semicolon-delimited CSV with a single header row to skip.
        data = np.loadtxt(local, skiprows=1, delimiter=';')
        self.data = data
        # First 1000 rows train, remainder test; last column is the target.
        self.train = (data[:1000, :-1], data[:1000, -1:])
        self.test = (data[1000:, :-1], data[1000:, -1:])
        return True

    def get_training_data(self):
        return self.train

    def get_test_data(self):
        return self.test
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue