diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index da2ffb24..0630537b 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -116,7 +116,7 @@ def toy_heaviside(seed=default_seed): Y[Y.flatten() == -1] = 0 # Model definition - noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside()) + noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside()) likelihood = GPy.likelihoods.EP(Y,noise_model) m = GPy.models.GPClassification(data['X'], likelihood=likelihood) diff --git a/GPy/likelihoods/noise_model_constructors.py b/GPy/likelihoods/noise_model_constructors.py index 26d07391..95247c03 100644 --- a/GPy/likelihoods/noise_model_constructors.py +++ b/GPy/likelihoods/noise_model_constructors.py @@ -4,9 +4,9 @@ import numpy as np import noise_models -def binomial(gp_link=None): +def bernoulli(gp_link=None): """ - Construct a binomial likelihood + Construct a bernoulli likelihood :param gp_link: a GPy gp_link function """ @@ -27,11 +27,12 @@ def binomial(gp_link=None): analytical_mean = False analytical_variance = False - return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance) + return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance) def exponential(gp_link=None): + """ - Construct a binomial likelihood + Construct an exponential likelihood :param gp_link: a GPy gp_link function """ diff --git a/GPy/likelihoods/noise_models/__init__.py b/GPy/likelihoods/noise_models/__init__.py index 54f3f61a..d1d134dc 100644 --- a/GPy/likelihoods/noise_models/__init__.py +++ b/GPy/likelihoods/noise_models/__init__.py @@ -1,5 +1,5 @@ import noise_distributions -import binomial_noise +import bernoulli_noise import exponential_noise import gaussian_noise import gamma_noise diff --git a/GPy/likelihoods/noise_models/binomial_noise.py b/GPy/likelihoods/noise_models/bernoulli_noise.py 
similarity index 95% rename from GPy/likelihoods/noise_models/binomial_noise.py rename to GPy/likelihoods/noise_models/bernoulli_noise.py index c0bb8be4..1d45c82e 100644 --- a/GPy/likelihoods/noise_models/binomial_noise.py +++ b/GPy/likelihoods/noise_models/bernoulli_noise.py @@ -9,7 +9,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf import gp_transformations from noise_distributions import NoiseDistribution -class Binomial(NoiseDistribution): +class Bernoulli(NoiseDistribution): """ Probit likelihood Y is expected to take values in {-1,1} @@ -19,7 +19,7 @@ class Binomial(NoiseDistribution): $$ """ def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): - super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance) + super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance) def _preprocess_values(self,Y): """ @@ -31,7 +31,7 @@ class Binomial(NoiseDistribution): Y_prep = Y.copy() Y1 = Y[Y.flatten()==1].size Y2 = Y[Y.flatten()==0].size - assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.' + assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.' 
Y_prep[Y.flatten() == 0] = -1 return Y_prep diff --git a/GPy/models/fitc_classification.py b/GPy/models/fitc_classification.py index ee92a1b4..0aa21db9 100644 --- a/GPy/models/fitc_classification.py +++ b/GPy/models/fitc_classification.py @@ -16,7 +16,7 @@ class FITCClassification(FITC): :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to Binomial with probit link function + :param likelihood: a GPy likelihood, defaults to Bernoulli with probit link function :param kernel: a GPy kernel, defaults to rbf+white :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class FITCClassification(FITC): kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) if likelihood is None: - noise_model = likelihoods.binomial() + noise_model = likelihoods.bernoulli() likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index fce51cfa..7fc61bb7 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -15,7 +15,7 @@ class GPClassification(GP): :param X: input observations :param Y: observed values, can be None if likelihood is not None - :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Bernoulli with Probit link_function :param kernel: a GPy kernel, defaults to rbf :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class GPClassification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - noise_model = likelihoods.binomial() + noise_model = likelihoods.bernoulli() likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == 
likelihood.data.flatten()): diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 50c2f935..9274aacc 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -16,7 +16,7 @@ class SparseGPClassification(SparseGP): :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function :param kernel: a GPy kernel, defaults to rbf+white :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class SparseGPClassification(SparseGP): kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3) if likelihood is None: - noise_model = likelihoods.binomial() + noise_model = likelihoods.bernoulli() likelihood = likelihoods.EP(Y, noise_model) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index e4d9e063..818cb56e 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -209,7 +209,7 @@ class GradientTests(unittest.TestCase): Z = np.linspace(0, 15, 4)[:, None] kernel = GPy.kern.rbf(1) m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z) - #distribution = GPy.likelihoods.likelihood_functions.Binomial() + #distribution = GPy.likelihoods.likelihood_functions.Bernoulli() #likelihood = GPy.likelihoods.EP(Y, distribution) #m = GPy.core.SparseGP(X, likelihood, kernel, Z) #m.ensure_default_constraints() diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index f5947179..565f8e76 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -17,13 +17,13 @@ except ImportError: import sys, urllib -def reporthook(a,b,c): +def reporthook(a,b,c): # ',' at the end of the line is important! 
#print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c), #you can also use sys.stdout.write sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c)) sys.stdout.flush() - + # Global variables data_path = os.path.join(os.path.dirname(__file__), 'datasets') default_seed = 10000 @@ -39,7 +39,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'], 'license' : None, 'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""", 'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""}, - + 'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'], 'files' : [['Index', 'housing.data', 'housing.names']], 'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""", @@ -164,14 +164,14 @@ def prompt_user(prompt): print(prompt) choice = raw_input().lower() # would like to test for exception here, but not sure if we can do that without importing IPython - except: + except: print('Stdin is not implemented.') print('You need to set') print('overide_manual_authorize=True') print('to proceed with the download. 
Please set that variable and continue.') raise - + if choice in yes: return True elif choice in no: @@ -189,7 +189,7 @@ def data_available(dataset_name=None): if not os.path.exists(os.path.join(data_path, dataset_name, file)): return False return True - + def download_url(url, store_directory, save_name = None, messages = True, suffix=''): """Download a file from a url and save it to disk.""" i = url.rfind('/') @@ -249,18 +249,18 @@ def download_data(dataset_name=None): for file in files: download_url(os.path.join(url,file), dataset_name, dataset_name) return True - + def data_details_return(data, data_set): """Update the data component of the data dictionary with details drawn from the data_resources.""" data.update(data_resources[data_set]) return data - + def cmu_urls_files(subj_motions, messages = True): ''' - Find which resources are missing on the local disk for the requested CMU motion capture motions. + Find which resources are missing on the local disk for the requested CMU motion capture motions. 
''' - + subjects_num = subj_motions[0] motions_num = subj_motions[1] @@ -280,15 +280,15 @@ def cmu_urls_files(subj_motions, messages = True): motions[i].append(curMot) all_skels = [] - + assert len(subjects) == len(motions) - + all_motions = [] - + for i in range(len(subjects)): skel_dir = os.path.join(data_path, 'cmu_mocap') cur_skel_file = os.path.join(skel_dir, subjects[i] + '.asf') - + url_required = False file_download = [] if not os.path.exists(cur_skel_file): @@ -332,7 +332,7 @@ if gpxpy_available: points = [point for track in gpx.tracks for segment in track.segments for point in segment.points] data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points] X.append(np.asarray(data)[::sample_every, :]) - gpx_file.close() + gpx_file.close() return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set) del gpxpy_available @@ -408,7 +408,7 @@ def oil(data_set='three_phase_oil_flow'): return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'Xtest' : Xtest, 'Xvalid': Xvalid, 'Yvalid': Yvalid}, data_set) #else: # throw an error - + def oil_100(seed=default_seed, data_set = 'three_phase_oil_flow'): np.random.seed(seed=seed) data = oil() @@ -622,7 +622,7 @@ def xw_pen(data_set='xw_pen'): X = np.arange(485)[:, None] return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set) - + def download_rogers_girolami_data(): if not data_available('rogers_girolami_data'): download_data(data_set)