Rename Binomial to Bernoulli (maybe generalise it with the constant

later, but tilted distribution may change)
2026-06-05 14:55:15 +02:00 · 2013-10-17 15:04:55 +01:00 · 2013-10-17 15:04:55 +01:00 · f3fd9f1325
commit f3fd9f1325
parent afd38df1ef
9 changed files with 34 additions and 33 deletions
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -116,7 +116,7 @@ def toy_heaviside(seed=default_seed):
    Y[Y.flatten() == -1] = 0

    # Model definition
-    noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
+    noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
    likelihood = GPy.likelihoods.EP(Y,noise_model)
    m = GPy.models.GPClassification(data['X'], likelihood=likelihood)

--- a/GPy/likelihoods/noise_model_constructors.py
+++ b/GPy/likelihoods/noise_model_constructors.py
@ -4,9 +4,9 @@
 import numpy as np
 import noise_models

-def binomial(gp_link=None):
+def bernoulli(gp_link=None):
    """
-    Construct a binomial likelihood
+    Construct a Bernoulli likelihood

    :param gp_link: a GPy gp_link function
    """
@ -27,11 +27,12 @@ def binomial(gp_link=None):
        analytical_mean = False
        analytical_variance = False

-    return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
+    return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance)

 def exponential(gp_link=None):
+
    """
-    Construct a binomial likelihood
+    Construct an exponential likelihood

    :param gp_link: a GPy gp_link function
    """
--- a/GPy/likelihoods/noise_models/init.py
+++ b/GPy/likelihoods/noise_models/init.py
@ -1,5 +1,5 @@
 import noise_distributions
-import binomial_noise
+import bernoulli_noise
 import exponential_noise
 import gaussian_noise
 import gamma_noise
--- a/GPy/likelihoods/noise_models/bernoulli_noise.py
+++ b/GPy/likelihoods/noise_models/bernoulli_noise.py
@ -9,7 +9,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
 import gp_transformations
 from noise_distributions import NoiseDistribution

-class Binomial(NoiseDistribution):
+class Bernoulli(NoiseDistribution):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
@ -19,7 +19,7 @@ class Binomial(NoiseDistribution):
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
-        super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
+        super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        """
@ -31,7 +31,7 @@ class Binomial(NoiseDistribution):
        Y_prep = Y.copy()
        Y1 = Y[Y.flatten()==1].size
        Y2 = Y[Y.flatten()==0].size
-        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
+        assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep

--- a/GPy/models/fitc_classification.py
+++ b/GPy/models/fitc_classification.py
@ -16,7 +16,7 @@ class FITCClassification(FITC):

    :param X: input observations
    :param Y: observed values
-    :param likelihood: a GPy likelihood, defaults to Binomial with probit link function
+    :param likelihood: a GPy likelihood, defaults to Bernoulli with probit link function
    :param kernel: a GPy kernel, defaults to rbf+white
    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
@ -31,7 +31,7 @@ class FITCClassification(FITC):
            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)

        if likelihood is None:
-            noise_model = likelihoods.binomial()
+            noise_model = likelihoods.bernoulli()
            likelihood = likelihoods.EP(Y, noise_model)
        elif Y is not None:
            if not all(Y.flatten() == likelihood.data.flatten()):
--- a/GPy/models/gp_classification.py
+++ b/GPy/models/gp_classification.py
@ -15,7 +15,7 @@ class GPClassification(GP):

    :param X: input observations
    :param Y: observed values, can be None if likelihood is not None
-    :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
+    :param likelihood: a GPy likelihood, defaults to Bernoulli with Probit link_function
    :param kernel: a GPy kernel, defaults to rbf
    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
@ -31,7 +31,7 @@ class GPClassification(GP):
            kernel = kern.rbf(X.shape[1])

        if likelihood is None:
-            noise_model = likelihoods.binomial()
+            noise_model = likelihoods.bernoulli()
            likelihood = likelihoods.EP(Y, noise_model)
        elif Y is not None:
            if not all(Y.flatten() == likelihood.data.flatten()):
--- a/GPy/models/sparse_gp_classification.py
+++ b/GPy/models/sparse_gp_classification.py
@ -16,7 +16,7 @@ class SparseGPClassification(SparseGP):

    :param X: input observations
    :param Y: observed values
-    :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
+    :param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function
    :param kernel: a GPy kernel, defaults to rbf+white
    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
@ -31,7 +31,7 @@ class SparseGPClassification(SparseGP):
            kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)

        if likelihood is None:
-            noise_model = likelihoods.binomial()
+            noise_model = likelihoods.bernoulli()
            likelihood = likelihoods.EP(Y, noise_model)
        elif Y is not None:
            if not all(Y.flatten() == likelihood.data.flatten()):
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@ -209,7 +209,7 @@ class GradientTests(unittest.TestCase):
        Z = np.linspace(0, 15, 4)[:, None]
        kernel = GPy.kern.rbf(1)
        m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
-        #distribution = GPy.likelihoods.likelihood_functions.Binomial()
+        #distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
        #likelihood = GPy.likelihoods.EP(Y, distribution)
        #m = GPy.core.SparseGP(X, likelihood, kernel, Z)
        #m.ensure_default_constraints()
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -17,13 +17,13 @@ except ImportError:

 import sys, urllib

-def reporthook(a,b,c): 
+def reporthook(a,b,c):
    # ',' at the end of the line is important!
    #print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
    #you can also use sys.stdout.write
    sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c))
    sys.stdout.flush()
-     
+
 # Global variables
 data_path = os.path.join(os.path.dirname(__file__), 'datasets')
 default_seed = 10000
@ -39,7 +39,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
                                       'license' : None,
                                       'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
                                       'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
-                   
+
                  'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
                                      'files' : [['Index', 'housing.data', 'housing.names']],
                                      'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
@ -164,14 +164,14 @@ def prompt_user(prompt):
        print(prompt)
        choice = raw_input().lower()
        # would like to test for exception here, but not sure if we can do that without importing IPython
-    except: 
+    except:
        print('Stdin is not implemented.')
        print('You need to set')
        print('overide_manual_authorize=True')
        print('to proceed with the download. Please set that variable and continue.')
        raise

-    
+
    if choice in yes:
        return True
    elif choice in no:
@ -189,7 +189,7 @@ def data_available(dataset_name=None):
            if not os.path.exists(os.path.join(data_path, dataset_name, file)):
                return False
    return True
-            
+
 def download_url(url, store_directory, save_name = None, messages = True, suffix=''):
    """Download a file from a url and save it to disk."""
    i = url.rfind('/')
@ -249,18 +249,18 @@ def download_data(dataset_name=None):
            for file in files:
                download_url(os.path.join(url,file), dataset_name, dataset_name)
    return True
-                  
+
 def data_details_return(data, data_set):
    """Update the data component of the data dictionary with details drawn from the data_resources."""
    data.update(data_resources[data_set])
    return data

-    
+
 def cmu_urls_files(subj_motions, messages = True):
    '''
-    Find which resources are missing on the local disk for the requested CMU motion capture motions. 
+    Find which resources are missing on the local disk for the requested CMU motion capture motions.
    '''
-    
+
    subjects_num = subj_motions[0]
    motions_num = subj_motions[1]

@ -280,15 +280,15 @@ def cmu_urls_files(subj_motions, messages = True):
            motions[i].append(curMot)

    all_skels = []
-    
+
    assert len(subjects) == len(motions)
-    
+
    all_motions = []
-            
+
    for i in range(len(subjects)):
        skel_dir = os.path.join(data_path, 'cmu_mocap')
        cur_skel_file = os.path.join(skel_dir, subjects[i] + '.asf')
-        
+
        url_required = False
        file_download = []
        if not os.path.exists(cur_skel_file):
@ -332,7 +332,7 @@ if gpxpy_available:
            points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
            data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
            X.append(np.asarray(data)[::sample_every, :])
-            gpx_file.close()        
+            gpx_file.close()
        return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)

 del gpxpy_available
@ -408,7 +408,7 @@ def oil(data_set='three_phase_oil_flow'):
    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'Xtest' : Xtest, 'Xvalid': Xvalid, 'Yvalid': Yvalid}, data_set)
    #else:
    # throw an error
-    
+
 def oil_100(seed=default_seed, data_set = 'three_phase_oil_flow'):
    np.random.seed(seed=seed)
    data = oil()
@ -622,7 +622,7 @@ def xw_pen(data_set='xw_pen'):
    X = np.arange(485)[:, None]
    return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set)

-    
+
 def download_rogers_girolami_data():
    if not data_available('rogers_girolami_data'):
        download_data(data_set)