Rename Binomial to Bernoulli (maybe generalise it with the constant

later, but tilted distribution may change)
This commit is contained in:
Alan Saul 2013-10-17 15:04:55 +01:00
parent afd38df1ef
commit f3fd9f1325
9 changed files with 34 additions and 33 deletions

View file

@ -116,7 +116,7 @@ def toy_heaviside(seed=default_seed):
Y[Y.flatten() == -1] = 0
# Model definition
noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
likelihood = GPy.likelihoods.EP(Y,noise_model)
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)

View file

@ -4,9 +4,9 @@
import numpy as np
import noise_models
def binomial(gp_link=None):
def bernoulli(gp_link=None):
"""
Construct a binomial likelihood
Construct a bernoulli likelihood
:param gp_link: a GPy gp_link function
"""
@ -27,11 +27,12 @@ def binomial(gp_link=None):
analytical_mean = False
analytical_variance = False
return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance)
def exponential(gp_link=None):
"""
Construct a binomial likelihood
Construct a exponential likelihood
:param gp_link: a GPy gp_link function
"""

View file

@ -1,5 +1,5 @@
import noise_distributions
import binomial_noise
import bernoulli_noise
import exponential_noise
import gaussian_noise
import gamma_noise

View file

@ -9,7 +9,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Binomial(NoiseDistribution):
class Bernoulli(NoiseDistribution):
"""
Probit likelihood
Y is expected to take values in {-1,1}
@ -19,7 +19,7 @@ class Binomial(NoiseDistribution):
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)
def _preprocess_values(self,Y):
"""
@ -31,7 +31,7 @@ class Binomial(NoiseDistribution):
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep

View file

@ -16,7 +16,7 @@ class FITCClassification(FITC):
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link function
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class FITCClassification(FITC):
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None:
noise_model = likelihoods.binomial()
noise_model = likelihoods.bernoulli()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -15,7 +15,7 @@ class GPClassification(GP):
:param X: input observations
:param Y: observed values, can be None if likelihood is not None
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param likelihood: a GPy likelihood, defaults to Bernoulli with Probit link_function
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class GPClassification(GP):
kernel = kern.rbf(X.shape[1])
if likelihood is None:
noise_model = likelihoods.binomial()
noise_model = likelihoods.bernoulli()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -16,7 +16,7 @@ class SparseGPClassification(SparseGP):
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
@ -31,7 +31,7 @@ class SparseGPClassification(SparseGP):
kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
if likelihood is None:
noise_model = likelihoods.binomial()
noise_model = likelihoods.bernoulli()
likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):

View file

@ -209,7 +209,7 @@ class GradientTests(unittest.TestCase):
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.rbf(1)
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
#distribution = GPy.likelihoods.likelihood_functions.Binomial()
#distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
#likelihood = GPy.likelihoods.EP(Y, distribution)
#m = GPy.core.SparseGP(X, likelihood, kernel, Z)
#m.ensure_default_constraints()

View file

@ -17,13 +17,13 @@ except ImportError:
import sys, urllib
def reporthook(a,b,c):
def reporthook(a,b,c):
# ',' at the end of the line is important!
#print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
#you can also use sys.stdout.write
sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c))
sys.stdout.flush()
# Global variables
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
@ -39,7 +39,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
'license' : None,
'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
'files' : [['Index', 'housing.data', 'housing.names']],
'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
@ -164,14 +164,14 @@ def prompt_user(prompt):
print(prompt)
choice = raw_input().lower()
# would like to test for exception here, but not sure if we can do that without importing IPython
except:
except:
print('Stdin is not implemented.')
print('You need to set')
print('overide_manual_authorize=True')
print('to proceed with the download. Please set that variable and continue.')
raise
if choice in yes:
return True
elif choice in no:
@ -189,7 +189,7 @@ def data_available(dataset_name=None):
if not os.path.exists(os.path.join(data_path, dataset_name, file)):
return False
return True
def download_url(url, store_directory, save_name = None, messages = True, suffix=''):
"""Download a file from a url and save it to disk."""
i = url.rfind('/')
@ -249,18 +249,18 @@ def download_data(dataset_name=None):
for file in files:
download_url(os.path.join(url,file), dataset_name, dataset_name)
return True
def data_details_return(data, data_set):
"""Update the data component of the data dictionary with details drawn from the data_resources."""
data.update(data_resources[data_set])
return data
def cmu_urls_files(subj_motions, messages = True):
'''
Find which resources are missing on the local disk for the requested CMU motion capture motions.
Find which resources are missing on the local disk for the requested CMU motion capture motions.
'''
subjects_num = subj_motions[0]
motions_num = subj_motions[1]
@ -280,15 +280,15 @@ def cmu_urls_files(subj_motions, messages = True):
motions[i].append(curMot)
all_skels = []
assert len(subjects) == len(motions)
all_motions = []
for i in range(len(subjects)):
skel_dir = os.path.join(data_path, 'cmu_mocap')
cur_skel_file = os.path.join(skel_dir, subjects[i] + '.asf')
url_required = False
file_download = []
if not os.path.exists(cur_skel_file):
@ -332,7 +332,7 @@ if gpxpy_available:
points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
X.append(np.asarray(data)[::sample_every, :])
gpx_file.close()
gpx_file.close()
return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)
del gpxpy_available
@ -408,7 +408,7 @@ def oil(data_set='three_phase_oil_flow'):
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'Xtest' : Xtest, 'Xvalid': Xvalid, 'Yvalid': Yvalid}, data_set)
#else:
# throw an error
def oil_100(seed=default_seed, data_set = 'three_phase_oil_flow'):
np.random.seed(seed=seed)
data = oil()
@ -622,7 +622,7 @@ def xw_pen(data_set='xw_pen'):
X = np.arange(485)[:, None]
return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set)
def download_rogers_girolami_data():
if not data_available('rogers_girolami_data'):
download_data(data_set)