mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-05 14:55:15 +02:00
Rename Binomial to Bernoulli (maybe generalise it with the constant
later, but tilted distribution may change)
This commit is contained in:
parent
afd38df1ef
commit
f3fd9f1325
9 changed files with 34 additions and 33 deletions
|
|
@ -116,7 +116,7 @@ def toy_heaviside(seed=default_seed):
|
|||
Y[Y.flatten() == -1] = 0
|
||||
|
||||
# Model definition
|
||||
noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
|
||||
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
|
||||
likelihood = GPy.likelihoods.EP(Y,noise_model)
|
||||
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
|
||||
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@
|
|||
import numpy as np
|
||||
import noise_models
|
||||
|
||||
def binomial(gp_link=None):
|
||||
def bernoulli(gp_link=None):
|
||||
"""
|
||||
Construct a binomial likelihood
|
||||
Construct a bernoulli likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
"""
|
||||
|
|
@ -27,11 +27,12 @@ def binomial(gp_link=None):
|
|||
analytical_mean = False
|
||||
analytical_variance = False
|
||||
|
||||
return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)
|
||||
return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance)
|
||||
|
||||
def exponential(gp_link=None):
|
||||
|
||||
"""
|
||||
Construct a binomial likelihood
|
||||
Construct an exponential likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import noise_distributions
|
||||
import binomial_noise
|
||||
import bernoulli_noise
|
||||
import exponential_noise
|
||||
import gaussian_noise
|
||||
import gamma_noise
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
|||
import gp_transformations
|
||||
from noise_distributions import NoiseDistribution
|
||||
|
||||
class Binomial(NoiseDistribution):
|
||||
class Bernoulli(NoiseDistribution):
|
||||
"""
|
||||
Probit likelihood
|
||||
Y is expected to take values in {-1,1}
|
||||
|
|
@ -19,7 +19,7 @@ class Binomial(NoiseDistribution):
|
|||
$$
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
||||
super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
super(Bernoulli, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
"""
|
||||
|
|
@ -31,7 +31,7 @@ class Binomial(NoiseDistribution):
|
|||
Y_prep = Y.copy()
|
||||
Y1 = Y[Y.flatten()==1].size
|
||||
Y2 = Y[Y.flatten()==0].size
|
||||
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
|
||||
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0,1}.'
|
||||
Y_prep[Y.flatten() == 0] = -1
|
||||
return Y_prep
|
||||
|
||||
|
|
@ -16,7 +16,7 @@ class FITCClassification(FITC):
|
|||
|
||||
:param X: input observations
|
||||
:param Y: observed values
|
||||
:param likelihood: a GPy likelihood, defaults to Binomial with probit link function
|
||||
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link function
|
||||
:param kernel: a GPy kernel, defaults to rbf+white
|
||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||
:type normalize_X: False|True
|
||||
|
|
@ -31,7 +31,7 @@ class FITCClassification(FITC):
|
|||
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
|
||||
|
||||
if likelihood is None:
|
||||
noise_model = likelihoods.binomial()
|
||||
noise_model = likelihoods.bernoulli()
|
||||
likelihood = likelihoods.EP(Y, noise_model)
|
||||
elif Y is not None:
|
||||
if not all(Y.flatten() == likelihood.data.flatten()):
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ class GPClassification(GP):
|
|||
|
||||
:param X: input observations
|
||||
:param Y: observed values, can be None if likelihood is not None
|
||||
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
|
||||
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function
|
||||
:param kernel: a GPy kernel, defaults to rbf
|
||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||
:type normalize_X: False|True
|
||||
|
|
@ -31,7 +31,7 @@ class GPClassification(GP):
|
|||
kernel = kern.rbf(X.shape[1])
|
||||
|
||||
if likelihood is None:
|
||||
noise_model = likelihoods.binomial()
|
||||
noise_model = likelihoods.bernoulli()
|
||||
likelihood = likelihoods.EP(Y, noise_model)
|
||||
elif Y is not None:
|
||||
if not all(Y.flatten() == likelihood.data.flatten()):
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ class SparseGPClassification(SparseGP):
|
|||
|
||||
:param X: input observations
|
||||
:param Y: observed values
|
||||
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
|
||||
:param likelihood: a GPy likelihood, defaults to Bernoulli with probit link_function
|
||||
:param kernel: a GPy kernel, defaults to rbf+white
|
||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||
:type normalize_X: False|True
|
||||
|
|
@ -31,7 +31,7 @@ class SparseGPClassification(SparseGP):
|
|||
kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
|
||||
|
||||
if likelihood is None:
|
||||
noise_model = likelihoods.binomial()
|
||||
noise_model = likelihoods.bernoulli()
|
||||
likelihood = likelihoods.EP(Y, noise_model)
|
||||
elif Y is not None:
|
||||
if not all(Y.flatten() == likelihood.data.flatten()):
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ class GradientTests(unittest.TestCase):
|
|||
Z = np.linspace(0, 15, 4)[:, None]
|
||||
kernel = GPy.kern.rbf(1)
|
||||
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
|
||||
#distribution = GPy.likelihoods.likelihood_functions.Binomial()
|
||||
#distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
|
||||
#likelihood = GPy.likelihoods.EP(Y, distribution)
|
||||
#m = GPy.core.SparseGP(X, likelihood, kernel, Z)
|
||||
#m.ensure_default_constraints()
|
||||
|
|
|
|||
|
|
@ -17,13 +17,13 @@ except ImportError:
|
|||
|
||||
import sys, urllib
|
||||
|
||||
def reporthook(a,b,c):
|
||||
def reporthook(a,b,c):
|
||||
# ',' at the end of the line is important!
|
||||
#print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
|
||||
#you can also use sys.stdout.write
|
||||
sys.stdout.write("\r% 3.1f%% of %d bytes" % (min(100, float(a * b) / c * 100), c))
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
# Global variables
|
||||
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
|
||||
default_seed = 10000
|
||||
|
|
@ -39,7 +39,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
|
|||
'license' : None,
|
||||
'citation' : """3D Human Pose from Silhouettes by Relevance Vector Regression (In CVPR'04). A. Agarwal and B. Triggs.""",
|
||||
'details' : """Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing."""},
|
||||
|
||||
|
||||
'boston_housing' : {'urls' : ['http://archive.ics.uci.edu/ml/machine-learning-databases/housing/'],
|
||||
'files' : [['Index', 'housing.data', 'housing.names']],
|
||||
'citation' : """Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the demand for clean air', J. Environ. Economics & Management, vol.5, 81-102, 1978.""",
|
||||
|
|
@ -164,14 +164,14 @@ def prompt_user(prompt):
|
|||
print(prompt)
|
||||
choice = raw_input().lower()
|
||||
# would like to test for exception here, but not sure if we can do that without importing IPython
|
||||
except:
|
||||
except:
|
||||
print('Stdin is not implemented.')
|
||||
print('You need to set')
|
||||
print('overide_manual_authorize=True')
|
||||
print('to proceed with the download. Please set that variable and continue.')
|
||||
raise
|
||||
|
||||
|
||||
|
||||
if choice in yes:
|
||||
return True
|
||||
elif choice in no:
|
||||
|
|
@ -189,7 +189,7 @@ def data_available(dataset_name=None):
|
|||
if not os.path.exists(os.path.join(data_path, dataset_name, file)):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def download_url(url, store_directory, save_name = None, messages = True, suffix=''):
|
||||
"""Download a file from a url and save it to disk."""
|
||||
i = url.rfind('/')
|
||||
|
|
@ -249,18 +249,18 @@ def download_data(dataset_name=None):
|
|||
for file in files:
|
||||
download_url(os.path.join(url,file), dataset_name, dataset_name)
|
||||
return True
|
||||
|
||||
|
||||
def data_details_return(data, data_set):
|
||||
"""Update the data component of the data dictionary with details drawn from the data_resources."""
|
||||
data.update(data_resources[data_set])
|
||||
return data
|
||||
|
||||
|
||||
|
||||
def cmu_urls_files(subj_motions, messages = True):
|
||||
'''
|
||||
Find which resources are missing on the local disk for the requested CMU motion capture motions.
|
||||
Find which resources are missing on the local disk for the requested CMU motion capture motions.
|
||||
'''
|
||||
|
||||
|
||||
subjects_num = subj_motions[0]
|
||||
motions_num = subj_motions[1]
|
||||
|
||||
|
|
@ -280,15 +280,15 @@ def cmu_urls_files(subj_motions, messages = True):
|
|||
motions[i].append(curMot)
|
||||
|
||||
all_skels = []
|
||||
|
||||
|
||||
assert len(subjects) == len(motions)
|
||||
|
||||
|
||||
all_motions = []
|
||||
|
||||
|
||||
for i in range(len(subjects)):
|
||||
skel_dir = os.path.join(data_path, 'cmu_mocap')
|
||||
cur_skel_file = os.path.join(skel_dir, subjects[i] + '.asf')
|
||||
|
||||
|
||||
url_required = False
|
||||
file_download = []
|
||||
if not os.path.exists(cur_skel_file):
|
||||
|
|
@ -332,7 +332,7 @@ if gpxpy_available:
|
|||
points = [point for track in gpx.tracks for segment in track.segments for point in segment.points]
|
||||
data = [[(point.time-datetime.datetime(2013,8,21)).total_seconds(), point.latitude, point.longitude, point.elevation] for point in points]
|
||||
X.append(np.asarray(data)[::sample_every, :])
|
||||
gpx_file.close()
|
||||
gpx_file.close()
|
||||
return data_details_return({'X' : X, 'info' : 'Data is an array containing time in seconds, latitude, longitude and elevation in that order.'}, data_set)
|
||||
|
||||
del gpxpy_available
|
||||
|
|
@ -408,7 +408,7 @@ def oil(data_set='three_phase_oil_flow'):
|
|||
return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'Xtest' : Xtest, 'Xvalid': Xvalid, 'Yvalid': Yvalid}, data_set)
|
||||
#else:
|
||||
# throw an error
|
||||
|
||||
|
||||
def oil_100(seed=default_seed, data_set = 'three_phase_oil_flow'):
|
||||
np.random.seed(seed=seed)
|
||||
data = oil()
|
||||
|
|
@ -622,7 +622,7 @@ def xw_pen(data_set='xw_pen'):
|
|||
X = np.arange(485)[:, None]
|
||||
return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set)
|
||||
|
||||
|
||||
|
||||
def download_rogers_girolami_data():
|
||||
if not data_available('rogers_girolami_data'):
|
||||
download_data(data_set)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue