From 1dd83291fef489e2c44d6ccb0d4a1ba8a6776bc6 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 11 Sep 2013 11:54:15 +0100 Subject: [PATCH] Renamed some things, made some small (incorrect) gradient changes, generalised the gp regression for any likelihood, and added a place holder link function waiting for Richardos changes --- GPy/examples/laplace_approximations.py | 75 +++++++++++----------- GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/{Laplace.py => laplace.py} | 0 GPy/likelihoods/likelihood_functions.py | 32 +++++---- GPy/likelihoods/link_functions.py | 13 ++++ GPy/models/gp_regression.py | 7 +- GPy/util/linalg.py | 8 +++ 7 files changed, 83 insertions(+), 53 deletions(-) rename GPy/likelihoods/{Laplace.py => laplace.py} (100%) diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index b6443664..c0bc3aef 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -25,9 +25,9 @@ def timing(): edited_real_sd = real_sd kernel1 = GPy.kern.rbf(X.shape[1]) - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel1) + m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel1) m.ensure_default_constraints() m.update_likelihood_approximation() m.optimize() @@ -54,9 +54,9 @@ def v_fail_test(): edited_real_sd = real_sd print "Clean student t, rasm" - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, stu_t_likelihood, kernel1) + m = GPy.models.GPRegression(X, stu_t_likelihood, kernel1) m.constrain_positive('') vs = 25 noises = 30 @@ -94,16 +94,16 @@ def student_t_obj_plane(): deg_free = 1000 kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1]) - mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp) + mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp) mgp.ensure_default_constraints() mgp['noise'] = real_std**2 print "Gaussian" print mgp kernelst = kernelgp.copy() - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=(real_std**2)) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=(real_std**2)) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) m.ensure_default_constraints() m.constrain_fixed('t_no', real_std**2) vs = 10 @@ -144,7 +144,7 @@ def student_t_f_check(): deg_free = 1000 kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1]) - mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp) + mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp) mgp.ensure_default_constraints() mgp.randomize() mgp.optimize() @@ -154,9 +154,9 @@ def student_t_f_check(): kernelst = kernelgp.copy() #kernelst += GPy.kern.bias(X.shape[1]) - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=0.05) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=0.05) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) #m['rbf_v'] = mgp._get_params()[0] #m['rbf_l'] = mgp._get_params()[1] + 1 m.ensure_default_constraints() @@ -198,7 +198,7 @@ def student_t_fix_optimise_check(): #GP kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1]) - mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp) + mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp) mgp.ensure_default_constraints() mgp.randomize() mgp.optimize() @@ -206,12 +206,12 @@ def student_t_fix_optimise_check(): kernelst = kernelgp.copy() real_stu_t_std2 = (real_std**2)*((deg_free - 2)/float(deg_free)) - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=real_stu_t_std2) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=real_stu_t_std2) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') plt.figure(1) plt.suptitle('Student likelihood') - m = GPy.models.GP(X, stu_t_likelihood, kernelst) + m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst) m.constrain_fixed('rbf_var', mgp._get_params()[0]) m.constrain_fixed('rbf_len', mgp._get_params()[1]) m.constrain_positive('t_noise') @@ -331,7 +331,7 @@ def debug_student_t_noise_approx(): print "Clean Gaussian" #A GP should completely break down due to the points as they get a lot of weight # create simple GP model - #m = GPy.models.GP_regression(X, Y, kernel=kernel1) + #m = GPy.models.GPRegression(X, Y, kernel=kernel1) ## optimize #m.ensure_default_constraints() #m.optimize() @@ -349,10 +349,10 @@ def debug_student_t_noise_approx(): #edited_real_sd = real_sd print "Clean student t, rasm" - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, stu_t_likelihood, kernel6) + m = GPy.models.GPRegression(X, stu_t_likelihood, kernel6) #m['rbf_len'] = 1.5 #m.constrain_fixed('rbf_v', 1.0898) #m.constrain_fixed('rbf_l', 0.2651) @@ -384,9 +384,9 @@ def debug_student_t_noise_approx(): return m #print "Clean student t, ncg" - #t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg') - #m = GPy.models.GP(X, stu_t_likelihood, kernel3) + #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3) #m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() @@ -453,7 +453,7 @@ def student_t_approx(): print "Clean Gaussian" #A GP should completely break down due to the points as they get a lot of weight # create simple GP model - m = GPy.models.GP_regression(X, Y, kernel=kernel1) + m = GPy.models.GPRegression(X, Y, kernel=kernel1) # optimize m.ensure_default_constraints() m.optimize() @@ -466,7 +466,7 @@ def student_t_approx(): #Corrupt print "Corrupt Gaussian" - m = GPy.models.GP_regression(X, Yc, kernel=kernel2) + m = GPy.models.GPRegression(X, Yc, kernel=kernel2) m.ensure_default_constraints() #m.optimize() plt.subplot(212) @@ -480,9 +480,9 @@ def student_t_approx(): edited_real_sd = real_std #initial_var_guess print "Clean student t, rasm" - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, stu_t_likelihood, kernel6) + m = GPy.models.GPRegression(X, Y.copy(), kernel6, stu_t_likelihood) m.ensure_default_constraints() m.constrain_positive('t_noise') m.randomize() @@ -496,9 +496,9 @@ def student_t_approx(): plt.title('Student-t rasm clean') print "Corrupt student t, rasm" - t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm') - m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4) + m = GPy.models.GPRegression(X, Yc.copy(), kernel4, corrupt_stu_t_likelihood) m.ensure_default_constraints() m.constrain_positive('t_noise') m.randomize() @@ -514,9 +514,9 @@ def student_t_approx(): return m #print "Clean student t, ncg" - #t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg') - #m = GPy.models.GP(X, stu_t_likelihood, kernel3) + #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3) #m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() @@ -528,9 +528,9 @@ def student_t_approx(): #plt.title('Student-t ncg clean') #print "Corrupt student t, ncg" - #t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd) + #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg') - #m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel5) + #m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel5) #m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() @@ -582,7 +582,7 @@ def noisy_laplace_approx(): #A GP should completely break down due to the points as they get a lot of weight # create simple GP model - m = GPy.models.GP_regression(X, Y) + m = GPy.models.GPRegression(X, Y) # optimize m.ensure_default_constraints() @@ -601,7 +601,7 @@ def gaussian_f_check(): Y = np.sin(X*2*np.pi) + noise kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1]) - mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp) + mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp) mgp.ensure_default_constraints() mgp.randomize() mgp.optimize() @@ -612,9 +612,9 @@ def gaussian_f_check(): kernelg = kernelgp.copy() #kernelst += GPy.kern.bias(X.shape[1]) N, D = X.shape - g_distribution = GPy.likelihoods.likelihood_functions.Gaussian(variance=0.1, N=N, D=D) + g_distribution = GPy.likelihoods.functions.Gaussian(variance=0.1, N=N, D=D) g_likelihood = GPy.likelihoods.Laplace(Y.copy(), g_distribution, opt='rasm') - m = GPy.models.GP(X, g_likelihood, kernelg) + m = GPy.models.GPRegression(X, Y, kernelg, likelihood=g_likelihood) #m['rbf_v'] = mgp._get_params()[0] #m['rbf_l'] = mgp._get_params()[1] + 1 m.ensure_default_constraints() @@ -624,14 +624,15 @@ def gaussian_f_check(): #m.constrain_positive('bias') m.constrain_positive('noise_var') m.randomize() + import ipdb; ipdb.set_trace() # XXX BREAKPOINT m['noise_variance'] = 0.1 - m.likelihood.X = X + #m.likelihood.X = X plt.figure() - plt.subplot(211) - m.plot() - plt.subplot(212) + ax = plt.subplot(211) + m.plot(ax=ax) + ax = plt.subplot(212) m.optimize() - m.plot() + m.plot(ax=ax) print "final optimised gaussian" print m print "real GP" diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 99e88b6d..5d4e31f7 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,4 +1,5 @@ from ep import EP +from laplace import Laplace from gaussian import Gaussian # TODO: from Laplace import Laplace import likelihood_functions as functions diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/laplace.py similarity index 100% rename from GPy/likelihoods/Laplace.py rename to GPy/likelihoods/laplace.py diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 5d270b2b..06735a9c 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -167,7 +167,7 @@ class Poisson(LikelihoodFunction): p_975 = tmp[:,1] return mean,np.nan*mean,p_025,p_975 # better variance here TODO -class Student_t(LikelihoodFunction): +class StudentT(LikelihoodFunction): """Student t likelihood distribution For nomanclature see Bayesian Data Analysis 2003 p576 @@ -180,7 +180,11 @@ class Student_t(LikelihoodFunction): d2ln p(yi|fi)_d2fifj """ def __init__(self, deg_free=5, sigma2=2, link=None): - super(Student_t, self).__init__(link) + self._analytical = None + if not link: + link = link_functions.Nothing() + + super(StudentT, self).__init__(link) self.v = deg_free self.sigma2 = sigma2 @@ -413,6 +417,10 @@ class Gaussian(LikelihoodFunction): Gaussian likelihood - this is a test class for approximation schemes """ def __init__(self, variance, D, N, link=None): + self._analytical = None + if not link: + link = link_functions.Nothing() + super(Gaussian, self).__init__(link) self.D = D self.N = N @@ -454,7 +462,7 @@ class Gaussian(LikelihoodFunction): #- 0.5*np.sum(np.multiply(self.Ki, eeT)) - 0.5*np.dot(np.dot(e.T, self.Ki), e) ) - return np.sum(objective) + return np.sum(objective) # FIXME: put this back! def dlik_df(self, y, f, extra_data=None): """ @@ -468,7 +476,7 @@ class Gaussian(LikelihoodFunction): """ assert y.shape == f.shape s2_i = (1.0/self._variance)*self.I - grad = np.dot(s2_i, y) - 0.5*np.dot(s2_i, f) + grad = np.dot(s2_i, y) - np.dot(s2_i, f) return grad def d2lik_d2f(self, y, f, extra_data=None): @@ -486,7 +494,7 @@ class Gaussian(LikelihoodFunction): """ assert y.shape == f.shape s2_i = (1.0/self._variance)*self.I - hess = 0.5*np.diag(-s2_i)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS? + hess = np.diag(-s2_i)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS? return hess def d3lik_d3f(self, y, f, extra_data=None): @@ -499,17 +507,17 @@ class Gaussian(LikelihoodFunction): d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS? return d3lik_d3f - def lik_dstd(self, y, f, extra_data=None): + def lik_dvar(self, y, f, extra_data=None): """ Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation) """ assert y.shape == f.shape e = y - f s_4 = 1.0/(self._variance**2) - dlik_dsigma = -0.5*self.N*1/self._variance + 0.5*s_4*np.trace(np.dot(e.T, np.dot(self.I, e))) + dlik_dsigma = -0.5*self.N/self._variance + 0.5*s_4*np.trace(np.dot(e.T, np.dot(self.I, e))) return dlik_dsigma - def dlik_df_dstd(self, y, f, extra_data=None): + def dlik_df_dvar(self, y, f, extra_data=None): """ Gradient of the dlik_df w.r.t sigma parameter (standard deviation) """ @@ -518,7 +526,7 @@ class Gaussian(LikelihoodFunction): dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f)) return dlik_grad_dsigma - def d2lik_d2f_dstd(self, y, f, extra_data=None): + def d2lik_d2f_dvar(self, y, f, extra_data=None): """ Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation) @@ -530,9 +538,9 @@ class Gaussian(LikelihoodFunction): def _gradients(self, y, f, extra_data=None): #must be listed in same order as 'get_param_names' - derivs = ([self.lik_dstd(y, f, extra_data=extra_data)], - [self.dlik_df_dstd(y, f, extra_data=extra_data)], - [self.d2lik_d2f_dstd(y, f, extra_data=extra_data)] + derivs = ([self.lik_dvar(y, f, extra_data=extra_data)], + [self.dlik_df_dvar(y, f, extra_data=extra_data)], + [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)] ) # lists as we might learn many parameters # ensure we have gradients for every parameter we want to optimize assert len(derivs[0]) == len(self._get_param_names()) diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index 3b9a55b2..826983a9 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -31,3 +31,16 @@ class Probit(LinkFunction): def log_inv_transf(self,f): pass + +class Nothing(LinkFunction): + """ + Probit link function: Squashes a likelihood between 0 and 1 + """ + def transf(self,mu): + return mu + + def inv_transf(self,f): + return f + + def log_inv_transf(self,f): + return np.log(f) diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 86e1f7de..633fc1c8 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -25,11 +25,12 @@ class GPRegression(GP): """ - def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False): + def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, likelihood=None): if kernel is None: kernel = kern.rbf(X.shape[1]) - likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) + if likelihood is None: + likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self.ensure_default_constraints() @@ -39,5 +40,3 @@ class GPRegression(GP): def setstate(self, state): return GP.setstate(self, state) - - pass diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 19cf6545..8331933d 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -55,6 +55,14 @@ def dpotri(A, lower=0): """ return lapack.dpotri(A, lower=lower) +def pddet(A): + """ + Determinant of a positive definite matrix, only symmetric matricies though + """ + L = jitchol(A) + logdetA = 2*sum(np.log(np.diag(L))) + return logdetA + def trace_dot(a, b): """ efficiently compute the trace of the matrix product of a and b