Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-10 20:42:39 +02:00
Renamed some things, made some small (incorrect) gradient changes, generalised the GP regression model to take any likelihood, and added a placeholder link function awaiting Richardo's changes.
This commit is contained in:
commit 1dd83291fe (parent 5b25273d2b)
7 changed files with 83 additions and 53 deletions
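The headline change: GPy.models.GPRegression grows an optional likelihood argument, so the Laplace-approximated Student-t likelihood (renamed Student_t to StudentT, reachable through the new GPy.likelihoods.functions alias) can be used through the ordinary regression model instead of the lower-level GP class. A minimal sketch of the intended usage, with hypothetical toy data; the calls mirror those in the diff below:

import numpy as np
import GPy

# hypothetical toy data for illustration
X = np.linspace(0, 1, 50)[:, None]
Y = np.sin(2 * np.pi * X) + 0.1 * np.random.randn(50, 1)

# renamed likelihood function: Student_t -> StudentT, exposed via the
# GPy.likelihoods.functions alias for likelihood_functions
t_distribution = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=0.1)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')

# GPRegression now accepts likelihood=...; omitting it keeps the old
# Gaussian-likelihood behaviour
kernel = GPy.kern.rbf(X.shape[1])
m = GPy.models.GPRegression(X, Y, kernel, likelihood=stu_t_likelihood)
m.ensure_default_constraints()
m.optimize()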
@@ -25,9 +25,9 @@ def timing():
     edited_real_sd = real_sd
     kernel1 = GPy.kern.rbf(X.shape[1])

-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel1)
+    m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel1)
     m.ensure_default_constraints()
     m.update_likelihood_approximation()
     m.optimize()
@@ -54,9 +54,9 @@ def v_fail_test():
     edited_real_sd = real_sd

     print "Clean student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GP(X, stu_t_likelihood, kernel1)
+    m = GPy.models.GPRegression(X, stu_t_likelihood, kernel1)
     m.constrain_positive('')
     vs = 25
     noises = 30
@@ -94,16 +94,16 @@ def student_t_obj_plane():
     deg_free = 1000

     kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
-    mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
+    mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp)
     mgp.ensure_default_constraints()
     mgp['noise'] = real_std**2
     print "Gaussian"
     print mgp

     kernelst = kernelgp.copy()
-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=(real_std**2))
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=(real_std**2))
     stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GP(X, stu_t_likelihood, kernelst)
+    m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst)
     m.ensure_default_constraints()
     m.constrain_fixed('t_no', real_std**2)
     vs = 10
@@ -144,7 +144,7 @@ def student_t_f_check():
     deg_free = 1000

     kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
-    mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
+    mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp)
     mgp.ensure_default_constraints()
     mgp.randomize()
     mgp.optimize()
@@ -154,9 +154,9 @@ def student_t_f_check():

     kernelst = kernelgp.copy()
     #kernelst += GPy.kern.bias(X.shape[1])
-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=0.05)
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=0.05)
     stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GP(X, stu_t_likelihood, kernelst)
+    m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst)
     #m['rbf_v'] = mgp._get_params()[0]
     #m['rbf_l'] = mgp._get_params()[1] + 1
     m.ensure_default_constraints()
@@ -198,7 +198,7 @@ def student_t_fix_optimise_check():

     #GP
     kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
-    mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
+    mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp)
     mgp.ensure_default_constraints()
     mgp.randomize()
     mgp.optimize()
@@ -206,12 +206,12 @@ def student_t_fix_optimise_check():
     kernelst = kernelgp.copy()
     real_stu_t_std2 = (real_std**2)*((deg_free - 2)/float(deg_free))

-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=real_stu_t_std2)
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=real_stu_t_std2)
     stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')

     plt.figure(1)
     plt.suptitle('Student likelihood')
-    m = GPy.models.GP(X, stu_t_likelihood, kernelst)
+    m = GPy.models.GPRegression(X, stu_t_likelihood, kernelst)
     m.constrain_fixed('rbf_var', mgp._get_params()[0])
     m.constrain_fixed('rbf_len', mgp._get_params()[1])
     m.constrain_positive('t_noise')
@@ -331,7 +331,7 @@ def debug_student_t_noise_approx():
     print "Clean Gaussian"
     #A GP should completely break down due to the points as they get a lot of weight
     # create simple GP model
-    #m = GPy.models.GP_regression(X, Y, kernel=kernel1)
+    #m = GPy.models.GPRegression(X, Y, kernel=kernel1)
     ## optimize
     #m.ensure_default_constraints()
     #m.optimize()
@@ -349,10 +349,10 @@ def debug_student_t_noise_approx():
     #edited_real_sd = real_sd

     print "Clean student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')

-    m = GPy.models.GP(X, stu_t_likelihood, kernel6)
+    m = GPy.models.GPRegression(X, stu_t_likelihood, kernel6)
     #m['rbf_len'] = 1.5
     #m.constrain_fixed('rbf_v', 1.0898)
     #m.constrain_fixed('rbf_l', 0.2651)
@@ -384,9 +384,9 @@ def debug_student_t_noise_approx():
     return m

     #print "Clean student t, ncg"
-    #t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
-    #m = GPy.models.GP(X, stu_t_likelihood, kernel3)
+    #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3)
     #m.ensure_default_constraints()
     #m.update_likelihood_approximation()
     #m.optimize()
@@ -453,7 +453,7 @@ def student_t_approx():
     print "Clean Gaussian"
     #A GP should completely break down due to the points as they get a lot of weight
     # create simple GP model
-    m = GPy.models.GP_regression(X, Y, kernel=kernel1)
+    m = GPy.models.GPRegression(X, Y, kernel=kernel1)
     # optimize
     m.ensure_default_constraints()
     m.optimize()
@@ -466,7 +466,7 @@ def student_t_approx():

     #Corrupt
     print "Corrupt Gaussian"
-    m = GPy.models.GP_regression(X, Yc, kernel=kernel2)
+    m = GPy.models.GPRegression(X, Yc, kernel=kernel2)
     m.ensure_default_constraints()
     #m.optimize()
     plt.subplot(212)
@@ -480,9 +480,9 @@ def student_t_approx():
     edited_real_sd = real_std #initial_var_guess

     print "Clean student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GP(X, stu_t_likelihood, kernel6)
+    m = GPy.models.GPRegression(X, Y.copy(), kernel6, stu_t_likelihood)
     m.ensure_default_constraints()
     m.constrain_positive('t_noise')
     m.randomize()
@@ -496,9 +496,9 @@ def student_t_approx():
     plt.title('Student-t rasm clean')

     print "Corrupt student t, rasm"
-    t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
-    m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
+    m = GPy.models.GPRegression(X, Yc.copy(), kernel4, corrupt_stu_t_likelihood)
     m.ensure_default_constraints()
     m.constrain_positive('t_noise')
     m.randomize()
@@ -514,9 +514,9 @@ def student_t_approx():
     return m

     #print "Clean student t, ncg"
-    #t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
-    #m = GPy.models.GP(X, stu_t_likelihood, kernel3)
+    #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3)
     #m.ensure_default_constraints()
     #m.update_likelihood_approximation()
     #m.optimize()
@@ -528,9 +528,9 @@ def student_t_approx():
     #plt.title('Student-t ncg clean')

     #print "Corrupt student t, ncg"
-    #t_distribution = GPy.likelihoods.likelihood_functions.Student_t(deg_free, sigma2=edited_real_sd)
+    #t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd)
     #corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg')
-    #m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel5)
+    #m = GPy.models.GPRegression(X, corrupt_stu_t_likelihood, kernel5)
     #m.ensure_default_constraints()
     #m.update_likelihood_approximation()
     #m.optimize()
@@ -582,7 +582,7 @@ def noisy_laplace_approx():

     #A GP should completely break down due to the points as they get a lot of weight
     # create simple GP model
-    m = GPy.models.GP_regression(X, Y)
+    m = GPy.models.GPRegression(X, Y)

     # optimize
     m.ensure_default_constraints()
@@ -601,7 +601,7 @@ def gaussian_f_check():
     Y = np.sin(X*2*np.pi) + noise

     kernelgp = GPy.kern.rbf(X.shape[1]) # + GPy.kern.white(X.shape[1])
-    mgp = GPy.models.GP_regression(X, Y, kernel=kernelgp)
+    mgp = GPy.models.GPRegression(X, Y, kernel=kernelgp)
     mgp.ensure_default_constraints()
     mgp.randomize()
     mgp.optimize()
@@ -612,9 +612,9 @@ def gaussian_f_check():
     kernelg = kernelgp.copy()
     #kernelst += GPy.kern.bias(X.shape[1])
     N, D = X.shape
-    g_distribution = GPy.likelihoods.likelihood_functions.Gaussian(variance=0.1, N=N, D=D)
+    g_distribution = GPy.likelihoods.functions.Gaussian(variance=0.1, N=N, D=D)
     g_likelihood = GPy.likelihoods.Laplace(Y.copy(), g_distribution, opt='rasm')
-    m = GPy.models.GP(X, g_likelihood, kernelg)
+    m = GPy.models.GPRegression(X, Y, kernelg, likelihood=g_likelihood)
     #m['rbf_v'] = mgp._get_params()[0]
     #m['rbf_l'] = mgp._get_params()[1] + 1
     m.ensure_default_constraints()
@@ -624,14 +624,15 @@ def gaussian_f_check():
     #m.constrain_positive('bias')
     m.constrain_positive('noise_var')
     m.randomize()
+    import ipdb; ipdb.set_trace() # XXX BREAKPOINT
     m['noise_variance'] = 0.1
-    m.likelihood.X = X
+    #m.likelihood.X = X
     plt.figure()
-    plt.subplot(211)
-    m.plot()
-    plt.subplot(212)
+    ax = plt.subplot(211)
+    m.plot(ax=ax)
+    ax = plt.subplot(212)
     m.optimize()
-    m.plot()
+    m.plot(ax=ax)
     print "final optimised gaussian"
     print m
     print "real GP"
@@ -1,4 +1,5 @@
 from ep import EP
+from laplace import Laplace
 from gaussian import Gaussian
 # TODO: from Laplace import Laplace
 import likelihood_functions as functions
@@ -167,7 +167,7 @@ class Poisson(LikelihoodFunction):
         p_975 = tmp[:,1]
         return mean,np.nan*mean,p_025,p_975 # better variance here TODO

-class Student_t(LikelihoodFunction):
+class StudentT(LikelihoodFunction):
     """Student t likelihood distribution
     For nomanclature see Bayesian Data Analysis 2003 p576

@@ -180,7 +180,11 @@ class Student_t(LikelihoodFunction):
     d2ln p(yi|fi)_d2fifj
     """
     def __init__(self, deg_free=5, sigma2=2, link=None):
-        super(Student_t, self).__init__(link)
+        self._analytical = None
+        if not link:
+            link = link_functions.Nothing()
+
+        super(StudentT, self).__init__(link)
         self.v = deg_free
         self.sigma2 = sigma2

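With this change a StudentT built without an explicit link no longer hands None up to the base class; it falls back to the new placeholder. A small sketch of the two now-equivalent calls, assuming link_functions sits next to likelihood_functions inside GPy.likelihoods (the code above implies that layout, but the import itself is not shown in this diff):

from GPy.likelihoods import likelihood_functions, link_functions

# default: __init__ quietly substitutes the identity placeholder
t_dist = likelihood_functions.StudentT(deg_free=5, sigma2=2)

# explicit equivalent
t_dist_explicit = likelihood_functions.StudentT(
    deg_free=5, sigma2=2, link=link_functions.Nothing())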
@@ -413,6 +417,10 @@ class Gaussian(LikelihoodFunction):
    Gaussian likelihood - this is a test class for approximation schemes
    """
    def __init__(self, variance, D, N, link=None):
+        self._analytical = None
+        if not link:
+            link = link_functions.Nothing()
+
         super(Gaussian, self).__init__(link)
         self.D = D
         self.N = N
@@ -454,7 +462,7 @@ class Gaussian(LikelihoodFunction):
                      #- 0.5*np.sum(np.multiply(self.Ki, eeT))
                      - 0.5*np.dot(np.dot(e.T, self.Ki), e)
                      )
-        return np.sum(objective)
+        return np.sum(objective) # FIXME: put this back!

     def dlik_df(self, y, f, extra_data=None):
         """
@@ -468,7 +476,7 @@ class Gaussian(LikelihoodFunction):
         """
         assert y.shape == f.shape
         s2_i = (1.0/self._variance)*self.I
-        grad = np.dot(s2_i, y) - 0.5*np.dot(s2_i, f)
+        grad = np.dot(s2_i, y) - np.dot(s2_i, f)
         return grad

     def d2lik_d2f(self, y, f, extra_data=None):
@@ -486,7 +494,7 @@ class Gaussian(LikelihoodFunction):
         """
         assert y.shape == f.shape
         s2_i = (1.0/self._variance)*self.I
-        hess = 0.5*np.diag(-s2_i)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
+        hess = np.diag(-s2_i)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
         return hess

     def d3lik_d3f(self, y, f, extra_data=None):
@@ -499,17 +507,17 @@ class Gaussian(LikelihoodFunction):
         d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
         return d3lik_d3f

-    def lik_dstd(self, y, f, extra_data=None):
+    def lik_dvar(self, y, f, extra_data=None):
         """
         Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
         """
         assert y.shape == f.shape
         e = y - f
         s_4 = 1.0/(self._variance**2)
-        dlik_dsigma = -0.5*self.N*1/self._variance + 0.5*s_4*np.trace(np.dot(e.T, np.dot(self.I, e)))
+        dlik_dsigma = -0.5*self.N/self._variance + 0.5*s_4*np.trace(np.dot(e.T, np.dot(self.I, e)))
         return dlik_dsigma

-    def dlik_df_dstd(self, y, f, extra_data=None):
+    def dlik_df_dvar(self, y, f, extra_data=None):
         """
         Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
         """
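As a sanity check on the factor-of-a-half fixes in dlik_df and d2lik_d2f above: for this test likelihood p(y|f) = N(y; f, sigma^2 I), with residual e = y - f, the standard derivatives are

\log p(\mathbf{y}\,|\,\mathbf{f}) = -\tfrac{N}{2}\log(2\pi\sigma^2) - \tfrac{1}{2\sigma^2}\mathbf{e}^\top\mathbf{e}

\frac{\partial \log p}{\partial \mathbf{f}} = \frac{1}{\sigma^2}\mathbf{e}, \qquad
\frac{\partial^2 \log p}{\partial \mathbf{f}\,\partial \mathbf{f}^\top} = -\frac{1}{\sigma^2}I, \qquad
\frac{\partial \log p}{\partial \sigma^2} = -\frac{N}{2\sigma^2} + \frac{1}{2\sigma^4}\mathbf{e}^\top\mathbf{e}

The corrected dlik_df and d2lik_d2f and the renamed lik_dvar now agree with these. The dlik_grad_dsigma line in the next hunk still appears to carry a stray 0.5 on its f term (the derivative of the gradient w.r.t. the variance is -e/sigma^4), which fits the "(incorrect)" caveat in the commit message.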
@@ -518,7 +526,7 @@ class Gaussian(LikelihoodFunction):
         dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f))
         return dlik_grad_dsigma

-    def d2lik_d2f_dstd(self, y, f, extra_data=None):
+    def d2lik_d2f_dvar(self, y, f, extra_data=None):
         """
         Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation)

@@ -530,9 +538,9 @@ class Gaussian(LikelihoodFunction):

     def _gradients(self, y, f, extra_data=None):
         #must be listed in same order as 'get_param_names'
-        derivs = ([self.lik_dstd(y, f, extra_data=extra_data)],
-                  [self.dlik_df_dstd(y, f, extra_data=extra_data)],
-                  [self.d2lik_d2f_dstd(y, f, extra_data=extra_data)]
+        derivs = ([self.lik_dvar(y, f, extra_data=extra_data)],
+                  [self.dlik_df_dvar(y, f, extra_data=extra_data)],
+                  [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)]
                  ) # lists as we might learn many parameters
         # ensure we have gradients for every parameter we want to optimize
         assert len(derivs[0]) == len(self._get_param_names())
@@ -31,3 +31,16 @@ class Probit(LinkFunction):

     def log_inv_transf(self,f):
         pass
+
+class Nothing(LinkFunction):
+    """
+    Probit link function: Squashes a likelihood between 0 and 1
+    """
+    def transf(self,mu):
+        return mu
+
+    def inv_transf(self,f):
+        return f
+
+    def log_inv_transf(self,f):
+        return np.log(f)
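The docstring above is copied from Probit, but the methods make Nothing an identity pass-through: transf and inv_transf return their argument untouched, and log_inv_transf is a plain log (so it is only meaningful for positive f). A toy check, assuming link_functions is importable from GPy.likelihoods:

import numpy as np
from GPy.likelihoods import link_functions  # assumed import path

link = link_functions.Nothing()
mu = np.array([0.3, -1.2])
assert np.all(link.transf(mu) == mu)       # identity forward map
assert np.all(link.inv_transf(mu) == mu)   # identity inverse map
assert np.isclose(link.log_inv_transf(np.array([2.0])), np.log(2.0))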
@@ -25,10 +25,11 @@ class GPRegression(GP):

    """

-    def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False):
+    def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, likelihood=None):
        if kernel is None:
            kernel = kern.rbf(X.shape[1])

+        if likelihood is None:
            likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)

        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
@@ -39,5 +40,3 @@ class GPRegression(GP):

    def setstate(self, state):
        return GP.setstate(self, state)
-
-    pass
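A short sketch of the two construction paths this widened signature allows, again on hypothetical toy data:

import numpy as np
import GPy

X = np.random.rand(30, 1)                      # hypothetical inputs
Y = np.sin(2 * np.pi * X) + 0.05 * np.random.randn(30, 1)

# likelihood=None (the default) keeps the old behaviour: a Gaussian
# likelihood is built from Y, honouring normalize_Y
m_default = GPy.models.GPRegression(X, Y)

# any Laplace-wrapped likelihood function can be swapped in instead
t_dist = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=0.05)
laplace_lik = GPy.likelihoods.Laplace(Y.copy(), t_dist, opt='rasm')
m_student = GPy.models.GPRegression(X, Y, likelihood=laplace_lik)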
@@ -55,6 +55,14 @@ def dpotri(A, lower=0):
    """
    return lapack.dpotri(A, lower=lower)

+def pddet(A):
+    """
+    Determinant of a positive definite matrix, only symmetric matricies though
+    """
+    L = jitchol(A)
+    logdetA = 2*sum(np.log(np.diag(L)))
+    return logdetA
+
 def trace_dot(a, b):
    """
    efficiently compute the trace of the matrix product of a and b
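Note that pddet returns the log-determinant, not the determinant: for a Cholesky factor L with A = L L^T, log det A = 2 * sum_i log L_ii, which is the numerically stable quantity a GP marginal likelihood needs. A quick toy check with plain numpy, using np.linalg.cholesky as a stand-in for GPy's jitchol:

import numpy as np

A = np.array([[4.0, 1.0],
              [1.0, 3.0]])            # symmetric positive definite

L = np.linalg.cholesky(A)             # stand-in for jitchol(A)
logdetA = 2 * np.sum(np.log(np.diag(L)))

sign, ref = np.linalg.slogdet(A)      # reference log-determinant
assert sign > 0 and np.isclose(logdetA, ref)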