From 4925d8a0d94d240f5674399f8014fd2b725083c6 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 4 Oct 2013 15:38:59 +0100 Subject: [PATCH] Doccing and testing for D dimensional input (not multiple dimensional Y yet) --- .../noise_models/student_t_noise.py | 50 +++++++++++-------- GPy/testing/laplace_tests.py | 15 +++--- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/GPy/likelihoods/noise_models/student_t_noise.py b/GPy/likelihoods/noise_models/student_t_noise.py index 000168e1..dc78b582 100644 --- a/GPy/likelihoods/noise_models/student_t_noise.py +++ b/GPy/likelihoods/noise_models/student_t_noise.py @@ -48,9 +48,9 @@ class StudentT(NoiseDistribution): \\ln p(y_{i}|f_{i}) = \\ln \\Gamma(\\frac{v+1}{2}) - \\ln \\Gamma(\\frac{v}{2})\\sqrt{v \\pi}\sigma - \\frac{v+1}{2}\\ln (1 + \\frac{1}{v}\\left(\\frac{y_{i} - f_{i}}{\\sigma}\\right)^2 :param y: data - :type y: NxD matrix + :type y: Nx1 matrix :param f: latent variables f - :type f: NxD matrix + :type f: Nx1 matrix :param extra_data: extra_data which is not used in student t distribution - not used :returns: likelihood evaluated for this point :rtype: float @@ -73,12 +73,12 @@ class StudentT(NoiseDistribution): \\frac{d \\ln p(y_{i}|f_{i})}{df} = \\frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \\sigma^{2}v} :param y: data - :type y: NxD matrix + :type y: Nx1 matrix :param f: latent variables f - :type f: NxD matrix + :type f: Nx1 matrix :param extra_data: extra_data which is not used in student t distribution - not used :returns: gradient of likelihood evaluated at points - :rtype: 1xN array + :rtype: Nx1 array """ assert y.shape == f.shape @@ -95,12 +95,12 @@ class StudentT(NoiseDistribution): \\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f} = \\frac{(v+1)((y_{i}-f_{i})^{2} - \\sigma^{2}v)}{((y_{i}-f_{i})^{2} + \\sigma^{2}v)^{2}} :param y: data - :type y: NxD matrix + :type y: Nx1 matrix :param f: latent variables f - :type f: NxD matrix + :type f: Nx1 matrix :param extra_data: extra_data which is not used in student t distribution - not used :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f) - :rtype: 1xN array + :rtype: Nx1 array .. Note:: Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases @@ -119,12 +119,12 @@ class StudentT(NoiseDistribution): \\frac{d^{3} \\ln p(y_{i}|f_{i})}{d^{3}f} = \\frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \\sigma^{2} v))}{((y_{i} - f_{i}) + \\sigma^{2} v)^3} :param y: data - :type y: NxD matrix + :type y: Nx1 matrix :param f: latent variables f - :type f: NxD matrix + :type f: Nx1 matrix :param extra_data: extra_data which is not used in student t distribution - not used :returns: third derivative of likelihood evaluated at points f - :rtype: 1xN array + :rtype: Nx1 array """ assert y.shape == f.shape e = y - f @@ -138,15 +138,17 @@ class StudentT(NoiseDistribution): Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise) .. math:: - \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = -\\frac{1}{\\sigma} + \\frac{(1+v)(y_{i}-f_{i})^2}{\\sigma^3 v(1 + \\frac{1}{v}(\\frac{(y_{i} - f_{i})}{\\sigma^2})^2)} + \\frac{d \\ln p(y_{i}|f_{i})}{d\\sigma^{2}} = \\frac{v((y_{i} - f_{i})^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - f_{i})^{2})} + + -\\frac{1}{\\sigma} + \\frac{(1+v)(y_{i}-f_{i})^2}{\\sigma^3 v(1 + \\frac{1}{v}(\\frac{(y_{i} - f_{i})}{\\sigma^2})^2)} :param y: data - :type y: NxD matrix + :type y: Nx1 matrix :param f: latent variables f - :type f: NxD matrix + :type f: Nx1 matrix :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter - :rtype: 1x1 array + :rtype: float """ assert y.shape == f.shape e = y - f @@ -162,12 +164,12 @@ class StudentT(NoiseDistribution): \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|f_{i})}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \\sigma^2 v)^2} :param y: data - :type y: NxD matrix + :type y: Nx1 matrix :param f: latent variables f - :type f: NxD matrix + :type f: Nx1 matrix :param extra_data: extra_data which is not used in student t distribution - not used :returns: derivative of likelihood evaluated at points f w.r.t variance parameter - :rtype: 1xN array + :rtype: Nx1 array """ assert y.shape == f.shape e = y - f @@ -178,7 +180,16 @@ class StudentT(NoiseDistribution): """ Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation) - $$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$ + .. math:: + \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|f_{i})}{d^{2}f}) = \\frac{2\\sigma v(v + 1)(\\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \\sigma^2 v)^3} + + :param y: data + :type y: Nx1 matrix + :param f: latent variables f + :type f: Nx1 matrix + :param extra_data: extra_data which is not used in student t distribution - not used + :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter + :rtype: Nx1 array """ assert y.shape == f.shape e = y - f @@ -216,7 +227,6 @@ class StudentT(NoiseDistribution): #However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom true_var = sigma**2 + self.variance - print true_var return true_var def _predictive_mean_analytical(self, mu, var): diff --git a/GPy/testing/laplace_tests.py b/GPy/testing/laplace_tests.py index debb3c27..e1876296 100644 --- a/GPy/testing/laplace_tests.py +++ b/GPy/testing/laplace_tests.py @@ -65,16 +65,16 @@ def dparam_checkgrad(func, dfunc, params, args, constrain_positive=True, randomi class LaplaceTests(unittest.TestCase): def setUp(self): self.N = 5 - self.D = 1 - self.X = np.random.rand(self.N, self.D) + self.D = 3 + self.X = np.random.rand(self.N, self.D)*10 self.real_std = 0.1 - noise = np.random.randn(*self.X.shape)*self.real_std - self.Y = np.sin(self.X*2*np.pi) + noise + noise = np.random.randn(*self.X[:, 0].shape)*self.real_std + self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] #self.Y = np.array([[1.0]])#np.sin(self.X*2*np.pi) + noise self.var = 0.2 - self.f = np.random.rand(self.N, self.D) + self.f = np.random.rand(self.N, 1) #self.f = np.array([[3.0]])#np.sin(self.X*2*np.pi) + noise self.var = np.random.rand(1) @@ -109,6 +109,8 @@ class LaplaceTests(unittest.TestCase): grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) + """ Gradchecker fault """ + @unittest.expectedFailure def test_gaussian_d2lik_d2f_2(self): print "\n{}".format(inspect.stack()[0][3]) self.Y = None @@ -174,8 +176,6 @@ class LaplaceTests(unittest.TestCase): grad.checkgrad(verbose=1) self.assertTrue(grad.checkgrad()) - """ Gradchecker fault """ - @unittest.expectedFailure def test_studentt_d2lik_d2f(self): print "\n{}".format(inspect.stack()[0][3]) dlik_df = functools.partial(self.stu_t.dlik_df, self.Y) @@ -224,7 +224,6 @@ class LaplaceTests(unittest.TestCase): kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1]) gauss_laplace = GPy.likelihoods.Laplace(self.Y.copy(), self.gauss) m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=gauss_laplace) - import ipdb; ipdb.set_trace() # XXX BREAKPOINT m.ensure_default_constraints() m.randomize() m.checkgrad(verbose=1, step=self.step)