From 9364efc755405fdb3b424f4e3ffc01e68694b31e Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 30 Jul 2013 16:11:03 +0100 Subject: [PATCH] Started adding gaussian sanity checker --- GPy/examples/laplace_approximations.py | 10 ++-- GPy/likelihoods/Laplace.py | 80 +++++++++++++------------ GPy/likelihoods/likelihood_functions.py | 58 +++++------------- 3 files changed, 60 insertions(+), 88 deletions(-) diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index 2b93122c..e8b6419f 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -168,23 +168,23 @@ def student_t_f_check(): m.randomize() m['t_no'] = 0.3 m.likelihood.X = X - print m + #print m plt.figure() plt.subplot(511) m.plot() - print m + #print m plt.subplot(512) m.optimize(max_f_eval=15) m.plot() - print m + #print m plt.subplot(513) m.optimize(max_f_eval=15) m.plot() - print m + #print m plt.subplot(514) m.optimize(max_f_eval=15) m.plot() - print m + #print m plt.subplot(515) m.optimize() m.plot() diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py index f86c47b6..aeda17da 100644 --- a/GPy/likelihoods/Laplace.py +++ b/GPy/likelihoods/Laplace.py @@ -89,7 +89,8 @@ class Laplace(likelihood): expl = 0.5*expl_a + 0.5*expl_b # Might need to be -? dL_dthetaK_exp = dK_dthetaK(expl, X) dL_dthetaK_imp = dK_dthetaK(impl, X) - print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) + #print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) + #print "expl_a: {}, {} expl_b: {}, {}".format(np.mean(expl_a), np.std(expl_a), np.mean(expl_b), np.std(expl_b)) dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp return dL_dthetaK @@ -165,8 +166,7 @@ class Laplace(likelihood): self.aA = 0.5*self.ln_det_K_Wi__Bi self.bB = - 0.5*self.f_Ki_f self.cC = 0.5*self.y_Wi_Ki_i_y - Z_tilde = (#+ 100*self.NORMAL_CONST - + self.lik + Z_tilde = (+ self.lik + 0.5*self.ln_det_K_Wi__Bi - 0.5*self.f_Ki_f + 0.5*self.y_Wi_Ki_i_y @@ -379,7 +379,8 @@ class Laplace(likelihood): #difference = abs(new_obj - old_obj) #old_obj = new_obj.copy() - difference = np.abs(np.sum(f - f_old)) + #difference = np.abs(np.sum(f - f_old)) + difference = np.abs(np.sum(a - old_a)) #old_a = self.a.copy() #a old_a = a.copy() i += 1 @@ -391,42 +392,43 @@ class Laplace(likelihood): print "Iterations: {}, Final_difference: {}".format(i, difference) if difference > 1e-4: print "FAIL FAIL FAIL FAIL FAIL FAIL" - import ipdb; ipdb.set_trace() ### XXX BREAKPOINT - if hasattr(self, 'X'): - import pylab as pb - pb.figure() - pb.subplot(311) - pb.title('old f_hat') - pb.plot(self.X, self.f_hat) - pb.subplot(312) - pb.title('old ff') - pb.plot(self.X, self.old_ff) - pb.subplot(313) - pb.title('new f_hat') - pb.plot(self.X, f) - - pb.figure() - pb.subplot(121) - pb.title('old K') - pb.imshow(np.diagflat(self.old_K), interpolation='none') - pb.colorbar() - pb.subplot(122) - pb.title('new K') - pb.imshow(np.diagflat(K), interpolation='none') - pb.colorbar() - - pb.figure() - pb.subplot(121) - pb.title('old W') - pb.imshow(np.diagflat(self.old_W), interpolation='none') - pb.colorbar() - pb.subplot(122) - pb.title('new W') - pb.imshow(np.diagflat(W), interpolation='none') - pb.colorbar() - + if False: import ipdb; ipdb.set_trace() ### XXX BREAKPOINT - pb.close('all') + if hasattr(self, 'X'): + import pylab as pb + pb.figure() + pb.subplot(311) + pb.title('old f_hat') + pb.plot(self.X, self.f_hat) + pb.subplot(312) + pb.title('old ff') + pb.plot(self.X, self.old_ff) + pb.subplot(313) + pb.title('new f_hat') + pb.plot(self.X, f) + + pb.figure() + pb.subplot(121) + pb.title('old K') + pb.imshow(np.diagflat(self.old_K), interpolation='none') + pb.colorbar() + pb.subplot(122) + pb.title('new K') + pb.imshow(np.diagflat(K), interpolation='none') + pb.colorbar() + + pb.figure() + pb.subplot(121) + pb.title('old W') + pb.imshow(np.diagflat(self.old_W), interpolation='none') + pb.colorbar() + pb.subplot(122) + pb.title('new W') + pb.imshow(np.diagflat(W), interpolation='none') + pb.colorbar() + + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT + pb.close('all') #FIXME: DELETE THESE self.old_W = W.copy() diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 62e09a1a..42af9c8d 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -239,7 +239,7 @@ class student_t(likelihood_function): """ assert y.shape == f.shape e = y - f - hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2) + hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess def d3lik_d3f(self, y, f, extra_data=None): @@ -277,7 +277,7 @@ class student_t(likelihood_function): """ assert y.shape == f.shape e = y - f - dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2) + dlik_grad_dsigma = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) return dlik_grad_dsigma def d2lik_d2f_dstd(self, y, f, extra_data=None): @@ -289,7 +289,7 @@ class student_t(likelihood_function): assert y.shape == f.shape e = y - f dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) - / (self.sigma2*self.v + (e**2))**3 + / ((self.sigma2*self.v + (e**2))**3) ) return dlik_hess_dsigma @@ -479,7 +479,8 @@ class gaussian(likelihood_function): def _set_params(self, x): self._variance = float(x) - self.covariance_matrix = np.eye(self.N) * self._variance + self.I = np.eye(self.N) + self.covariance_matrix = self.I * self._variance self.Ki, _, _, self.ln_K = pdinv(self.covariance_matrix) # THIS MAY BE WRONG def link_function(self, y, f, extra_data=None): @@ -505,8 +506,6 @@ class gaussian(likelihood_function): """ Gradient of the link function at y, given f w.r.t f - $$\frac{dp(y_{i}|f_{i})}{df} = \frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \sigma^{2}v}$$ - :y: data :f: latent variables f :extra_data: extra_data which is not used in student t distribution @@ -514,8 +513,8 @@ class gaussian(likelihood_function): """ assert y.shape == f.shape - e = y - f - grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) + s2_i = (1.0/self._variance)*self.I + grad = np.dot(s2_i, y) - 0.5*np.dot(s2_i, f) return grad def d2lik_d2f(self, y, f, extra_data=None): @@ -526,16 +525,14 @@ class gaussian(likelihood_function): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_{i} depends only on f_{i} not on f_{j!=i} - $$\frac{d^{2}p(y_{i}|f_{i})}{d^{3}f} = \frac{(v+1)((y_{i}-f_{i})^{2} - \sigma^{2}v)}{((y_{i}-f_{i})^{2} + \sigma^{2}v)^{2}}$$ - :y: data :f: latent variables f :extra_data: extra_data which is not used in student t distribution :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) """ assert y.shape == f.shape - e = y - f - hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2) + s2_i = (1.0/self._variance)*self.I + hess = np.diagonal(-0.5*s2_i) return hess def d3lik_d3f(self, y, f, extra_data=None): @@ -545,46 +542,25 @@ class gaussian(likelihood_function): $$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$ """ assert y.shape == f.shape - e = y - f - d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / - ((e**2 + self.sigma2*self.v)**3) - ) + d3lik_d3f = np.diagonal(0*self.I) return d3lik_d3f def lik_dstd(self, y, f, extra_data=None): """ Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation) - - Terms relavent to derivatives wrt sigma are: - -log(sqrt(v*pi)*s) -(1/2)*(v + 1)*log(1 + (1/v)*((y-f)/(s))^2)) - - $$\frac{dp(y_{i}|f_{i})}{d\sigma} = -\frac{1}{\sigma} + \frac{(1+v)(y_{i}-f_{i})^2}{\sigma^3 v(1 + \frac{1}{v}(\frac{(y_{i} - f_{i})}{\sigma^2})^2)}$$ """ assert y.shape == f.shape e = y - f - sigma = np.sqrt(self.sigma2) - #dlik_dsigma = ( - (1/sigma) + - #((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) ) - #) - #dlik_dsigma = ( - 1 + - #((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) ) - #) - #dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1 - dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v)) + dlik_dsigma = -0.5*self.N*self._variance - 0.5*np.dot(e.T, e) return dlik_dsigma def dlik_df_dstd(self, y, f, extra_data=None): """ Gradient of the dlik_df w.r.t sigma parameter (standard deviation) - - $$\frac{d}{d\sigma}(\frac{dp(y_{i}|f_{i})}{df}) = \frac{-2\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \sigma^2 v)^2}$$ """ assert y.shape == f.shape - e = y - f - sigma = np.sqrt(self.sigma2) - dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here? - / ((self.v*self.sigma2 + e**2)**2) - ) + s_4 = 1.0/(self._variance**2) + dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f)) return dlik_grad_dsigma def d2lik_d2f_dstd(self, y, f, extra_data=None): @@ -594,13 +570,7 @@ class gaussian(likelihood_function): $$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$ """ assert y.shape == f.shape - e = y - f - sigma = np.sqrt(self.sigma2) - dlik_hess_dsigma = ( (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) / - ((e**2 + self.sigma2*self.v)**3) - ) - #dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2)) - #/ ((e**2 + (self.sigma**2)*self.v)**3) ) + dlik_hess_dsigma = 1.0/(2*(self._variance**2)) return dlik_hess_dsigma def _gradients(self, y, f, extra_data=None):