Started adding gaussian sanity checker

This commit is contained in:
Alan Saul 2013-07-30 16:11:03 +01:00
parent fdb7b99e0b
commit 9364efc755
3 changed files with 60 additions and 88 deletions

View file

@ -168,23 +168,23 @@ def student_t_f_check():
m.randomize() m.randomize()
m['t_no'] = 0.3 m['t_no'] = 0.3
m.likelihood.X = X m.likelihood.X = X
print m #print m
plt.figure() plt.figure()
plt.subplot(511) plt.subplot(511)
m.plot() m.plot()
print m #print m
plt.subplot(512) plt.subplot(512)
m.optimize(max_f_eval=15) m.optimize(max_f_eval=15)
m.plot() m.plot()
print m #print m
plt.subplot(513) plt.subplot(513)
m.optimize(max_f_eval=15) m.optimize(max_f_eval=15)
m.plot() m.plot()
print m #print m
plt.subplot(514) plt.subplot(514)
m.optimize(max_f_eval=15) m.optimize(max_f_eval=15)
m.plot() m.plot()
print m #print m
plt.subplot(515) plt.subplot(515)
m.optimize() m.optimize()
m.plot() m.plot()

View file

@ -89,7 +89,8 @@ class Laplace(likelihood):
expl = 0.5*expl_a + 0.5*expl_b # Might need to be -? expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
dL_dthetaK_exp = dK_dthetaK(expl, X) dL_dthetaK_exp = dK_dthetaK(expl, X)
dL_dthetaK_imp = dK_dthetaK(impl, X) dL_dthetaK_imp = dK_dthetaK(impl, X)
print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) #print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
#print "expl_a: {}, {} expl_b: {}, {}".format(np.mean(expl_a), np.std(expl_a), np.mean(expl_b), np.std(expl_b))
dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
return dL_dthetaK return dL_dthetaK
@ -165,8 +166,7 @@ class Laplace(likelihood):
self.aA = 0.5*self.ln_det_K_Wi__Bi self.aA = 0.5*self.ln_det_K_Wi__Bi
self.bB = - 0.5*self.f_Ki_f self.bB = - 0.5*self.f_Ki_f
self.cC = 0.5*self.y_Wi_Ki_i_y self.cC = 0.5*self.y_Wi_Ki_i_y
Z_tilde = (#+ 100*self.NORMAL_CONST Z_tilde = (+ self.lik
+ self.lik
+ 0.5*self.ln_det_K_Wi__Bi + 0.5*self.ln_det_K_Wi__Bi
- 0.5*self.f_Ki_f - 0.5*self.f_Ki_f
+ 0.5*self.y_Wi_Ki_i_y + 0.5*self.y_Wi_Ki_i_y
@ -379,7 +379,8 @@ class Laplace(likelihood):
#difference = abs(new_obj - old_obj) #difference = abs(new_obj - old_obj)
#old_obj = new_obj.copy() #old_obj = new_obj.copy()
difference = np.abs(np.sum(f - f_old)) #difference = np.abs(np.sum(f - f_old))
difference = np.abs(np.sum(a - old_a))
#old_a = self.a.copy() #a #old_a = self.a.copy() #a
old_a = a.copy() old_a = a.copy()
i += 1 i += 1
@ -391,42 +392,43 @@ class Laplace(likelihood):
print "Iterations: {}, Final_difference: {}".format(i, difference) print "Iterations: {}, Final_difference: {}".format(i, difference)
if difference > 1e-4: if difference > 1e-4:
print "FAIL FAIL FAIL FAIL FAIL FAIL" print "FAIL FAIL FAIL FAIL FAIL FAIL"
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT if False:
if hasattr(self, 'X'):
import pylab as pb
pb.figure()
pb.subplot(311)
pb.title('old f_hat')
pb.plot(self.X, self.f_hat)
pb.subplot(312)
pb.title('old ff')
pb.plot(self.X, self.old_ff)
pb.subplot(313)
pb.title('new f_hat')
pb.plot(self.X, f)
pb.figure()
pb.subplot(121)
pb.title('old K')
pb.imshow(np.diagflat(self.old_K), interpolation='none')
pb.colorbar()
pb.subplot(122)
pb.title('new K')
pb.imshow(np.diagflat(K), interpolation='none')
pb.colorbar()
pb.figure()
pb.subplot(121)
pb.title('old W')
pb.imshow(np.diagflat(self.old_W), interpolation='none')
pb.colorbar()
pb.subplot(122)
pb.title('new W')
pb.imshow(np.diagflat(W), interpolation='none')
pb.colorbar()
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
pb.close('all') if hasattr(self, 'X'):
import pylab as pb
pb.figure()
pb.subplot(311)
pb.title('old f_hat')
pb.plot(self.X, self.f_hat)
pb.subplot(312)
pb.title('old ff')
pb.plot(self.X, self.old_ff)
pb.subplot(313)
pb.title('new f_hat')
pb.plot(self.X, f)
pb.figure()
pb.subplot(121)
pb.title('old K')
pb.imshow(np.diagflat(self.old_K), interpolation='none')
pb.colorbar()
pb.subplot(122)
pb.title('new K')
pb.imshow(np.diagflat(K), interpolation='none')
pb.colorbar()
pb.figure()
pb.subplot(121)
pb.title('old W')
pb.imshow(np.diagflat(self.old_W), interpolation='none')
pb.colorbar()
pb.subplot(122)
pb.title('new W')
pb.imshow(np.diagflat(W), interpolation='none')
pb.colorbar()
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
pb.close('all')
#FIXME: DELETE THESE #FIXME: DELETE THESE
self.old_W = W.copy() self.old_W = W.copy()

View file

@ -239,7 +239,7 @@ class student_t(likelihood_function):
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
def d3lik_d3f(self, y, f, extra_data=None): def d3lik_d3f(self, y, f, extra_data=None):
@ -277,7 +277,7 @@ class student_t(likelihood_function):
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2) dlik_grad_dsigma = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlik_grad_dsigma return dlik_grad_dsigma
def d2lik_d2f_dstd(self, y, f, extra_data=None): def d2lik_d2f_dstd(self, y, f, extra_data=None):
@ -289,7 +289,7 @@ class student_t(likelihood_function):
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ (self.sigma2*self.v + (e**2))**3 / ((self.sigma2*self.v + (e**2))**3)
) )
return dlik_hess_dsigma return dlik_hess_dsigma
@ -479,7 +479,8 @@ class gaussian(likelihood_function):
def _set_params(self, x): def _set_params(self, x):
self._variance = float(x) self._variance = float(x)
self.covariance_matrix = np.eye(self.N) * self._variance self.I = np.eye(self.N)
self.covariance_matrix = self.I * self._variance
self.Ki, _, _, self.ln_K = pdinv(self.covariance_matrix) # THIS MAY BE WRONG self.Ki, _, _, self.ln_K = pdinv(self.covariance_matrix) # THIS MAY BE WRONG
def link_function(self, y, f, extra_data=None): def link_function(self, y, f, extra_data=None):
@ -505,8 +506,6 @@ class gaussian(likelihood_function):
""" """
Gradient of the link function at y, given f w.r.t f Gradient of the link function at y, given f w.r.t f
$$\frac{dp(y_{i}|f_{i})}{df} = \frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \sigma^{2}v}$$
:y: data :y: data
:f: latent variables f :f: latent variables f
:extra_data: extra_data which is not used in student t distribution :extra_data: extra_data which is not used in student t distribution
@ -514,8 +513,8 @@ class gaussian(likelihood_function):
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f s2_i = (1.0/self._variance)*self.I
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = np.dot(s2_i, y) - 0.5*np.dot(s2_i, f)
return grad return grad
def d2lik_d2f(self, y, f, extra_data=None): def d2lik_d2f(self, y, f, extra_data=None):
@ -526,16 +525,14 @@ class gaussian(likelihood_function):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i} (the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
$$\frac{d^{2}p(y_{i}|f_{i})}{d^{3}f} = \frac{(v+1)((y_{i}-f_{i})^{2} - \sigma^{2}v)}{((y_{i}-f_{i})^{2} + \sigma^{2}v)^{2}}$$
:y: data :y: data
:f: latent variables f :f: latent variables f
:extra_data: extra_data which is not used in student t distribution :extra_data: extra_data which is not used in student t distribution
:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points) :returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f s2_i = (1.0/self._variance)*self.I
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2) hess = np.diagonal(-0.5*s2_i)
return hess return hess
def d3lik_d3f(self, y, f, extra_data=None): def d3lik_d3f(self, y, f, extra_data=None):
@ -545,46 +542,25 @@ class gaussian(likelihood_function):
$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$ $$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f d3lik_d3f = np.diagonal(0*self.I)
d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3)
)
return d3lik_d3f return d3lik_d3f
def lik_dstd(self, y, f, extra_data=None): def lik_dstd(self, y, f, extra_data=None):
""" """
Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation) Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
Terms relavent to derivatives wrt sigma are:
-log(sqrt(v*pi)*s) -(1/2)*(v + 1)*log(1 + (1/v)*((y-f)/(s))^2))
$$\frac{dp(y_{i}|f_{i})}{d\sigma} = -\frac{1}{\sigma} + \frac{(1+v)(y_{i}-f_{i})^2}{\sigma^3 v(1 + \frac{1}{v}(\frac{(y_{i} - f_{i})}{\sigma^2})^2)}$$
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f e = y - f
sigma = np.sqrt(self.sigma2) dlik_dsigma = -0.5*self.N*self._variance - 0.5*np.dot(e.T, e)
#dlik_dsigma = ( - (1/sigma) +
#((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
#)
#dlik_dsigma = ( - 1 +
#((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
#)
#dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
return dlik_dsigma return dlik_dsigma
def dlik_df_dstd(self, y, f, extra_data=None): def dlik_df_dstd(self, y, f, extra_data=None):
""" """
Gradient of the dlik_df w.r.t sigma parameter (standard deviation) Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
$$\frac{d}{d\sigma}(\frac{dp(y_{i}|f_{i})}{df}) = \frac{-2\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \sigma^2 v)^2}$$
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f s_4 = 1.0/(self._variance**2)
sigma = np.sqrt(self.sigma2) dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f))
dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
/ ((self.v*self.sigma2 + e**2)**2)
)
return dlik_grad_dsigma return dlik_grad_dsigma
def d2lik_d2f_dstd(self, y, f, extra_data=None): def d2lik_d2f_dstd(self, y, f, extra_data=None):
@ -594,13 +570,7 @@ class gaussian(likelihood_function):
$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$ $$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
""" """
assert y.shape == f.shape assert y.shape == f.shape
e = y - f dlik_hess_dsigma = 1.0/(2*(self._variance**2))
sigma = np.sqrt(self.sigma2)
dlik_hess_dsigma = ( (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
((e**2 + self.sigma2*self.v)**3)
)
#dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
#/ ((e**2 + (self.sigma**2)*self.v)**3) )
return dlik_hess_dsigma return dlik_hess_dsigma
def _gradients(self, y, f, extra_data=None): def _gradients(self, y, f, extra_data=None):