mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-04-28 06:16:24 +02:00
Started adding gaussian sanity checker
This commit is contained in:
parent
fdb7b99e0b
commit
9364efc755
3 changed files with 60 additions and 88 deletions
|
|
@ -168,23 +168,23 @@ def student_t_f_check():
|
||||||
m.randomize()
|
m.randomize()
|
||||||
m['t_no'] = 0.3
|
m['t_no'] = 0.3
|
||||||
m.likelihood.X = X
|
m.likelihood.X = X
|
||||||
print m
|
#print m
|
||||||
plt.figure()
|
plt.figure()
|
||||||
plt.subplot(511)
|
plt.subplot(511)
|
||||||
m.plot()
|
m.plot()
|
||||||
print m
|
#print m
|
||||||
plt.subplot(512)
|
plt.subplot(512)
|
||||||
m.optimize(max_f_eval=15)
|
m.optimize(max_f_eval=15)
|
||||||
m.plot()
|
m.plot()
|
||||||
print m
|
#print m
|
||||||
plt.subplot(513)
|
plt.subplot(513)
|
||||||
m.optimize(max_f_eval=15)
|
m.optimize(max_f_eval=15)
|
||||||
m.plot()
|
m.plot()
|
||||||
print m
|
#print m
|
||||||
plt.subplot(514)
|
plt.subplot(514)
|
||||||
m.optimize(max_f_eval=15)
|
m.optimize(max_f_eval=15)
|
||||||
m.plot()
|
m.plot()
|
||||||
print m
|
#print m
|
||||||
plt.subplot(515)
|
plt.subplot(515)
|
||||||
m.optimize()
|
m.optimize()
|
||||||
m.plot()
|
m.plot()
|
||||||
|
|
|
||||||
|
|
@ -89,7 +89,8 @@ class Laplace(likelihood):
|
||||||
expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
|
expl = 0.5*expl_a + 0.5*expl_b # Might need to be -?
|
||||||
dL_dthetaK_exp = dK_dthetaK(expl, X)
|
dL_dthetaK_exp = dK_dthetaK(expl, X)
|
||||||
dL_dthetaK_imp = dK_dthetaK(impl, X)
|
dL_dthetaK_imp = dK_dthetaK(impl, X)
|
||||||
print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
|
#print "dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp)
|
||||||
|
#print "expl_a: {}, {} expl_b: {}, {}".format(np.mean(expl_a), np.std(expl_a), np.mean(expl_b), np.std(expl_b))
|
||||||
dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
|
dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
|
||||||
return dL_dthetaK
|
return dL_dthetaK
|
||||||
|
|
||||||
|
|
@ -165,8 +166,7 @@ class Laplace(likelihood):
|
||||||
self.aA = 0.5*self.ln_det_K_Wi__Bi
|
self.aA = 0.5*self.ln_det_K_Wi__Bi
|
||||||
self.bB = - 0.5*self.f_Ki_f
|
self.bB = - 0.5*self.f_Ki_f
|
||||||
self.cC = 0.5*self.y_Wi_Ki_i_y
|
self.cC = 0.5*self.y_Wi_Ki_i_y
|
||||||
Z_tilde = (#+ 100*self.NORMAL_CONST
|
Z_tilde = (+ self.lik
|
||||||
+ self.lik
|
|
||||||
+ 0.5*self.ln_det_K_Wi__Bi
|
+ 0.5*self.ln_det_K_Wi__Bi
|
||||||
- 0.5*self.f_Ki_f
|
- 0.5*self.f_Ki_f
|
||||||
+ 0.5*self.y_Wi_Ki_i_y
|
+ 0.5*self.y_Wi_Ki_i_y
|
||||||
|
|
@ -379,7 +379,8 @@ class Laplace(likelihood):
|
||||||
|
|
||||||
#difference = abs(new_obj - old_obj)
|
#difference = abs(new_obj - old_obj)
|
||||||
#old_obj = new_obj.copy()
|
#old_obj = new_obj.copy()
|
||||||
difference = np.abs(np.sum(f - f_old))
|
#difference = np.abs(np.sum(f - f_old))
|
||||||
|
difference = np.abs(np.sum(a - old_a))
|
||||||
#old_a = self.a.copy() #a
|
#old_a = self.a.copy() #a
|
||||||
old_a = a.copy()
|
old_a = a.copy()
|
||||||
i += 1
|
i += 1
|
||||||
|
|
@ -391,42 +392,43 @@ class Laplace(likelihood):
|
||||||
print "Iterations: {}, Final_difference: {}".format(i, difference)
|
print "Iterations: {}, Final_difference: {}".format(i, difference)
|
||||||
if difference > 1e-4:
|
if difference > 1e-4:
|
||||||
print "FAIL FAIL FAIL FAIL FAIL FAIL"
|
print "FAIL FAIL FAIL FAIL FAIL FAIL"
|
||||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
if False:
|
||||||
if hasattr(self, 'X'):
|
|
||||||
import pylab as pb
|
|
||||||
pb.figure()
|
|
||||||
pb.subplot(311)
|
|
||||||
pb.title('old f_hat')
|
|
||||||
pb.plot(self.X, self.f_hat)
|
|
||||||
pb.subplot(312)
|
|
||||||
pb.title('old ff')
|
|
||||||
pb.plot(self.X, self.old_ff)
|
|
||||||
pb.subplot(313)
|
|
||||||
pb.title('new f_hat')
|
|
||||||
pb.plot(self.X, f)
|
|
||||||
|
|
||||||
pb.figure()
|
|
||||||
pb.subplot(121)
|
|
||||||
pb.title('old K')
|
|
||||||
pb.imshow(np.diagflat(self.old_K), interpolation='none')
|
|
||||||
pb.colorbar()
|
|
||||||
pb.subplot(122)
|
|
||||||
pb.title('new K')
|
|
||||||
pb.imshow(np.diagflat(K), interpolation='none')
|
|
||||||
pb.colorbar()
|
|
||||||
|
|
||||||
pb.figure()
|
|
||||||
pb.subplot(121)
|
|
||||||
pb.title('old W')
|
|
||||||
pb.imshow(np.diagflat(self.old_W), interpolation='none')
|
|
||||||
pb.colorbar()
|
|
||||||
pb.subplot(122)
|
|
||||||
pb.title('new W')
|
|
||||||
pb.imshow(np.diagflat(W), interpolation='none')
|
|
||||||
pb.colorbar()
|
|
||||||
|
|
||||||
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||||
pb.close('all')
|
if hasattr(self, 'X'):
|
||||||
|
import pylab as pb
|
||||||
|
pb.figure()
|
||||||
|
pb.subplot(311)
|
||||||
|
pb.title('old f_hat')
|
||||||
|
pb.plot(self.X, self.f_hat)
|
||||||
|
pb.subplot(312)
|
||||||
|
pb.title('old ff')
|
||||||
|
pb.plot(self.X, self.old_ff)
|
||||||
|
pb.subplot(313)
|
||||||
|
pb.title('new f_hat')
|
||||||
|
pb.plot(self.X, f)
|
||||||
|
|
||||||
|
pb.figure()
|
||||||
|
pb.subplot(121)
|
||||||
|
pb.title('old K')
|
||||||
|
pb.imshow(np.diagflat(self.old_K), interpolation='none')
|
||||||
|
pb.colorbar()
|
||||||
|
pb.subplot(122)
|
||||||
|
pb.title('new K')
|
||||||
|
pb.imshow(np.diagflat(K), interpolation='none')
|
||||||
|
pb.colorbar()
|
||||||
|
|
||||||
|
pb.figure()
|
||||||
|
pb.subplot(121)
|
||||||
|
pb.title('old W')
|
||||||
|
pb.imshow(np.diagflat(self.old_W), interpolation='none')
|
||||||
|
pb.colorbar()
|
||||||
|
pb.subplot(122)
|
||||||
|
pb.title('new W')
|
||||||
|
pb.imshow(np.diagflat(W), interpolation='none')
|
||||||
|
pb.colorbar()
|
||||||
|
|
||||||
|
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||||
|
pb.close('all')
|
||||||
|
|
||||||
#FIXME: DELETE THESE
|
#FIXME: DELETE THESE
|
||||||
self.old_W = W.copy()
|
self.old_W = W.copy()
|
||||||
|
|
|
||||||
|
|
@ -239,7 +239,7 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2)
|
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||||
return hess
|
return hess
|
||||||
|
|
||||||
def d3lik_d3f(self, y, f, extra_data=None):
|
def d3lik_d3f(self, y, f, extra_data=None):
|
||||||
|
|
@ -277,7 +277,7 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2)
|
dlik_grad_dsigma = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||||
return dlik_grad_dsigma
|
return dlik_grad_dsigma
|
||||||
|
|
||||||
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
||||||
|
|
@ -289,7 +289,7 @@ class student_t(likelihood_function):
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||||
/ (self.sigma2*self.v + (e**2))**3
|
/ ((self.sigma2*self.v + (e**2))**3)
|
||||||
)
|
)
|
||||||
return dlik_hess_dsigma
|
return dlik_hess_dsigma
|
||||||
|
|
||||||
|
|
@ -479,7 +479,8 @@ class gaussian(likelihood_function):
|
||||||
|
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
self._variance = float(x)
|
self._variance = float(x)
|
||||||
self.covariance_matrix = np.eye(self.N) * self._variance
|
self.I = np.eye(self.N)
|
||||||
|
self.covariance_matrix = self.I * self._variance
|
||||||
self.Ki, _, _, self.ln_K = pdinv(self.covariance_matrix) # THIS MAY BE WRONG
|
self.Ki, _, _, self.ln_K = pdinv(self.covariance_matrix) # THIS MAY BE WRONG
|
||||||
|
|
||||||
def link_function(self, y, f, extra_data=None):
|
def link_function(self, y, f, extra_data=None):
|
||||||
|
|
@ -505,8 +506,6 @@ class gaussian(likelihood_function):
|
||||||
"""
|
"""
|
||||||
Gradient of the link function at y, given f w.r.t f
|
Gradient of the link function at y, given f w.r.t f
|
||||||
|
|
||||||
$$\frac{dp(y_{i}|f_{i})}{df} = \frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \sigma^{2}v}$$
|
|
||||||
|
|
||||||
:y: data
|
:y: data
|
||||||
:f: latent variables f
|
:f: latent variables f
|
||||||
:extra_data: extra_data which is not used in student t distribution
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
|
|
@ -514,8 +513,8 @@ class gaussian(likelihood_function):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
s2_i = (1.0/self._variance)*self.I
|
||||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
grad = np.dot(s2_i, y) - 0.5*np.dot(s2_i, f)
|
||||||
return grad
|
return grad
|
||||||
|
|
||||||
def d2lik_d2f(self, y, f, extra_data=None):
|
def d2lik_d2f(self, y, f, extra_data=None):
|
||||||
|
|
@ -526,16 +525,14 @@ class gaussian(likelihood_function):
|
||||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
|
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
|
||||||
|
|
||||||
$$\frac{d^{2}p(y_{i}|f_{i})}{d^{3}f} = \frac{(v+1)((y_{i}-f_{i})^{2} - \sigma^{2}v)}{((y_{i}-f_{i})^{2} + \sigma^{2}v)^{2}}$$
|
|
||||||
|
|
||||||
:y: data
|
:y: data
|
||||||
:f: latent variables f
|
:f: latent variables f
|
||||||
:extra_data: extra_data which is not used in student t distribution
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
|
:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
s2_i = (1.0/self._variance)*self.I
|
||||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2)
|
hess = np.diagonal(-0.5*s2_i)
|
||||||
return hess
|
return hess
|
||||||
|
|
||||||
def d3lik_d3f(self, y, f, extra_data=None):
|
def d3lik_d3f(self, y, f, extra_data=None):
|
||||||
|
|
@ -545,46 +542,25 @@ class gaussian(likelihood_function):
|
||||||
$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
|
$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
d3lik_d3f = np.diagonal(0*self.I)
|
||||||
d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
|
||||||
((e**2 + self.sigma2*self.v)**3)
|
|
||||||
)
|
|
||||||
return d3lik_d3f
|
return d3lik_d3f
|
||||||
|
|
||||||
def lik_dstd(self, y, f, extra_data=None):
|
def lik_dstd(self, y, f, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
|
Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
Terms relavent to derivatives wrt sigma are:
|
|
||||||
-log(sqrt(v*pi)*s) -(1/2)*(v + 1)*log(1 + (1/v)*((y-f)/(s))^2))
|
|
||||||
|
|
||||||
$$\frac{dp(y_{i}|f_{i})}{d\sigma} = -\frac{1}{\sigma} + \frac{(1+v)(y_{i}-f_{i})^2}{\sigma^3 v(1 + \frac{1}{v}(\frac{(y_{i} - f_{i})}{\sigma^2})^2)}$$
|
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
sigma = np.sqrt(self.sigma2)
|
dlik_dsigma = -0.5*self.N*self._variance - 0.5*np.dot(e.T, e)
|
||||||
#dlik_dsigma = ( - (1/sigma) +
|
|
||||||
#((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
|
|
||||||
#)
|
|
||||||
#dlik_dsigma = ( - 1 +
|
|
||||||
#((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
|
|
||||||
#)
|
|
||||||
#dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
|
|
||||||
dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
|
|
||||||
return dlik_dsigma
|
return dlik_dsigma
|
||||||
|
|
||||||
def dlik_df_dstd(self, y, f, extra_data=None):
|
def dlik_df_dstd(self, y, f, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
|
Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
$$\frac{d}{d\sigma}(\frac{dp(y_{i}|f_{i})}{df}) = \frac{-2\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \sigma^2 v)^2}$$
|
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
s_4 = 1.0/(self._variance**2)
|
||||||
sigma = np.sqrt(self.sigma2)
|
dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f))
|
||||||
dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
|
|
||||||
/ ((self.v*self.sigma2 + e**2)**2)
|
|
||||||
)
|
|
||||||
return dlik_grad_dsigma
|
return dlik_grad_dsigma
|
||||||
|
|
||||||
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
||||||
|
|
@ -594,13 +570,7 @@ class gaussian(likelihood_function):
|
||||||
$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
|
$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
dlik_hess_dsigma = 1.0/(2*(self._variance**2))
|
||||||
sigma = np.sqrt(self.sigma2)
|
|
||||||
dlik_hess_dsigma = ( (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
|
|
||||||
((e**2 + self.sigma2*self.v)**3)
|
|
||||||
)
|
|
||||||
#dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
|
|
||||||
#/ ((e**2 + (self.sigma**2)*self.v)**3) )
|
|
||||||
return dlik_hess_dsigma
|
return dlik_hess_dsigma
|
||||||
|
|
||||||
def _gradients(self, y, f, extra_data=None):
|
def _gradients(self, y, f, extra_data=None):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue