diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 153b0ab9..f2ff60e5 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -113,10 +113,8 @@ class Bernoulli(Likelihood): .. Note: Each y_i must be in {0, 1} """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y)) - objective = np.where(y, inv_link_f, 1.-inv_link_f) - return np.exp(np.sum(np.log(objective))) + return np.where(y, inv_link_f, 1.-inv_link_f) def logpdf_link(self, inv_link_f, y, Y_metadata=None): """ @@ -133,9 +131,7 @@ class Bernoulli(Likelihood): :returns: log likelihood evaluated at points inverse link of f. :rtype: float """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f) - state = np.seterr(divide='ignore') p = np.where(y==1, inv_link_f, 1.-inv_link_f) return np.log(p) @@ -154,13 +150,10 @@ class Bernoulli(Likelihood): :returns: gradient of log likelihood evaluated at points inverse link of f. :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f) - state = np.seterr(divide='ignore') - # TODO check y \in {0, 1} or {-1, 1} - grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f)) - np.seterr(**state) - return grad + #grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f)) + denom = np.where(y, inv_link_f, -(1-inv_link_f)) + return 1./denom def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ @@ -183,13 +176,12 @@ class Bernoulli(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i) """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2) state = np.seterr(divide='ignore') # TODO check y \in {0, 1} or {-1, 1} - d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f)) - np.seterr(**state) - return d2logpdf_dlink2 + #d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f)) + arg = np.where(y, inv_link_f, 1.-inv_link_f) + return -1./np.square(arg) def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): """ diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 56373097..e2ecccc3 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -146,20 +146,26 @@ class Likelihood(Parameterized): if gh_points is None: gh_x, gh_w = np.polynomial.hermite.hermgauss(20) + else: + gh_x, gh_w = gh_points shape = m.shape m,v,Y = m.flatten(), v.flatten(), Y.flatten() #make a grid of points X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None] - logp = self.logpdf(X,Y[:,None]) - p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability - N = stats.norm.pdf(X) - F = np.log(p).dot(self.gh_w) - NoverP = N/p - dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w) - dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w) + #evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid. + # broadcast needs to be handled carefully. + logp = self.logpdf(X,Y[:,None]) + dlogp_dx = self.dlogpdf_df(X, Y[:,None]) + d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None]) + + #average over the gird to get derivatives of the Gaussian's parameters + F = np.dot(logp, gh_w) + dF_dm = np.dot(dlogp_dx, gh_w) + dF_dv = np.dot(d2logp_dx2, gh_w)/2. + return F, dF_dm, dF_dv diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index a58e80eb..19cff9b8 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -352,7 +352,7 @@ class TestNoiseModels(object): print model #print model._get_params() np.testing.assert_almost_equal( - model.pdf(f.copy(), Y.copy()), + model.pdf(f.copy(), Y.copy()).prod(), np.exp(model.logpdf(f.copy(), Y.copy()).sum()) )