diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 0ef6e15e..368ecd9f 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -89,7 +89,7 @@ class GP(Model): self.link_parameter(self.kern) self.link_parameter(self.likelihood) - def set_XY(self, X=None, Y=None): + def set_XY(self, X=None, Y=None, trigger_update=True): """ Set the input / output data of the model This is useful if we wish to change our existing data but maintain the same model @@ -99,7 +99,7 @@ class GP(Model): :param Y: output observations :type Y: np.ndarray """ - self.update_model(False) + if trigger_update: self.update_model(False) if Y is not None: if self.normalizer is not None: self.normalizer.scale_by(Y) @@ -123,26 +123,26 @@ class GP(Model): self.link_parameters(self.X) else: self.X = ObsAr(X) - self.update_model(True) - self._trigger_params_changed() + if trigger_update: self.update_model(True) + if trigger_update: self._trigger_params_changed() - def set_X(self,X): + def set_X(self, X, trigger_update=True): """ Set the input data of the model :param X: input observations :type X: np.ndarray """ - self.set_XY(X=X) + self.set_XY(X=X, trigger_update=trigger_update) - def set_Y(self,Y): + def set_Y(self, Y, trigger_update=True): """ Set the output data of the model :param Y: output observations :type Y: np.ndarray """ - self.set_XY(Y=Y) + self.set_XY(Y=Y, trigger_update=trigger_update) def parameters_changed(self): """ diff --git a/GPy/core/model.py b/GPy/core/model.py index 6f6f0ee8..f6400588 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -214,14 +214,14 @@ class Model(Parameterized): self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10) return obj_f, self.obj_grads - def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, **kwargs): + def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs): """ Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. kwargs are passed to the optimizer.
They can be: - :param max_f_eval: maximum number of function evaluations - :type max_f_eval: int + :param max_iters: maximum number of iterations + :type max_iters: int :param messages: True: Display messages during optimisation, "ipython_notebook": :type messages: bool or string :param optimizer: which optimizer to use (defaults to self.preferred optimizer) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 709f800a..d3e3f592 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -62,6 +62,15 @@ class SparseGP(GP): def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) + + def set_Z(self, Z, trigger_update=True): + if trigger_update: self.update_model(False) + self.unlink_parameter(self.Z) + from ..core import Param + self.Z = Param('inducing inputs', Z) + self.link_parameter(self.Z, index=0) + if trigger_update: self.update_model(True) + if trigger_update: self._trigger_params_changed() def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py index a0eff842..8330cc7e 100644 --- a/GPy/core/svgp.py +++ b/GPy/core/svgp.py @@ -25,20 +25,15 @@ class SVGP(SparseGP): Hensman, Matthews and Ghahramani, Scalable Variational GP Classification, ArXiv 1411.2005 """ - if batchsize is None: - batchsize = X.shape[0] - - self.X_all, self.Y_all = X, Y - # how to rescale the batch likelihood in case of minibatches self.batchsize = batchsize - batch_scale = float(self.X_all.shape[0])/float(self.batchsize) - #KL_scale = 1./np.float64(self.mpi_comm.size) - KL_scale = 1.0 - - import climin.util - #Make a climin slicer to make drawing minibatches much quicker. Annoyingly, this doesn't pickle.
- self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize) - X_batch, Y_batch = self.new_batch() + self.X_all, self.Y_all = X, Y + if batchsize is None: + X_batch, Y_batch = X, Y + else: + import climin.util + #Make a climin slicer to make drawing minibatches much quicker + self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize) + X_batch, Y_batch = self.new_batch() #create the SVI inference method inf_method = svgp_inf() diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index 4b1d0220..db2ac713 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -11,7 +11,7 @@ def exponents(fnow, current_grad): return np.sign(exps) * np.log10(exps).astype(int) class VerboseOptimization(object): - def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True): + def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True, clear_after_finish=False): self.verbose = verbose if self.verbose: self.model = model @@ -22,6 +22,7 @@ class VerboseOptimization(object): self.opt_name = opt.opt_name self.model.add_observer(self, self.print_status) self.status = 'running' + self.clear = clear_after_finish self.update() @@ -37,30 +38,31 @@ class VerboseOptimization(object): self.ipython_notebook = False if self.ipython_notebook: - self.text.set_css('width', '100%') - #self.progress.set_css('width', '100%') - left_col = ContainerWidget(children = [self.progress, self.text]) right_col = ContainerWidget(children = [self.model_show]) - hor_align = ContainerWidget(children = [left_col, right_col]) + self.hor_align = ContainerWidget(children = [left_col, right_col]) - display(hor_align) + display(self.hor_align) + + try: + self.text.set_css('width', '100%') + left_col.set_css({ + 'padding': '2px', + 'width': "100%", + }) + + right_col.set_css({ + 'padding': '2px', + }) + + self.hor_align.set_css({ + 'width': "100%", + }) + except Exception: + pass - left_col.set_css({ - 'padding': '2px', - 'width': "100%", - }) - - right_col.set_css({ - 'padding': '2px', - }) - - hor_align.set_css({ - 'width': "100%", - }) - - hor_align.remove_class('vbox') - hor_align.add_class('hbox') + self.hor_align.remove_class('vbox') + self.hor_align.add_class('hbox') left_col.add_class("box-flex1") right_col.add_class('box-flex0') @@ -148,3 +150,5 @@ class VerboseOptimization(object): print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)) print('Optimization status: {0}'.format(self.status)) print() + elif self.clear: + self.hor_align.close() diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py index 9726335f..8bffd9d7 100644 --- a/GPy/inference/latent_function_inference/svgp.py +++ b/GPy/inference/latent_function_inference/svgp.py @@ -43,7 +43,7 @@ class SVGP(LatentFunctionInference): #quadrature for the likelihood - F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v) + F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata) #rescale the F term if working on a batch F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 77f0d76e..696a8b04 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -181,9 +181,12 @@ class Add(CombinationKernel): def input_sensitivity(self, summarize=True): if summarize: - return reduce(np.add,
[k.input_sensitivity(summarize) for k in self.parts]) + i_s = np.zeros((self.input_dim)) + for k in self.parts: + i_s[k.active_dims] += k.input_sensitivity(summarize) + return i_s else: i_s = np.zeros((len(self.parts), self.input_dim)) from operator import setitem - [setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)] + [setitem(i_s, (i, k.active_dims), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)] return i_s diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index ef29da08..3157bd5b 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -6,3 +6,5 @@ from .poisson import Poisson from .student_t import StudentT from .likelihood import Likelihood from .mixed_noise import MixedNoise +from .binomial import Binomial + diff --git a/GPy/likelihoods/binomial.py b/GPy/likelihoods/binomial.py new file mode 100644 index 00000000..22009968 --- /dev/null +++ b/GPy/likelihoods/binomial.py @@ -0,0 +1,125 @@ +# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +from . import link_functions +from .likelihood import Likelihood +from scipy import special + +class Binomial(Likelihood): + """ + Binomial likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\binom{N_{i}}{y_{i}}\\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{N_{i}-y_{i}} + + .. Note:: + Each y_i is an integer count in {0, 1, ..., N_i}, where the number of trials N_i is supplied in Y_metadata['trials']. + The link function should have the domain [0, 1], e.g. probit (default) or Heaviside + + .. See also:: + likelihood.py, for the parent class + """ + def __init__(self, gp_link=None): + if gp_link is None: + gp_link = link_functions.Probit() + + super(Binomial, self).__init__(gp_link, 'Binomial') + + def conditional_mean(self, gp, Y_metadata): + return self.gp_link.transf(gp)*Y_metadata['trials'] + + def pdf_link(self, inv_link_f, y, Y_metadata): + """ + Likelihood function given inverse link of f. + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\binom{N_{i}}{y_{i}}\\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{N_{i}-y_{i}} + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata must contain 'trials' + :returns: likelihood evaluated for this point + :rtype: float + + .. Note: + Each y_i must be in {0, 1, ..., N_i} + """ + return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata)) + + def logpdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Log Likelihood function given inverse link of f. + + .. math:: + \\ln p(y_{i}|\\lambda(f_{i})) = \\ln \\binom{N_{i}}{y_{i}} + y_{i}\\ln \\lambda(f_{i}) + (N_{i}-y_{i})\\ln (1-\\lambda(f_{i})) + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata must contain 'trials' + :returns: log likelihood evaluated at points inverse link of f. + :rtype: float + """ + N = Y_metadata['trials'] + nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1) + + return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f) + + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): + """ + Gradient of the log pdf at y, given inverse link of f w.r.t inverse link of f. + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata must contain 'trials' + :returns: gradient of log likelihood evaluated at points inverse link of f.
+ :rtype: Nx1 array + """ + N = Y_metadata['trials'] + return y/inv_link_f - (N-y)/(1-inv_link_f) + + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): + """ + Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j + i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j. + + + .. math:: + \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = -\\frac{y_{i}}{\\lambda(f)^{2}} - \\frac{(N_{i}-y_{i})}{(1-\\lambda(f))^{2}} + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata must contain 'trials' + :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.) + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)) + """ + N = Y_metadata['trials'] + return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f) + + def samples(self, gp, Y_metadata=None): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + N = Y_metadata['trials'] + Ysim = np.random.binomial(np.asarray(N).flatten(), self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) + + def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None): + pass diff --git a/GPy/likelihoods/exponential.py b/GPy/likelihoods/exponential.py index 1df48412..0a6c543d 100644 --- a/GPy/likelihoods/exponential.py +++ b/GPy/likelihoods/exponential.py @@ -57,9 +57,8 @@ class Exponential(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape log_objective = np.log(link_f) - y*link_f - return np.sum(log_objective) + return log_objective def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -77,7 +76,6 @@ :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape grad = 1./link_f - y #grad = y/(link_f**2) - 1./link_f return grad @@ -103,7 +101,6 @@ Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape hess = -1./(link_f**2) #hess = -2*y/(link_f**3) + 1/(link_f**2) return hess @@ -123,7 +120,6 @@ :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape d3lik_dlink3 = 2./(link_f**3) #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) return d3lik_dlink3 diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py index c153bd1c..79aba4a5 100644 --- a/GPy/likelihoods/gamma.py +++ b/GPy/likelihoods/gamma.py @@ -66,12 +66,11 @@ class Gamma(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape #alpha = self.gp_link.transf(gp)*self.beta #return (1.
- alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) alpha = link_f*self.beta log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y - return np.sum(log_objective) + return log_objective def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -90,7 +89,6 @@ class Gamma(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta #old #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta @@ -118,7 +116,6 @@ class Gamma(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2) #old #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta @@ -140,6 +137,5 @@ class Gamma(Likelihood): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) return d3lik_dlink3 diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 8029eeba..eb0e0e78 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -130,11 +130,10 @@ class Gaussian(Likelihood): :returns: log likelihood evaluated for this point :rtype: float """ - assert np.asarray(link_f).shape == np.asarray(y).shape N = y.shape[0] - ln_det_cov = N*np.log(self.variance) + ln_det_cov = np.log(self.variance) - return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi)) + return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi)) def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -151,8 +150,7 @@ class Gaussian(Likelihood): :returns: gradient of log likelihood evaluated at points link(f) :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape - s2_i = (1.0/self.variance) + s2_i = 1.0/self.variance grad = s2_i*y - s2_i*link_f return grad @@ -178,9 +176,9 @@ class Gaussian(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.asarray(link_f).shape == np.asarray(y).shape N = y.shape[0] - hess = -(1.0/self.variance)*np.ones((N, 1)) + D = link_f.shape[1] + hess = -(1.0/self.variance)*np.ones((N, D)) return hess def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): @@ -198,9 +196,9 @@ class Gaussian(Likelihood): :returns: third derivative of log likelihood evaluated at points link(f) :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape N = y.shape[0] - d3logpdf_dlink3 = np.zeros((N,1)) + D = link_f.shape[1] + d3logpdf_dlink3 = np.zeros((N,D)) return d3logpdf_dlink3 def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): @@ -218,12 +216,11 @@ class Gaussian(Likelihood): :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter 
:rtype: float """ - assert np.asarray(link_f).shape == np.asarray(y).shape e = y - link_f s_4 = 1.0/(self.variance**2) N = y.shape[0] - dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e)) - return np.sum(dlik_dsigma) # Sure about this sum? + dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e) + return dlik_dsigma def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): """ @@ -240,7 +237,6 @@ class Gaussian(Likelihood): :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape s_4 = 1.0/(self.variance**2) dlik_grad_dsigma = -s_4*y + s_4*link_f return dlik_grad_dsigma @@ -260,15 +256,15 @@ class Gaussian(Likelihood): :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape s_4 = 1.0/(self.variance**2) N = y.shape[0] - d2logpdf_dlink2_dvar = np.ones((N,1))*s_4 + D = link_f.shape[1] + d2logpdf_dlink2_dvar = np.ones((N, D))*s_4 return d2logpdf_dlink2_dvar def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) - return np.asarray([[dlogpdf_dvar]]) + return dlogpdf_dvar def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 813f912f..ddf132f8 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -131,7 +131,7 @@ class Likelihood(Parameterized): return z, mean, variance - def variational_expectations(self, Y, m, v, gh_points=None): + def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): """ Use Gauss-Hermite Quadrature to compute @@ -158,9 +158,9 @@ class Likelihood(Parameterized): #evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid. # broadcast needs to be handled carefully. 
- logp = self.logpdf(X,Y[:,None]) - dlogp_dx = self.dlogpdf_df(X, Y[:,None]) - d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None]) + logp = self.logpdf(X,Y[:,None], Y_metadata=Y_metadata) + dlogp_dx = self.dlogpdf_df(X, Y[:,None], Y_metadata=Y_metadata) + d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None], Y_metadata=Y_metadata) #clipping for numerical stability #logp = np.clip(logp,-1e9,1e9) @@ -425,7 +425,7 @@ class Likelihood(Parameterized): return np.zeros([f.shape[0], 0]) def _laplace_gradients(self, f, y, Y_metadata=None): - dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata) + dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0) dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata) d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata) diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py index d6c4334b..5aa85a91 100644 --- a/GPy/likelihoods/poisson.py +++ b/GPy/likelihoods/poisson.py @@ -64,8 +64,7 @@ class Poisson(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1)) + return -link_f + y*np.log(link_f) - special.gammaln(y+1) def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -83,7 +82,6 @@ class Poisson(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape return y/link_f - 1 def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): @@ -107,12 +105,7 @@ class Poisson(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - hess = -y/(link_f**2) - return hess - #d2_df = self.gp_link.d2transf_df2(gp) - #transf = self.gp_link.transf(gp) - #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df + return -y/(link_f**2) def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ @@ -129,7 +122,6 @@ class Poisson(Likelihood): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape d3lik_dlink3 = 2*y/(link_f)**3 return d3lik_dlink3 diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index 745ce9e8..131c7e63 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -86,7 +86,6 @@ class StudentT(Likelihood): :rtype: float """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f #FIXME: #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! 
@@ -97,7 +96,7 @@ class StudentT(Likelihood): - 0.5*np.log(self.sigma2 * self.v * np.pi) - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) ) - return np.sum(objective) + return objective def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): """ @@ -115,7 +114,6 @@ class StudentT(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) return grad @@ -141,7 +139,6 @@ class StudentT(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess @@ -161,7 +158,6 @@ class StudentT(Likelihood): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / ((e**2 + self.sigma2*self.v)**3) @@ -183,10 +179,9 @@ class StudentT(Likelihood): :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) - return np.sum(dlogpdf_dvar) + return dlogpdf_dvar def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): """ @@ -203,7 +198,6 @@ class StudentT(Likelihood): :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) return dlogpdf_dlink_dvar @@ -223,7 +217,6 @@ class StudentT(Likelihood): :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) @@ -246,7 +239,7 @@ class StudentT(Likelihood): return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) def predictive_mean(self, mu, sigma, Y_metadata=None): - # The comment here confuses mean and median. + # The comment here confuses mean and median. return self.gp_link.transf(mu) # only true if link is monotonic, which it is. 
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 3fe01c46..eb33bba4 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -362,7 +362,7 @@ class TestNoiseModels(object): def t_dlogpdf_df(self, model, Y, f): print("\n{}".format(inspect.stack()[0][3])) self.description = "\n{}".format(inspect.stack()[0][3]) - logpdf = functools.partial(model.logpdf, y=Y) + logpdf = lambda x: np.sum(model.logpdf(x, y=Y)) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') grad.randomize() @@ -652,9 +652,9 @@ class LaplaceTests(unittest.TestCase): print(m2) optimizer = 'scg' print("Gaussian") - m1.optimize(optimizer, messages=debug) - print("Laplace Gaussian") - m2.optimize(optimizer, messages=debug) + m1.optimize(optimizer, messages=debug, ipython_notebook=False) + print("Laplace Gaussian") + m2.optimize(optimizer, messages=debug, ipython_notebook=False) if debug: print(m1) print(m2)
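
A note for reviewers: below is a minimal usage sketch of the new Binomial likelihood. It is not part of the patch; the data and variable names are hypothetical, and it assumes the SVGP constructor keeps its (X, Y, Z, kernel, likelihood, ..., Y_metadata=None, batchsize=None) signature. The per-observation trial counts are passed via Y_metadata['trials'], which this patch now threads through Likelihood.variational_expectations.

import numpy as np
import GPy

# Hypothetical toy data: 40 inputs, each observed as a count out of 25 trials.
X = np.random.rand(40, 1)
trials = np.full((40, 1), 25)
p_true = 1.0 / (1.0 + np.exp(-np.sin(6.0 * X)))      # latent success probability
Y = np.random.binomial(trials, p_true).astype(float)

# Binomial likelihood (probit link by default); trials enter via Y_metadata.
Z = np.random.rand(8, 1)                             # inducing inputs
m = GPy.core.SVGP(X, Y, Z, GPy.kern.RBF(1), GPy.likelihoods.Binomial(),
                  Y_metadata={'trials': trials})     # batchsize=None: full batch, per this patch
m.optimize(messages=False)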
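Similarly, a sketch of the intent behind the new trigger_update flags on set_XY / set_X / set_Y / set_Z: they let a caller swap several pieces of data and pay for a single recompute instead of one per setter. Continuing with the sparse model m from the sketch above; X_new, Y_new and Z_new are hypothetical replacement arrays.

# Batch several updates, then recompute once.
m.update_model(False)                         # freeze recomputation
m.set_XY(X_new, Y_new, trigger_update=False)  # swap data without recomputing
m.set_Z(Z_new, trigger_update=False)          # swap inducing inputs (SparseGP/SVGP)
m.update_model(True)                          # unfreeze
m._trigger_params_changed()                   # one recompute for all three changes

# The other new flag: clear_after_finish removes the notebook progress widget
# once optimization ends (only meaningful with ipython_notebook=True).
m.optimize(messages=True, ipython_notebook=True, clear_after_finish=True)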