diff --git a/GPy/inference/optimization/optimization.py b/GPy/inference/optimization/optimization.py index aa9be793..5aa2ed03 100644 --- a/GPy/inference/optimization/optimization.py +++ b/GPy/inference/optimization/optimization.py @@ -140,6 +140,10 @@ class opt_lbfgsb(Optimizer): self.funct_eval = opt_result[2]['funcalls'] self.status = rcstrings[opt_result[2]['warnflag']] + #a more helpful error message is available in opt_result in the Error case + if opt_result[2]['warnflag']==2: + self.status = 'Error' + opt_result[2]['task'] + class opt_simplex(Optimizer): def __init__(self, *args, **kwargs): Optimizer.__init__(self, *args, **kwargs) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index f5690aa4..2febda96 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -2,10 +2,10 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf + import link_functions from likelihood import Likelihood -from scipy import stats class Bernoulli(Likelihood): """ @@ -81,19 +81,18 @@ class Bernoulli(Likelihood): if isinstance(self.gp_link, link_functions.Probit): if gh_points is None: - gh_x, gh_w = np.polynomial.hermite.hermgauss(20) + gh_x, gh_w = self._gh_points() else: gh_x, gh_w = gh_points - from scipy import stats shape = m.shape m,v,Y = m.flatten(), v.flatten(), Y.flatten() Ysign = np.where(Y==1,1,-1) X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None] - p = stats.norm.cdf(X) + p = std_norm_cdf(X) p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability - N = stats.norm.pdf(X) + N = std_norm_pdf(X) F = np.log(p).dot(gh_w) NoverP = N/p dF_dm = (NoverP*Ysign[:,None]).dot(gh_w) @@ -106,10 +105,10 @@ class Bernoulli(Likelihood): def predictive_mean(self, mu, variance, Y_metadata=None): if isinstance(self.gp_link, link_functions.Probit): - return stats.norm.cdf(mu/np.sqrt(1+variance)) + return std_norm_cdf(mu/np.sqrt(1+variance)) elif isinstance(self.gp_link, link_functions.Heaviside): - return stats.norm.cdf(mu/np.sqrt(variance)) + return std_norm_cdf(mu/np.sqrt(variance)) else: raise NotImplementedError diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 4f3f2e37..9f2f3e7a 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) +# Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np @@ -165,6 +165,13 @@ class Likelihood(Parameterized): return z, mean, variance + #only compute gh points if required + __gh_points = None + def _gh_points(self): + if self.__gh_points is None: + self.__gh_points = np.polynomial.hermite.hermgauss(20) + return self.__gh_points + def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): """ Use Gauss-Hermite Quadrature to compute @@ -177,10 +184,9 @@ class Likelihood(Parameterized): if no gh_points are passed, we construct them using defualt options """ - #May be broken if gh_points is None: - gh_x, gh_w = np.polynomial.hermite.hermgauss(20) + gh_x, gh_w = self._gh_points() else: gh_x, gh_w = gh_points diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index a4ddc760..6b297f92 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -1,10 +1,9 @@ -# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) +# Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from scipy import stats +from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf import scipy as sp -from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf _exp_lim_val = np.finfo(np.float64).max _lim_val = np.log(_exp_lim_val) @@ -64,13 +63,12 @@ class Identity(GPTransformation): def d3transf_df3(self,f): return np.zeros_like(f) - class Probit(GPTransformation): """ .. math:: g(f) = \\Phi^{-1} (mu) - + """ def transf(self,f): return std_norm_cdf(f) @@ -79,13 +77,10 @@ class Probit(GPTransformation): return std_norm_pdf(f) def d2transf_df2(self,f): - #FIXME return -f * std_norm_pdf(f) def d3transf_df3(self,f): - #FIXME - f2 = f**2 - return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) + return (np.square(f)-1.)*std_norm_pdf(f) class Cloglog(GPTransformation): @@ -98,7 +93,7 @@ class Cloglog(GPTransformation): or f = \log (-\log(1-p)) - + """ def transf(self,f): return 1-np.exp(-np.exp(f)) @@ -123,16 +118,16 @@ class Log(GPTransformation): """ def transf(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) def dtransf_df(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) def d2transf_df2(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) def d3transf_df3(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) class Log_ex_1(GPTransformation): """ @@ -174,7 +169,7 @@ class Heaviside(GPTransformation): .. math:: - g(f) = I_{x \\in A} + g(f) = I_{x \\geq 0} """ def transf(self,f): diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 559014f7..5950de08 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -476,7 +476,7 @@ class GradientTests(np.testing.TestCase): likelihood = GPy.likelihoods.MixedNoise(likelihoods_list=likelihoods_list) m = GPy.core.SparseGP(X, Y, X[np.random.choice(num_obs, 10)], kern, likelihood, - GPy.inference.latent_function_inference.VarDTC(), + inference_method=GPy.inference.latent_function_inference.VarDTC(), Y_metadata=Y_metadata) self.assertTrue(m.checkgrad()) diff --git a/GPy/util/misc.py b/GPy/util/misc.py index 99bd62b3..84bf4dc1 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -23,7 +23,7 @@ def chain_1(df_dg, dg_dx): """ if np.all(dg_dx==1.): return df_dg - if len(df_dg) > 1 and df_dg.shape[-1] > 1: + if len(df_dg) > 1 and len(df_dg.shape)>1 and df_dg.shape[-1] > 1: import ipdb; ipdb.set_trace() # XXX BREAKPOINT raise NotImplementedError('Not implemented for matricies yet') return df_dg * dg_dx @@ -37,7 +37,7 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2): """ if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0): return d2f_dg2 - if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1: + if len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1: raise NotImplementedError('Not implemented for matricies yet') #dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2 dg_dx_2 = dg_dx**2 diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py index 09b2e99c..79864f86 100644 --- a/GPy/util/univariate_Gaussian.py +++ b/GPy/util/univariate_Gaussian.py @@ -1,77 +1,15 @@ # Copyright (c) 2012, 2013 Ricardo Andrade +# Copyright (c) 2015 James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np from scipy import weave +from scipy.special import ndtr as std_norm_cdf +#define a standard normal pdf +_sqrt_2pi = np.sqrt(2*np.pi) def std_norm_pdf(x): - """Standard Gaussian density function""" - return 1./np.sqrt(2.*np.pi)*np.exp(-.5*x**2) - -def std_norm_cdf(x): - """ - Cumulative standard Gaussian distribution - Based on Abramowitz, M. and Stegun, I. (1970) - """ - x_shape = np.asarray(x).shape - - if len(x_shape) == 0 or x_shape[0] == 1: - sign = np.sign(x) - x *= sign - x /= np.sqrt(2.) - t = 1.0/(1.0 + 0.3275911*x) - erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429))))) - cdf_x = 0.5*(1.0 + sign*erf) - return cdf_x - else: - x = np.atleast_1d(x).copy() - cdf_x = np.zeros_like(x) - sign = np.ones_like(x) - neg_x_ind = x<0 - sign[neg_x_ind] = -1.0 - x[neg_x_ind] = -x[neg_x_ind] - x /= np.sqrt(2.) - t = 1.0/(1.0 + 0.3275911*x) - erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429))))) - cdf_x = 0.5*(1.0 + sign*erf) - cdf_x = cdf_x.reshape(x_shape) - return cdf_x - -def std_norm_cdf_weave(x): - """ - Cumulative standard Gaussian distribution - Based on Abramowitz, M. and Stegun, I. (1970) - - A weave implementation of std_norm_cdf, which is faster. this is unused, - because of the difficulties of a weave dependency. (see github issue #94) - - """ - #Generalize for many x - x = np.asarray(x).copy() - cdf_x = np.zeros_like(x) - N = x.size - support_code = "#include " - code = """ - - double sign, t, erf; - for (int i=0; i