diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index efd9476f..6eaaf96c 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -34,7 +34,7 @@ class Mapping(Parameterized): raise NotImplementedError def df_dtheta(self, dL_df, X): - """The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters. + """The gradient of the outputs of the mapping with respect to each of the parameters. :param dL_df: gradient of the objective with respect to the function. :type dL_df: ndarray (num_data x output_dim) @@ -50,7 +50,7 @@ class Mapping(Parameterized): """ Plots the mapping associated with the model. - In one dimension, the function is plotted. - - In two dimsensions, a contour-plot shows the function + - In two dimensions, a contour-plot shows the function - In higher dimensions, we've not implemented this yet !TODO! Can plot only part of the data and part of the posterior functions diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 12315a29..9ba3f83f 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -1,4 +1,4 @@ -# Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) # #Parts of this file were influenced by the Matlab GPML framework written by @@ -91,7 +91,11 @@ class Laplace(object): iteration = 0 while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter: W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata) + if np.any(np.isnan(W)): + raise ValueError('One or more element(s) of W is NaN') grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata) + if np.any(np.isnan(grad)): + raise ValueError('One or more element(s) of grad is NaN') W_f = W*f @@ -141,25 +145,30 @@ class Laplace(object): """ #At this point get the hessian matrix (or vector as W is diagonal) W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) + if np.any(np.isnan(W)): + raise ValueError('One or more element(s) of W is NaN') K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave) #compute vital matrices C = np.dot(LiW12, K) - Ki_W_i = K - C.T.dot(C) #Could this be wrong? + Ki_W_i = K - C.T.dot(C) #compute the log marginal log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) - #Compute vival matrices for derivatives + # Compute matrices for derivatives dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat - dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9. + if np.any(np.isnan(dW_df)): + raise ValueError('One or more element(s) of dW_df is NaN') + + dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9. 
#BiK, _ = dpotrs(L, K, lower=1) #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i) #################### - #compute dL_dK# + # compute dL_dK # #################### if kern.size > 0 and not kern.is_fixed: #Explicit @@ -202,12 +211,12 @@ class Laplace(object): def _compute_B_statistics(self, K, W, log_concave): """ Rasmussen suggests the use of a numerically stable positive definite matrix B - Which has a positive diagonal element and can be easyily inverted + Which has a positive diagonal elements and can be easily inverted :param K: Prior Covariance matrix evaluated at locations X :type K: NxN matrix :param W: Negative hessian at a point (diagonal matrix) - :type W: Vector of diagonal values of hessian (1xN) + :type W: Vector of diagonal values of Hessian (1xN) :returns: (W12BiW12, L_B, Li_W12) """ if not log_concave: @@ -218,7 +227,8 @@ class Laplace(object): # If the likelihood is non-log-concave. We wan't to say that there is a negative variance # To cause the posterior to become less certain than the prior and likelihood, # This is a property only held by non-log-concave likelihoods - + if np.any(np.isnan(W)): + raise ValueError('One or more element(s) of W is NaN') #W is diagonal so its sqrt is just the sqrt of the diagonal elements W_12 = np.sqrt(W) B = np.eye(K.shape[0]) + W_12*K*W_12.T diff --git a/GPy/kern/_src/symbolic.py b/GPy/kern/_src/symbolic.py index 4f373fae..c7bbae73 100644 --- a/GPy/kern/_src/symbolic.py +++ b/GPy/kern/_src/symbolic.py @@ -94,8 +94,8 @@ class Symbolic(Kern): val = 1.0 # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class. 
if param is not None: - if param.has_key(theta): - val = param[theta] + if param.has_key(theta.name): + val = param[theta.name] setattr(self, theta.name, Param(theta.name, val, None)) self.add_parameters(getattr(self, theta.name)) diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index cfdfaf72..cf3f4287 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -4,7 +4,11 @@ from gaussian import Gaussian from gamma import Gamma from poisson import Poisson from student_t import StudentT +from sstudent_t import SstudentT from likelihood import Likelihood from mixed_noise import MixedNoise from symbolic import Symbolic from negative_binomial import Negative_binomial +from skew_normal import Skew_normal +from skew_exponential import Skew_exponential +from null_category import Null_category diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py index ddc430dc..5d052119 100644 --- a/GPy/likelihoods/symbolic.py +++ b/GPy/likelihoods/symbolic.py @@ -16,7 +16,6 @@ from GPy.util.functions import cum_gaussian, ln_cum_gaussian from likelihood import Likelihood from ..core.parameterization import Param -func_modules = ['numpy', {'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}] if sympy_available: class Symbolic(Likelihood): @@ -26,55 +25,73 @@ if sympy_available: Likelihood where the form of the likelihood is provided by a sympy expression. """ - def __init__(self, pdf=None, log_pdf=None, cdf=None, logZ=None, gp_link=None, name='symbolic', log_concave=False, param=None): + def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, param=None, func_modules=[]): + if gp_link is None: gp_link = link_functions.Identity() - if pdf is None and log_pdf is None and cdf is None: - raise ValueError, "You must provide an argument for the pdf or the log pdf." 
+ if log_pdf is None: + raise ValueError, "You must provide an argument for the log pdf." + + self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy'] super(Symbolic, self).__init__(gp_link, name=name) - - if pdf is None and log_pdf: - self._sp_pdf = sym.exp(log_pdf).simplify() - self._sp_log_pdf = log_pdf - - if log_pdf is None and pdf: - self._sp_pdf = pdf - self._sp_log_pdf = sym.log(pdf).simplify() - - # TODO: build pdf and log pdf from CDF or - # compute CDF given pdf/log-pdf. Also check log - # pdf, pdf and CDF are consistent. + self.missing_data = False + self._sym_log_pdf = log_pdf + if missing_log_pdf: + self.missing_data = True + self._sym_missing_log_pdf = missing_log_pdf # pull the variable names out of the symbolic pdf - sp_vars = [e for e in self._sp_pdf.atoms() if e.is_Symbol] - self._sp_f = [e for e in sp_vars if e.name=='f'] - if not self._sp_f: - raise ValueError('No variable f in pdf or log pdf.') - self._sp_y = [e for e in sp_vars if e.name=='y'] - if not self._sp_f: - raise ValueError('No variable y in pdf or log pdf.') - self._sp_theta = sorted([e for e in sp_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + sym_vars = [e for e in self._sym_log_pdf.atoms() if e.is_Symbol] + self._sym_f = [e for e in sym_vars if e.name=='f'] + if not self._sym_f: + raise ValueError('No variable f in log pdf.') + self._sym_y = [e for e in sym_vars if e.name=='y'] + if not self._sym_y: + raise ValueError('No variable y in log pdf.') + self._sym_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + + theta_names = [theta.name for theta in self._sym_theta] + if self.missing_data: + # pull the variable names out of missing data + sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol] + sym_f = [e for e in sym_vars if e.name=='f'] + if not sym_f: + raise ValueError('No 
variable f in missing log pdf.') + sym_y = [e for e in sym_vars if e.name=='y'] + if sym_y: + raise ValueError('Data is present in missing data portion of likelihood.') + # additional missing data parameters + missing_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='missing' or e.name in theta_names)],key=lambda e:e.name) + self._sym_theta += missing_theta + self._sym_theta = sorted(self._sym_theta, key=lambda e:e.name) # These are all the arguments need to compute likelihoods. - self.arg_list = self._sp_y + self._sp_f + self._sp_theta + self.arg_list = self._sym_y + self._sym_f + self._sym_theta # these are arguments for computing derivatives. - derivative_arguments = self._sp_f + self._sp_theta + derivative_arguments = self._sym_f + self._sym_theta # Do symbolic work to compute derivatives. - self._log_pdf_derivatives = {theta.name : sym.diff(self._sp_log_pdf,theta).simplify() for theta in derivative_arguments} + self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments} self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + if self.missing_data: + # Do symbolic work to compute derivatives. + self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments} + self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + + # Add parameters to the model. 
- for theta in self._sp_theta: + for theta in self._sym_theta: val = 1.0 # TODO: need to decide how to handle user passing values for the se parameter vectors. if param is not None: - if param.has_key(theta): - val = param[theta] + if param.has_key(theta.name): + val = param[theta.name] setattr(self, theta.name, Param(theta.name, val, None)) self.add_parameters(getattr(self, theta.name)) @@ -93,14 +110,18 @@ if sympy_available: """Generate the code from the symbolic parts that will be used for likleihod computation.""" # TODO: Check here whether theano is available and set up # functions accordingly. - self._pdf_function = lambdify(self.arg_list, self._sp_pdf, func_modules) - self._log_pdf_function = lambdify(self.arg_list, self._sp_log_pdf, func_modules) + self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules) # compute code for derivatives (for implicit likelihood terms # we need up to 3rd derivatives) - setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], func_modules) for key in self._log_pdf_derivatives.keys()}) - setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], func_modules) for key in self._log_pdf_second_derivatives.keys()}) - setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], func_modules) for key in self._log_pdf_third_derivatives.keys()}) + setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}) + setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}) + setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in 
self._log_pdf_third_derivatives.keys()}) + + if self.missing_data: + setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}) + setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}) + setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}) # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta @@ -121,7 +142,7 @@ if sympy_available: # computed in the inference code. TODO: Thought: How does this # effect EP? Shouldn't this be done by a separate # Laplace-approximation specific call? - for grad, theta in zip(grads, self._sp_theta): + for grad, theta in zip(grads, self._sym_theta): parameter = getattr(self, theta.name) setattr(parameter, 'gradient', grad) @@ -131,11 +152,11 @@ if sympy_available: # need to do a lot of precomputation to ensure that the # likelihoods and gradients are computed together, then check # for parameter changes before updating. 
- for i, fvar in enumerate(self._sp_f): + for i, fvar in enumerate(self._sym_f): self._arguments[fvar.name] = f - for i, yvar in enumerate(self._sp_y): + for i, yvar in enumerate(self._sym_y): self._arguments[yvar.name] = y - for theta in self._sp_theta: + for theta in self._sym_theta: self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) def pdf_link(self, inv_link_f, y, Y_metadata=None): @@ -150,10 +171,7 @@ if sympy_available: :returns: likelihood evaluated for this point :rtype: float """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape - self._arguments_update(inv_link_f, y) - l = self._pdf_function(**self._arguments) - return np.prod(l) + return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata=None)) def logpdf_link(self, inv_link_f, y, Y_metadata=None): """ @@ -170,7 +188,10 @@ if sympy_available: """ assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - ll = self._log_pdf_function(**self._arguments) + if self.missing_data: + ll = np.where(np.isnan(y), self._missing_log_pdf_function(**self._missing_arguments), self._log_pdf_function(**self._arguments)) + else: + ll = np.where(np.isnan(y), 0., self._log_pdf_function(**self._arguments)) return np.sum(ll) def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): @@ -188,7 +209,10 @@ if sympy_available: """ assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - return self._first_derivative_code['f'](**self._arguments) + if self.missing_data: + return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._missing_argments), self._first_derivative_code['f'](**self._argments)) + else: + return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments)) def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ @@ -212,36 +236,50 @@ if sympy_available: """ assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape 
self._arguments_update(inv_link_f, y) - return self._second_derivative_code['f'](**self._arguments) + if self.missing_data: + return np.where(np.isnan(y), self._missing_second_derivative_code['f'](**self._missing_arguments), self._second_derivative_code['f'](**self._arguments)) + else: + return np.where(np.isnan(y), 0., self._second_derivative_code['f'](**self._arguments)) def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - return self._third_derivative_code['f'](**self._arguments) - raise NotImplementedError + if self.missing_data: + return np.where(np.isnan(y), self._missing_third_derivative_code['f'](**self._missing_arguments), self._third_derivative_code['f'](**self._arguments)) + else: + return np.where(np.isnan(y), 0., self._third_derivative_code['f'](**self._arguments)) def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - g = np.zeros((y.shape[0], len(self._sp_theta))) - for i, theta in enumerate(self._sp_theta): - g[:, i:i+1] = self._first_derivative_code[theta.name](**self._arguments) + g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments)) return g.sum(0) def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - g = np.zeros((y.shape[0], len(self._sp_theta))) - for i, theta in enumerate(self._sp_theta): - g[:, i:i+1] = self._second_derivative_code[theta.name](**self._arguments) + 
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_second_derivative_code[theta.name](**self._arguments), self._second_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._second_derivative_code[theta.name](**self._arguments)) return g def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) - g = np.zeros((y.shape[0], len(self._sp_theta))) - for i, theta in enumerate(self._sp_theta): - g[:, i:i+1] = self._third_derivative_code[theta.name](**self._arguments) + g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_third_derivative_code[theta.name](**self._arguments), self._third_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._third_derivative_code[theta.name](**self._arguments)) return g def predictive_mean(self, mu, sigma, Y_metadata=None): diff --git a/GPy/util/functions.py b/GPy/util/functions.py new file mode 100644 index 00000000..a9ee1084 --- /dev/null +++ b/GPy/util/functions.py @@ -0,0 +1,18 @@ +import numpy as np +from scipy.special import erf, erfcx +import sys +epsilon = sys.float_info.epsilon +lim_val = -np.log(epsilon) + +def cum_gaussian(x): + g=0.5*(1+erf(x/np.sqrt(2))) + return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g)) + +def ln_cum_gaussian(x): + return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x))) + +def clip_exp(x): + if any(x>=lim_val) or any(x<=-lim_val): + return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val)) + else: + return np.exp(x) diff --git a/GPy/util/symbolic.py 
b/GPy/util/symbolic.py index 5074a42c..5b3ac312 100644 --- a/GPy/util/symbolic.py +++ b/GPy/util/symbolic.py @@ -18,13 +18,16 @@ class ln_cum_gaussian(Function): def fdiff(self, argindex=1): x = self.args[0] - return 1/cum_gaussian(x)*gaussian(x) + return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi) @classmethod def eval(cls, x): if x.is_Number: return log(cum_gaussian(x)) + def _eval_is_real(self): + return self.args[0].is_real + class cum_gaussian(Function): nargs = 1 def fdiff(self, argindex=1): @@ -36,12 +39,17 @@ class cum_gaussian(Function): if x.is_Number: return 0.5*(1+erf(sqrt(2)/2*x)) + def _eval_is_real(self): + return self.args[0].is_real + class gaussian(Function): nargs = 1 @classmethod def eval(cls, x): return 1/sqrt(2*pi)*exp(-0.5*x*x) + def _eval_is_real(self): + return True class ln_diff_erf(Function): nargs = 2