From 9b5a1edb23061b16c79bcb9961f120a210053ca9 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 7 Apr 2014 10:31:13 +0200 Subject: [PATCH] Partial changes to symbolic, including adding mapping covariance and beginning to unify code generation. --- GPy/core/mapping.py | 8 ++ GPy/kern/__init__.py | 12 ++- GPy/kern/_src/symbolic.py | 154 +++++++++++++++------------ GPy/likelihoods/negative_binomial.py | 6 +- GPy/likelihoods/symbolic.py | 56 ++++++---- GPy/mappings/__init__.py | 12 ++- GPy/mappings/linear.py | 19 ++-- GPy/util/functions.py | 21 ++-- GPy/util/symbolic.py | 96 +++++++++++++---- 9 files changed, 252 insertions(+), 132 deletions(-) diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 6eaaf96c..8cfd9ad0 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -65,6 +65,14 @@ class Mapping(Parameterized): else: raise NameError, "matplotlib package has not been imported." +class Bijective_mapping(Mapping): + """This is a mapping that is bijective, i.e. you can go from X to f and also back from f to X. 
The inverse mapping is called g().""" + def __init__(self, input_dim, output_dim, name='bijective_mapping'): + super(Bijective_mapping, self).__init__(input_dim=input_dim, output_dim=output_dim, name=name) + + def g(self, f): + """Inverse mapping from output domain of the function to the inputs.""" + raise NotImplementedError from model import Model diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 37cd71ec..a04c16a5 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -3,7 +3,6 @@ from _src.rbf import RBF from _src.linear import Linear, LinearFull from _src.static import Bias, White from _src.brownian import Brownian -from _src.symbolic import Symbolic from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.mlp import MLP from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 @@ -12,3 +11,14 @@ from _src.coregionalize import Coregionalize from _src.ssrbf import SSRBF # TODO: ZD: did you remove this? from _src.ODE_UY import ODE_UY +# TODO: put this in an init file somewhere +try: + import sympy as sym + sympy_available=True +except ImportError: + sympy_available=False + +if sympy_available: + from _src.symbolic import Symbolic + from _src.heat_eqinit import Heat_eqinit + diff --git a/GPy/kern/_src/symbolic.py b/GPy/kern/_src/symbolic.py index c7bbae73..79abd91d 100644 --- a/GPy/kern/_src/symbolic.py +++ b/GPy/kern/_src/symbolic.py @@ -1,15 +1,17 @@ # Check Matthew Rocklin's blog post. 
try: - import sympy as sp + import sympy as sym sympy_available=True from sympy.utilities.lambdify import lambdify + from GPy.util.symbolic import stabilise except ImportError: sympy_available=False import numpy as np from kern import Kern +from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma +from GPy.util.functions import normcdf, normcdfln, logistic, logisticln from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp class Symbolic(Kern): """ @@ -26,28 +28,41 @@ class Symbolic(Kern): - to handle multiple inputs, call them x_1, z_1, etc - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. """ - def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None): + def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None, func_modules=[]): if k is None: raise ValueError, "You must provide an argument for the covariance function." - super(Sympykern, self).__init__(input_dim, active_dims, name) - self._sp_k = k + self.func_modules = func_modules + self.func_modules += [{'gamma':gamma, + 'gammaln':gammaln, + 'erf':erf, 'erfc':erfc, + 'erfcx':erfcx, + 'polygamma':polygamma, + 'normcdf':normcdf, + 'normcdfln':normcdfln, + 'logistic':logistic, + 'logisticln':logisticln}, + 'numpy'] + + super(Symbolic, self).__init__(input_dim, active_dims, name) + + self._sym_k = k # pull the variable names out of the symbolic covariance function. 
- sp_vars = [e for e in k.atoms() if e.is_Symbol] - self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) - self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:])) + sym_vars = [e for e in k.atoms() if e.is_Symbol] + self._sym_x= sorted([e for e in sym_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) + self._sym_z= sorted([e for e in sym_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:])) # Check that variable names make sense. - assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) - assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) - assert len(self._sp_x)==len(self._sp_z) - x_dim=len(self._sp_x) + assert all([x.name=='x_%i'%i for i,x in enumerate(self._sym_x)]) + assert all([z.name=='z_%i'%i for i,z in enumerate(self._sym_z)]) + assert len(self._sym_x)==len(self._sym_z) + x_dim=len(self._sym_x) - self._sp_kdiag = k - for x, z in zip(self._sp_x, self._sp_z): - self._sp_kdiag = self._sp_kdiag.subs(z, x) + self._sym_kdiag = k + for x, z in zip(self._sym_x, self._sym_z): + self._sym_kdiag = self._sym_kdiag.subs(z, x) # If it is a multi-output covariance, add an input for indexing the outputs. self._real_input_dim = x_dim @@ -56,22 +71,22 @@ class Symbolic(Kern): self.output_dim = output_dim # extract parameter names from the covariance - thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) + thetas = sorted([e for e in sym_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) # Look for parameters with index (subscripts), they are associated with different outputs. 
if self.output_dim>1: - self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) - self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) + self._sym_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) + self._sym_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) # Make sure parameter appears with both indices! - assert len(self._sp_theta_i)==len(self._sp_theta_j) - assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)]) + assert len(self._sym_theta_i)==len(self._sym_theta_j) + assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j)]) # Extract names of shared parameters (those without a subscript) - self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] + self._sym_theta = [theta for theta in thetas if theta not in self._sym_theta_i and theta not in self._sym_theta_j] - self.num_split_params = len(self._sp_theta_i) - self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] + self.num_split_params = len(self._sym_theta_i) + self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sym_theta_i] # Add split parameters to the model. for theta in self._split_theta_names: # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? 
@@ -79,18 +94,18 @@ class Symbolic(Kern): self.add_parameter(getattr(self, theta)) - self.num_shared_params = len(self._sp_theta) - for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j): - self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i) + self.num_shared_params = len(self._sym_theta) + for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j): + self._sym_kdiag = self._sym_kdiag.subs(theta_j, theta_i) else: self.num_split_params = 0 self._split_theta_names = [] - self._sp_theta = thetas - self.num_shared_params = len(self._sp_theta) + self._sym_theta = thetas + self.num_shared_params = len(self._sym_theta) # Add parameters to the model. - for theta in self._sp_theta: + for theta in self._sym_theta: val = 1.0 # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class. if param is not None: @@ -100,25 +115,25 @@ class Symbolic(Kern): self.add_parameters(getattr(self, theta.name)) # Differentiate with respect to parameters. - derivative_arguments = self._sp_x + self._sp_theta + derivative_arguments = self._sym_x + self._sym_theta if self.output_dim > 1: - derivative_arguments += self._sp_theta_i + derivative_arguments += self._sym_theta_i - self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments} - self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments} + self.derivatives = {theta.name : stabilise(sym.diff(self._sym_k,theta)) for theta in derivative_arguments} + self.diag_derivatives = {theta.name : stabilise(sym.diff(self._sym_kdiag,theta)) for theta in derivative_arguments} # This gives the parameters for the arg list. 
- self.arg_list = self._sp_x + self._sp_z + self._sp_theta - self.diag_arg_list = self._sp_x + self._sp_theta + self.arg_list = self._sym_x + self._sym_z + self._sym_theta + self.diag_arg_list = self._sym_x + self._sym_theta if self.output_dim > 1: - self.arg_list += self._sp_theta_i + self._sp_theta_j - self.diag_arg_list += self._sp_theta_i + self.arg_list += self._sym_theta_i + self._sym_theta_j + self.diag_arg_list += self._sym_theta_i # Check if there are additional linear operators on the covariance. - self._sp_operators = operators + self._sym_operators = operators # TODO: Deal with linear operators - #if self._sp_operators: - # for operator in self._sp_operators: + #if self._sym_operators: + # for operator in self._sym_operators: # psi_stats aren't yet implemented. if False: @@ -128,17 +143,14 @@ class Symbolic(Kern): self._gen_code() def __add__(self,other): - return spkern(self._sp_k+other._sp_k) + return spkern(self._sym_k+other._sym_k) def _gen_code(self): - #fn_theano = theano_function([self.arg_lists], [self._sp_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'}) - self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy') - for key in self.derivatives.keys(): - setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy')) - - self._Kdiag_function = lambdify(self.diag_arg_list, self._sp_kdiag, 'numpy') - for key in self.derivatives.keys(): - setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy')) + #fn_theano = theano_function([self.arg_lists], [self._sym_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'}) + self._K_function = lambdify(self.arg_list, self._sym_k, self.func_modules) + self._K_derivatives_code = {key: lambdify(self.arg_list, self.derivatives[key], self.func_modules) for key in self.derivatives.keys()} + self._Kdiag_function = lambdify(self.diag_arg_list, self._sym_kdiag, self.func_modules) + 
self._Kdiag_derivatives_code = {key: lambdify(self.diag_arg_list, self.diag_derivatives[key], self.func_modules) for key in self.diag_derivatives.keys()} def K(self,X,X2=None): self._K_computations(X, X2) @@ -157,8 +169,8 @@ class Symbolic(Kern): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) gradients_X = np.zeros((X.shape[0], X.shape[1])) - for i, x in enumerate(self._sp_x): - gf = getattr(self, '_K_diff_' + x.name) + for i, x in enumerate(self._sym_x): + gf = self._K_derivatives_code[x.name] gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1) if X2 is None: gradients_X *= 2 @@ -167,25 +179,25 @@ class Symbolic(Kern): def gradients_X_diag(self, dL_dK, X): self._K_computations(X) dX = np.zeros_like(X) - for i, x in enumerate(self._sp_x): - gf = getattr(self, '_Kdiag_diff_' + x.name) + for i, x in enumerate(self._sym_x): + gf = self._Kdiag_derivatives_code[x.name] dX[:, i] = gf(**self._diag_arguments)*dL_dK return dX def update_gradients_full(self, dL_dK, X, X2=None): # Need to extract parameters to local variables first self._K_computations(X, X2) - for theta in self._sp_theta: + for theta in self._sym_theta: parameter = getattr(self, theta.name) - gf = getattr(self, '_K_diff_' + theta.name) + gf = self._K_derivatives_code[theta.name] gradient = (gf(**self._arguments)*dL_dK).sum() if X2 is not None: gradient += (gf(**self._reverse_arguments)*dL_dK).sum() setattr(parameter, 'gradient', gradient) if self.output_dim>1: - for theta in self._sp_theta_i: + for theta in self._sym_theta_i: parameter = getattr(self, theta.name[:-2]) - gf = getattr(self, '_K_diff_' + theta.name) + gf = self._K_derivatives_code[theta.name] A = gf(**self._arguments)*dL_dK gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum() for i in np.arange(self.output_dim)]) @@ -200,14 +212,14 @@ class Symbolic(Kern): def update_gradients_diag(self, dL_dKdiag, X): self._K_computations(X) - for theta in self._sp_theta: + for theta in 
self._sym_theta: parameter = getattr(self, theta.name) - gf = getattr(self, '_Kdiag_diff_' + theta.name) + gf = self._Kdiag_derivatives_code[theta.name] setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum()) if self.output_dim>1: - for theta in self._sp_theta_i: + for theta in self._sym_theta_i: parameter = getattr(self, theta.name[:-2]) - gf = getattr(self, '_Kdiag_diff_' + theta.name) + gf = self._Kdiag_derivatives_code[theta.name] a = gf(**self._diag_arguments)*dL_dKdiag setattr(parameter, 'gradient', np.asarray([a[np.where(self._output_ind==i)].sum() @@ -220,40 +232,40 @@ class Symbolic(Kern): # parameter updates here. self._arguments = {} self._diag_arguments = {} - for i, x in enumerate(self._sp_x): + for i, x in enumerate(self._sym_x): self._arguments[x.name] = X[:, i][:, None] self._diag_arguments[x.name] = X[:, i][:, None] if self.output_dim > 1: self._output_ind = np.asarray(X[:, -1], dtype='int') - for i, theta in enumerate(self._sp_theta_i): + for i, theta in enumerate(self._sym_theta_i): self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None] self._diag_arguments[theta.name] = self._arguments[theta.name] - for theta in self._sp_theta: + for theta in self._sym_theta: self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) self._diag_arguments[theta.name] = self._arguments[theta.name] if X2 is not None: - for i, z in enumerate(self._sp_z): + for i, z in enumerate(self._sym_z): self._arguments[z.name] = X2[:, i][None, :] if self.output_dim > 1: self._output_ind2 = np.asarray(X2[:, -1], dtype='int') - for i, theta in enumerate(self._sp_theta_j): + for i, theta in enumerate(self._sym_theta_j): self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :] else: - for z in self._sp_z: + for z in self._sym_z: self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T if self.output_dim > 1: self._output_ind2 = self._output_ind - for theta in 
self._sp_theta_j: + for theta in self._sym_theta_j: self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T if X2 is not None: # These arguments are needed in gradients when X2 is not equal to X. self._reverse_arguments = self._arguments - for x, z in zip(self._sp_x, self._sp_z): + for x, z in zip(self._sym_x, self._sym_z): self._reverse_arguments[x.name] = self._arguments[z.name].T self._reverse_arguments[z.name] = self._arguments[x.name].T if self.output_dim > 1: - for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j): + for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j): self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T @@ -265,7 +277,7 @@ if False: def __init__(self, subkerns, operations, name='sympy_combine'): super(Symcombine, self).__init__(subkerns, name) for subkern, operation in zip(subkerns, operations): - self._sp_k += self._k_double_operate(subkern._sp_k, operation) + self._sym_k += self._k_double_operate(subkern._sym_k, operation) #def _double_operate(self, k, operation): diff --git a/GPy/likelihoods/negative_binomial.py b/GPy/likelihoods/negative_binomial.py index 5bc5b727..1130375b 100644 --- a/GPy/likelihoods/negative_binomial.py +++ b/GPy/likelihoods/negative_binomial.py @@ -6,7 +6,7 @@ try: import sympy as sym sympy_available=True from sympy.utilities.lambdify import lambdify - from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian + from GPy.util.symbolic import gammaln, logisticln except ImportError: sympy_available=False @@ -33,12 +33,14 @@ if sympy_available: """ def __init__(self, gp_link=None): if gp_link is None: - gp_link = link_functions.Log() + gp_link = link_functions.Identity() dispersion = sym.Symbol('dispersion', positive=True, real=True) y = sym.Symbol('y', nonnegative=True, integer=True) f = sym.Symbol('f', positive=True, real=True) + gp_link = link_functions.Log() 
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f) + #log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion)) super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial') # TODO: Check this. diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py index 5d052119..65c90183 100644 --- a/GPy/likelihoods/symbolic.py +++ b/GPy/likelihoods/symbolic.py @@ -5,14 +5,15 @@ try: import sympy as sym sympy_available=True from sympy.utilities.lambdify import lambdify + from GPy.util.symbolic import stabilise except ImportError: sympy_available=False import numpy as np import link_functions from scipy import stats, integrate -from scipy.special import gammaln, gamma, erf, polygamma -from GPy.util.functions import cum_gaussian, ln_cum_gaussian +from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma +from GPy.util.functions import normcdf, normcdfln, logistic, logisticln from likelihood import Likelihood from ..core.parameterization import Param @@ -33,7 +34,17 @@ if sympy_available: if log_pdf is None: raise ValueError, "You must provide an argument for the log pdf." 
- self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy'] + self.func_modules = func_modules + self.func_modules += [{'gamma':gamma, + 'gammaln':gammaln, + 'erf':erf, 'erfc':erfc, + 'erfcx':erfcx, + 'polygamma':polygamma, + 'normcdf':normcdf, + 'normcdfln':normcdfln, + 'logistic':logistic, + 'logisticln':logisticln}, + 'numpy'] super(Symbolic, self).__init__(gp_link, name=name) self.missing_data = False @@ -58,7 +69,7 @@ if sympy_available: sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol] sym_f = [e for e in sym_vars if e.name=='f'] if not sym_f: - raise ValueError('No variable f in missing log pdf.') + raise ValueError('No variable f in missing data log pdf.') sym_y = [e for e in sym_vars if e.name=='y'] if sym_y: raise ValueError('Data is present in missing data portion of likelihood.') @@ -74,15 +85,15 @@ if sympy_available: derivative_arguments = self._sym_f + self._sym_theta # Do symbolic work to compute derivatives. - self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments} - self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} - self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_log_pdf,theta)) for theta in derivative_arguments} + self._log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_derivatives['f'],theta)) for theta in derivative_arguments} + self._log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments} if self.missing_data: # Do symbolic work to compute derivatives. 
- self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments} - self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} - self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._missing_log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_missing_log_pdf,theta)) for theta in derivative_arguments} + self._missing_log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_derivatives['f'],theta)) for theta in derivative_arguments} + self._missing_log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments} # Add parameters to the model. @@ -96,7 +107,7 @@ if sympy_available: self.add_parameters(getattr(self, theta.name)) - # Is there some way to check whether the pdf is log + # TODO: Is there an easy way to check whether the pdf is log # concave? For the moment, need user to specify. self.log_concave = log_concave @@ -112,16 +123,15 @@ if sympy_available: # functions accordingly. 
self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules) - # compute code for derivatives (for implicit likelihood terms - # we need up to 3rd derivatives) - setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}) - setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}) - setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()}) + # compute code for derivatives + self._derivative_code = {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()} + self._second_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()} + self._third_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()} if self.missing_data: - setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}) - setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}) - setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}) + self._missing_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) 
for key in self._missing_log_pdf_derivatives.keys()} + self._missing_second_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()} + self._missing_third_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()} # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta @@ -210,9 +220,9 @@ if sympy_available: assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape self._arguments_update(inv_link_f, y) if self.missing_data: - return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._missing_argments), self._first_derivative_code['f'](**self._argments)) + return np.where(np.isnan(y), self._missing_derivative_code['f'](**self._missing_argments), self._derivative_code['f'](**self._argments)) else: - return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments)) + return np.where(np.isnan(y), 0., self._derivative_code['f'](**self._arguments)) def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ @@ -255,9 +265,9 @@ if sympy_available: g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) for i, theta in enumerate(self._sym_theta): if self.missing_data: - g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments)) + g[:, i:i+1] = np.where(np.isnan(y), self._missing_derivative_code[theta.name](**self._arguments), self._derivative_code[theta.name](**self._arguments)) else: - g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments)) + g[:, i:i+1] = np.where(np.isnan(y), 0., self._derivative_code[theta.name](**self._arguments)) return g.sum(0) def dlogpdf_dlink_dtheta(self, inv_link_f, y, 
Y_metadata=None): diff --git a/GPy/mappings/__init__.py b/GPy/mappings/__init__.py index 97573aba..33f1d6b0 100644 --- a/GPy/mappings/__init__.py +++ b/GPy/mappings/__init__.py @@ -1,7 +1,17 @@ -# Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) from kernel import Kernel from linear import Linear from mlp import MLP #from rbf import RBF +# TODO need to fix this in a config file. +try: + import sympy as sym + sympy_available=True +except ImportError: + sympy_available=False + +if sympy_available: + # These are likelihoods that rely on symbolic. + from symbolic import Symbolic diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py index 075b8556..24a45511 100644 --- a/GPy/mappings/linear.py +++ b/GPy/mappings/linear.py @@ -1,11 +1,11 @@ -# Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ..core.mapping import Mapping +from ..core.mapping import Bijective_mapping from ..core.parameterization import Param -class Linear(Mapping): +class Linear(Bijective_mapping): """ Mapping based on a linear model. 
@@ -20,8 +20,8 @@ class Linear(Mapping): """ - def __init__(self, input_dim=1, output_dim=1, name='linear_map'): - Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) + def __init__(self, input_dim=1, output_dim=1, name='linear'): + Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) self.W = Param('W',np.array((self.input_dim, self.output_dim))) self.bias = Param('bias',np.array(self.output_dim)) self.add_parameters(self.W, self.bias) @@ -29,10 +29,15 @@ class Linear(Mapping): def f(self, X): return np.dot(X,self.W) + self.bias + def g(self, f): + V = np.linalg.solve(np.dot(self.W.T, self.W), self.W.T) + return np.dot(f-self.bias, V) + def df_dtheta(self, dL_df, X): df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T df_dbias = (dL_df.sum(0)) return np.hstack((df_dW.flatten(), df_dbias)) - def dL_dX(self, dL_df, X): - return (dL_df[:, None, :]*self.W[None, :, :]).sum(2) + def dL_dX(self, partial, X): + """The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f.""" + return (partial[:, None, :]*self.W[None, :, :]).sum(2) diff --git a/GPy/util/functions.py b/GPy/util/functions.py index a9ee1084..1cb1f7cb 100644 --- a/GPy/util/functions.py +++ b/GPy/util/functions.py @@ -1,18 +1,21 @@ import numpy as np -from scipy.special import erf, erfcx +from scipy.special import erf, erfc, erfcx import sys epsilon = sys.float_info.epsilon lim_val = -np.log(epsilon) -def cum_gaussian(x): - g=0.5*(1+erf(x/np.sqrt(2))) +def logisticln(x): + return np.where(x<lim_val, np.where(x>-lim_val, -np.log(1+np.exp(-x)), x), -np.log(1+epsilon)) + +def logistic(x): + return np.where(x<lim_val, np.where(x>-lim_val, 1/(1+np.exp(-x)), epsilon/(epsilon+1)), 1/(1+epsilon)) + +def normcdf(x): + g=0.5*erfc(-x/np.sqrt(2)) return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g)) -def ln_cum_gaussian(x): - return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x))) 
+def normcdfln(x): + return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-x/np.sqrt(2))), np.log(normcdf(x))) def clip_exp(x): - if any(x>=lim_val) or any(x<=-lim_val): - return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val)) - else: - return np.exp(x) + return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), epsilon), 1/epsilon) diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py index 5b3ac312..31aa1375 100644 --- a/GPy/util/symbolic.py +++ b/GPy/util/symbolic.py @@ -1,6 +1,56 @@ +import sympy as sym from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma,polygamma +def stabilise(e): + """Attempt to find the most numerically stable form of an expression""" + return e #sym.expand(e) + +class logistic(Function): + """The logistic function as a symbolic function.""" + nargs = 1 + def fdiff(self, argindex=1): + x = self.args[0] + return logistic(x)*(1-logistic(x)) + + @classmethod + def eval(cls, x): + if x.is_Number: + return 1/(1+exp(-x)) + +class logisticln(Function): + """The log logistic, which can often be computed with more precision than simply taking log(logistic(x)) when x is small or large.""" + nargs = 1 + + def fdiff(self, argindex=1): + x = self.args[0] + return 1-logistic(x) + + @classmethod + def eval(cls, x): + if x.is_Number: + return -log(1+exp(-x)) + +class erfc(Function): + """The complementary error function, erfc(x) = 1-erf(x). Used as a helper function, particularly for erfcx, the scaled complementary error function. 
and the normal distributions cdf.""" + nargs = 1 + + @classmethod + def eval(cls, arg): + return 1-erf(arg) + +class erfcx(Function): + nargs = 1 + def fdiff(self, argindex=1): + x = self.args[0] + return 2*x*erfcx(x)-2/sqrt(pi) + + @classmethod + def eval(cls, x): + if x.is_Number: + return exp(x**2)*erfc(x) + class gammaln(Function): + """The log of the gamma function, which is often needed instead of log(gamma(x)) for better accuracy for large x.""" nargs = 1 def fdiff(self, argindex=1): @@ -13,22 +63,26 @@ return log(gamma(x)) -class ln_cum_gaussian(Function): +class normcdfln(Function): + """The log of the normal cdf. Can often be computed with better accuracy than log(normcdf(x)), particularly when x is either small or large.""" nargs = 1 def fdiff(self, argindex=1): x = self.args[0] - return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi) + #return -erfcx(-x/sqrt(2))/sqrt(2*pi) + #return exp(-normcdfln(x) - 0.5*x*x)/sqrt(2*pi) + return sqrt(2/pi)*1/erfcx(-x/sqrt(2)) @classmethod def eval(cls, x): if x.is_Number: - return log(cum_gaussian(x)) + return log(normcdf(x)) def _eval_is_real(self): return self.args[0].is_real -class cum_gaussian(Function): +class normcdf(Function): + """The cumulative distribution function of the standard normal. Provided as a convenient helper function. 
It is computed through 0.5*erfc(-x/sqrt(2)).""" nargs = 1 def fdiff(self, argindex=1): x = self.args[0] @@ -37,12 +91,30 @@ @classmethod def eval(cls, x): if x.is_Number: - return 0.5*(1+erf(sqrt(2)/2*x)) + return 0.5*(erfc(-x/sqrt(2))) def _eval_is_real(self): return self.args[0].is_real -class gaussian(Function): +class normalln(Function): + """The log of the standard normal distribution.""" + nargs = 1 + def fdiff(self, argindex=1): + x = self.args[0] + return -x + + @classmethod + def eval(cls, x): + if x.is_Number: + return -0.5*log(2*pi) - 0.5*x*x + + + def _eval_is_real(self): + return True + + +class normal(Function): + """The standard normal distribution. Provided as a convenience function.""" nargs = 1 @classmethod def eval(cls, x): @@ -273,17 +345,5 @@ class h(Function): # *(erf(tprime/l - d_j/2.*l) # + erf(d_j/2.*l)))) -class erfc(Function): - nargs = 1 - - @classmethod - def eval(cls, arg): - return 1-erf(arg) -class erfcx(Function): - nargs = 1 - - @classmethod - def eval(cls, arg): - return erfc(arg)*exp(arg*arg)