Partial changes to symbolic, including adding mapping covariance and beginning to unify code generation.

This commit is contained in:
Neil Lawrence 2014-04-07 10:31:13 +02:00
parent 19b3784389
commit 9b5a1edb23
9 changed files with 252 additions and 132 deletions

View file

@ -65,6 +65,14 @@ class Mapping(Parameterized):
else: else:
raise NameError, "matplotlib package has not been imported." raise NameError, "matplotlib package has not been imported."
class Bijective_mapping(Mapping):
    """This is a mapping that is bijective, i.e. you can go from X to f and also back from f to X. The inverse mapping is called g()."""
    def __init__(self, input_dim, output_dim, name='bijective_mapping'):
        # Fix: the original called super(Bijective_apping, ...) — a misspelled
        # class name that raises NameError at construction time.  Also forward
        # input_dim/output_dim to the base Mapping (it accepts them, see the
        # Linear mapping which calls Mapping.__init__ with both), instead of
        # silently discarding them.
        super(Bijective_mapping, self).__init__(input_dim=input_dim, output_dim=output_dim, name=name)

    def g(self, f):
        """Inverse mapping from output domain of the function to the inputs.

        Subclasses implementing a concrete bijective mapping must override
        this to invert f().
        """
        raise NotImplementedError
from model import Model from model import Model

View file

@ -3,7 +3,6 @@ from _src.rbf import RBF
from _src.linear import Linear, LinearFull from _src.linear import Linear, LinearFull
from _src.static import Bias, White from _src.static import Bias, White
from _src.brownian import Brownian from _src.brownian import Brownian
from _src.symbolic import Symbolic
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
@ -12,3 +11,14 @@ from _src.coregionalize import Coregionalize
from _src.ssrbf import SSRBF # TODO: ZD: did you remove this? from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
from _src.ODE_UY import ODE_UY from _src.ODE_UY import ODE_UY
# TODO: put this in an init file somewhere
# Optional dependency: the symbolic kernels are only usable when sympy
# is installed, so probe for it and gate the imports on the result.
try:
    import sympy as sym
except ImportError:
    sympy_available = False
else:
    sympy_available = True
if sympy_available:
    from _src.symbolic import Symbolic
    from _src.heat_eqinit import Heat_eqinit

View file

@ -1,15 +1,17 @@
# Check Matthew Rocklin's blog post. # Check Matthew Rocklin's blog post.
try: try:
import sympy as sp import sympy as sym
sympy_available=True sympy_available=True
from sympy.utilities.lambdify import lambdify from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import stabilise
except ImportError: except ImportError:
sympy_available=False sympy_available=False
import numpy as np import numpy as np
from kern import Kern from kern import Kern
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Symbolic(Kern): class Symbolic(Kern):
""" """
@ -26,28 +28,41 @@ class Symbolic(Kern):
- to handle multiple inputs, call them x_1, z_1, etc - to handle multiple inputs, call them x_1, z_1, etc
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
""" """
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None): def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None, func_modules=[]):
if k is None: if k is None:
raise ValueError, "You must provide an argument for the covariance function." raise ValueError, "You must provide an argument for the covariance function."
super(Sympykern, self).__init__(input_dim, active_dims, name)
self._sp_k = k self.func_modules = func_modules
self.func_modules += [{'gamma':gamma,
'gammaln':gammaln,
'erf':erf, 'erfc':erfc,
'erfcx':erfcx,
'polygamma':polygamma,
'normcdf':normcdf,
'normcdfln':normcdfln,
'logistic':logistic,
'logisticln':logisticln},
'numpy']
super(Symbolic, self).__init__(input_dim, active_dims, name)
self._sym_k = k
# pull the variable names out of the symbolic covariance function. # pull the variable names out of the symbolic covariance function.
sp_vars = [e for e in k.atoms() if e.is_Symbol] sym_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) self._sym_x= sorted([e for e in sym_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:])) self._sym_z= sorted([e for e in sym_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense. # Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) assert all([x.name=='x_%i'%i for i,x in enumerate(self._sym_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) assert all([z.name=='z_%i'%i for i,z in enumerate(self._sym_z)])
assert len(self._sp_x)==len(self._sp_z) assert len(self._sym_x)==len(self._sym_z)
x_dim=len(self._sp_x) x_dim=len(self._sym_x)
self._sp_kdiag = k self._sym_kdiag = k
for x, z in zip(self._sp_x, self._sp_z): for x, z in zip(self._sym_x, self._sym_z):
self._sp_kdiag = self._sp_kdiag.subs(z, x) self._sym_kdiag = self._sym_kdiag.subs(z, x)
# If it is a multi-output covariance, add an input for indexing the outputs. # If it is a multi-output covariance, add an input for indexing the outputs.
self._real_input_dim = x_dim self._real_input_dim = x_dim
@ -56,22 +71,22 @@ class Symbolic(Kern):
self.output_dim = output_dim self.output_dim = output_dim
# extract parameter names from the covariance # extract parameter names from the covariance
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) thetas = sorted([e for e in sym_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs. # Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1: if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) self._sym_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) self._sym_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices! # Make sure parameter appears with both indices!
assert len(self._sp_theta_i)==len(self._sp_theta_j) assert len(self._sym_theta_i)==len(self._sym_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)]) assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j)])
# Extract names of shared parameters (those without a subscript) # Extract names of shared parameters (those without a subscript)
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] self._sym_theta = [theta for theta in thetas if theta not in self._sym_theta_i and theta not in self._sym_theta_j]
self.num_split_params = len(self._sp_theta_i) self.num_split_params = len(self._sym_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sym_theta_i]
# Add split parameters to the model. # Add split parameters to the model.
for theta in self._split_theta_names: for theta in self._split_theta_names:
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? # TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
@ -79,18 +94,18 @@ class Symbolic(Kern):
self.add_parameter(getattr(self, theta)) self.add_parameter(getattr(self, theta))
self.num_shared_params = len(self._sp_theta) self.num_shared_params = len(self._sym_theta)
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j): for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i) self._sym_kdiag = self._sym_kdiag.subs(theta_j, theta_i)
else: else:
self.num_split_params = 0 self.num_split_params = 0
self._split_theta_names = [] self._split_theta_names = []
self._sp_theta = thetas self._sym_theta = thetas
self.num_shared_params = len(self._sp_theta) self.num_shared_params = len(self._sym_theta)
# Add parameters to the model. # Add parameters to the model.
for theta in self._sp_theta: for theta in self._sym_theta:
val = 1.0 val = 1.0
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class. # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
if param is not None: if param is not None:
@ -100,25 +115,25 @@ class Symbolic(Kern):
self.add_parameters(getattr(self, theta.name)) self.add_parameters(getattr(self, theta.name))
# Differentiate with respect to parameters. # Differentiate with respect to parameters.
derivative_arguments = self._sp_x + self._sp_theta derivative_arguments = self._sym_x + self._sym_theta
if self.output_dim > 1: if self.output_dim > 1:
derivative_arguments += self._sp_theta_i derivative_arguments += self._sym_theta_i
self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments} self.derivatives = {theta.name : stabilise(sym.diff(self._sym_k,theta)) for theta in derivative_arguments}
self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments} self.diag_derivatives = {theta.name : stabilise(sym.diff(self._sym_kdiag,theta)) for theta in derivative_arguments}
# This gives the parameters for the arg list. # This gives the parameters for the arg list.
self.arg_list = self._sp_x + self._sp_z + self._sp_theta self.arg_list = self._sym_x + self._sym_z + self._sym_theta
self.diag_arg_list = self._sp_x + self._sp_theta self.diag_arg_list = self._sym_x + self._sym_theta
if self.output_dim > 1: if self.output_dim > 1:
self.arg_list += self._sp_theta_i + self._sp_theta_j self.arg_list += self._sym_theta_i + self._sym_theta_j
self.diag_arg_list += self._sp_theta_i self.diag_arg_list += self._sym_theta_i
# Check if there are additional linear operators on the covariance. # Check if there are additional linear operators on the covariance.
self._sp_operators = operators self._sym_operators = operators
# TODO: Deal with linear operators # TODO: Deal with linear operators
#if self._sp_operators: #if self._sym_operators:
# for operator in self._sp_operators: # for operator in self._sym_operators:
# psi_stats aren't yet implemented. # psi_stats aren't yet implemented.
if False: if False:
@ -128,17 +143,14 @@ class Symbolic(Kern):
self._gen_code() self._gen_code()
def __add__(self,other): def __add__(self,other):
return spkern(self._sp_k+other._sp_k) return spkern(self._sym_k+other._sym_k)
def _gen_code(self): def _gen_code(self):
#fn_theano = theano_function([self.arg_lists], [self._sp_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'}) #fn_theano = theano_function([self.arg_lists], [self._sym_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy') self._K_function = lambdify(self.arg_list, self._sym_k, self.func_modules)
for key in self.derivatives.keys(): self._K_derivatives_code = {key: lambdify(self.arg_list, self.derivatives[key], self.func_modules) for key in self.derivatives.keys()}
setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy')) self._Kdiag_function = lambdify(self.diag_arg_list, self._sym_kdiag, self.func_modules)
self._Kdiag_derivatives_code = {key: lambdify(self.diag_arg_list, self.diag_derivatives[key], self.func_modules) for key in self.diag_derivatives.keys()}
self._Kdiag_function = lambdify(self.diag_arg_list, self._sp_kdiag, 'numpy')
for key in self.derivatives.keys():
setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
def K(self,X,X2=None): def K(self,X,X2=None):
self._K_computations(X, X2) self._K_computations(X, X2)
@ -157,8 +169,8 @@ class Symbolic(Kern):
#if self._X is None or X.base is not self._X.base or X2 is not None: #if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2) self._K_computations(X, X2)
gradients_X = np.zeros((X.shape[0], X.shape[1])) gradients_X = np.zeros((X.shape[0], X.shape[1]))
for i, x in enumerate(self._sp_x): for i, x in enumerate(self._sym_x):
gf = getattr(self, '_K_diff_' + x.name) gf = self._K_derivatives_code[x.name]
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1) gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
if X2 is None: if X2 is None:
gradients_X *= 2 gradients_X *= 2
@ -167,25 +179,25 @@ class Symbolic(Kern):
def gradients_X_diag(self, dL_dK, X): def gradients_X_diag(self, dL_dK, X):
self._K_computations(X) self._K_computations(X)
dX = np.zeros_like(X) dX = np.zeros_like(X)
for i, x in enumerate(self._sp_x): for i, x in enumerate(self._sym_x):
gf = getattr(self, '_Kdiag_diff_' + x.name) gf = self._Kdiag_derivatives_code[x.name]
dX[:, i] = gf(**self._diag_arguments)*dL_dK dX[:, i] = gf(**self._diag_arguments)*dL_dK
return dX return dX
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
# Need to extract parameters to local variables first # Need to extract parameters to local variables first
self._K_computations(X, X2) self._K_computations(X, X2)
for theta in self._sp_theta: for theta in self._sym_theta:
parameter = getattr(self, theta.name) parameter = getattr(self, theta.name)
gf = getattr(self, '_K_diff_' + theta.name) gf = self._K_derivatives_code[theta.name]
gradient = (gf(**self._arguments)*dL_dK).sum() gradient = (gf(**self._arguments)*dL_dK).sum()
if X2 is not None: if X2 is not None:
gradient += (gf(**self._reverse_arguments)*dL_dK).sum() gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
setattr(parameter, 'gradient', gradient) setattr(parameter, 'gradient', gradient)
if self.output_dim>1: if self.output_dim>1:
for theta in self._sp_theta_i: for theta in self._sym_theta_i:
parameter = getattr(self, theta.name[:-2]) parameter = getattr(self, theta.name[:-2])
gf = getattr(self, '_K_diff_' + theta.name) gf = self._K_derivatives_code[theta.name]
A = gf(**self._arguments)*dL_dK A = gf(**self._arguments)*dL_dK
gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum() gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
for i in np.arange(self.output_dim)]) for i in np.arange(self.output_dim)])
@ -200,14 +212,14 @@ class Symbolic(Kern):
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self._K_computations(X) self._K_computations(X)
for theta in self._sp_theta: for theta in self._sym_theta:
parameter = getattr(self, theta.name) parameter = getattr(self, theta.name)
gf = getattr(self, '_Kdiag_diff_' + theta.name) gf = self._Kdiag_derivatives_code[theta.name]
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum()) setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
if self.output_dim>1: if self.output_dim>1:
for theta in self._sp_theta_i: for theta in self._sym_theta_i:
parameter = getattr(self, theta.name[:-2]) parameter = getattr(self, theta.name[:-2])
gf = getattr(self, '_Kdiag_diff_' + theta.name) gf = self._Kdiag_derivatives_code[theta.name]
a = gf(**self._diag_arguments)*dL_dKdiag a = gf(**self._diag_arguments)*dL_dKdiag
setattr(parameter, 'gradient', setattr(parameter, 'gradient',
np.asarray([a[np.where(self._output_ind==i)].sum() np.asarray([a[np.where(self._output_ind==i)].sum()
@ -220,40 +232,40 @@ class Symbolic(Kern):
# parameter updates here. # parameter updates here.
self._arguments = {} self._arguments = {}
self._diag_arguments = {} self._diag_arguments = {}
for i, x in enumerate(self._sp_x): for i, x in enumerate(self._sym_x):
self._arguments[x.name] = X[:, i][:, None] self._arguments[x.name] = X[:, i][:, None]
self._diag_arguments[x.name] = X[:, i][:, None] self._diag_arguments[x.name] = X[:, i][:, None]
if self.output_dim > 1: if self.output_dim > 1:
self._output_ind = np.asarray(X[:, -1], dtype='int') self._output_ind = np.asarray(X[:, -1], dtype='int')
for i, theta in enumerate(self._sp_theta_i): for i, theta in enumerate(self._sym_theta_i):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None] self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
self._diag_arguments[theta.name] = self._arguments[theta.name] self._diag_arguments[theta.name] = self._arguments[theta.name]
for theta in self._sp_theta: for theta in self._sym_theta:
self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
self._diag_arguments[theta.name] = self._arguments[theta.name] self._diag_arguments[theta.name] = self._arguments[theta.name]
if X2 is not None: if X2 is not None:
for i, z in enumerate(self._sp_z): for i, z in enumerate(self._sym_z):
self._arguments[z.name] = X2[:, i][None, :] self._arguments[z.name] = X2[:, i][None, :]
if self.output_dim > 1: if self.output_dim > 1:
self._output_ind2 = np.asarray(X2[:, -1], dtype='int') self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
for i, theta in enumerate(self._sp_theta_j): for i, theta in enumerate(self._sym_theta_j):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :] self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
else: else:
for z in self._sp_z: for z in self._sym_z:
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
if self.output_dim > 1: if self.output_dim > 1:
self._output_ind2 = self._output_ind self._output_ind2 = self._output_ind
for theta in self._sp_theta_j: for theta in self._sym_theta_j:
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
if X2 is not None: if X2 is not None:
# These arguments are needed in gradients when X2 is not equal to X. # These arguments are needed in gradients when X2 is not equal to X.
self._reverse_arguments = self._arguments self._reverse_arguments = self._arguments
for x, z in zip(self._sp_x, self._sp_z): for x, z in zip(self._sym_x, self._sym_z):
self._reverse_arguments[x.name] = self._arguments[z.name].T self._reverse_arguments[x.name] = self._arguments[z.name].T
self._reverse_arguments[z.name] = self._arguments[x.name].T self._reverse_arguments[z.name] = self._arguments[x.name].T
if self.output_dim > 1: if self.output_dim > 1:
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j): for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
@ -265,7 +277,7 @@ if False:
def __init__(self, subkerns, operations, name='sympy_combine'): def __init__(self, subkerns, operations, name='sympy_combine'):
super(Symcombine, self).__init__(subkerns, name) super(Symcombine, self).__init__(subkerns, name)
for subkern, operation in zip(subkerns, operations): for subkern, operation in zip(subkerns, operations):
self._sp_k += self._k_double_operate(subkern._sp_k, operation) self._sym_k += self._k_double_operate(subkern._sym_k, operation)
#def _double_operate(self, k, operation): #def _double_operate(self, k, operation):

View file

@ -6,7 +6,7 @@ try:
import sympy as sym import sympy as sym
sympy_available=True sympy_available=True
from sympy.utilities.lambdify import lambdify from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian from GPy.util.symbolic import gammaln, logisticln
except ImportError: except ImportError:
sympy_available=False sympy_available=False
@ -33,12 +33,14 @@ if sympy_available:
""" """
def __init__(self, gp_link=None): def __init__(self, gp_link=None):
if gp_link is None: if gp_link is None:
gp_link = link_functions.Log() gp_link = link_functions.Identity()
dispersion = sym.Symbol('dispersion', positive=True, real=True) dispersion = sym.Symbol('dispersion', positive=True, real=True)
y = sym.Symbol('y', nonnegative=True, integer=True) y = sym.Symbol('y', nonnegative=True, integer=True)
f = sym.Symbol('f', positive=True, real=True) f = sym.Symbol('f', positive=True, real=True)
gp_link = link_functions.Log()
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f) log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f)
#log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion))
super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial') super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial')
# TODO: Check this. # TODO: Check this.

View file

@ -5,14 +5,15 @@ try:
import sympy as sym import sympy as sym
sympy_available=True sympy_available=True
from sympy.utilities.lambdify import lambdify from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import stabilise
except ImportError: except ImportError:
sympy_available=False sympy_available=False
import numpy as np import numpy as np
import link_functions import link_functions
from scipy import stats, integrate from scipy import stats, integrate
from scipy.special import gammaln, gamma, erf, polygamma from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
from GPy.util.functions import cum_gaussian, ln_cum_gaussian from GPy.util.functions import normcdf, normcdfln, logistic, logisticln
from likelihood import Likelihood from likelihood import Likelihood
from ..core.parameterization import Param from ..core.parameterization import Param
@ -33,7 +34,17 @@ if sympy_available:
if log_pdf is None: if log_pdf is None:
raise ValueError, "You must provide an argument for the log pdf." raise ValueError, "You must provide an argument for the log pdf."
self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy'] self.func_modules = func_modules
self.func_modules += [{'gamma':gamma,
'gammaln':gammaln,
'erf':erf, 'erfc':erfc,
'erfcx':erfcx,
'polygamma':polygamma,
'normcdf':normcdf,
'normcdfln':normcdfln,
'logistic':logistic,
'logisticln':logisticln},
'numpy']
super(Symbolic, self).__init__(gp_link, name=name) super(Symbolic, self).__init__(gp_link, name=name)
self.missing_data = False self.missing_data = False
@ -58,7 +69,7 @@ if sympy_available:
sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol] sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol]
sym_f = [e for e in sym_vars if e.name=='f'] sym_f = [e for e in sym_vars if e.name=='f']
if not sym_f: if not sym_f:
raise ValueError('No variable f in missing log pdf.') raise ValueError('No variable f in missing data log pdf.')
sym_y = [e for e in sym_vars if e.name=='y'] sym_y = [e for e in sym_vars if e.name=='y']
if sym_y: if sym_y:
raise ValueError('Data is present in missing data portion of likelihood.') raise ValueError('Data is present in missing data portion of likelihood.')
@ -74,15 +85,15 @@ if sympy_available:
derivative_arguments = self._sym_f + self._sym_theta derivative_arguments = self._sym_f + self._sym_theta
# Do symbolic work to compute derivatives. # Do symbolic work to compute derivatives.
self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments} self._log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_log_pdf,theta)) for theta in derivative_arguments}
self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} self._log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_derivatives['f'],theta)) for theta in derivative_arguments}
self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} self._log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments}
if self.missing_data: if self.missing_data:
# Do symbolic work to compute derivatives. # Do symbolic work to compute derivatives.
self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments} self._missing_log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_missing_log_pdf,theta)) for theta in derivative_arguments}
self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} self._missing_log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_derivatives['f'],theta)) for theta in derivative_arguments}
self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} self._missing_log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments}
# Add parameters to the model. # Add parameters to the model.
@ -96,7 +107,7 @@ if sympy_available:
self.add_parameters(getattr(self, theta.name)) self.add_parameters(getattr(self, theta.name))
# Is there some way to check whether the pdf is log # TODO: Is there an easy way to check whether the pdf is log
# concave? For the moment, need user to specify. # concave? For the moment, need user to specify.
self.log_concave = log_concave self.log_concave = log_concave
@ -112,16 +123,15 @@ if sympy_available:
# functions accordingly. # functions accordingly.
self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules) self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules)
# compute code for derivatives (for implicit likelihood terms # compute code for derivatives
# we need up to 3rd derivatives) self._derivative_code = {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}
setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}) self._second_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}
setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}) self._third_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()}
setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()})
if self.missing_data: if self.missing_data:
setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}) self._missing_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}
setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}) self._missing_second_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}
setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}) self._missing_third_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}
# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
@ -210,9 +220,9 @@ if sympy_available:
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y) self._arguments_update(inv_link_f, y)
if self.missing_data: if self.missing_data:
return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._missing_argments), self._first_derivative_code['f'](**self._argments)) return np.where(np.isnan(y), self._missing_derivative_code['f'](**self._missing_argments), self._derivative_code['f'](**self._argments))
else: else:
return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments)) return np.where(np.isnan(y), 0., self._derivative_code['f'](**self._arguments))
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
@ -255,9 +265,9 @@ if sympy_available:
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
for i, theta in enumerate(self._sym_theta): for i, theta in enumerate(self._sym_theta):
if self.missing_data: if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments)) g[:, i:i+1] = np.where(np.isnan(y), self._missing_derivative_code[theta.name](**self._arguments), self._derivative_code[theta.name](**self._arguments))
else: else:
g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments)) g[:, i:i+1] = np.where(np.isnan(y), 0., self._derivative_code[theta.name](**self._arguments))
return g.sum(0) return g.sum(0)
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):

View file

@ -1,7 +1,17 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernel import Kernel from kernel import Kernel
from linear import Linear from linear import Linear
from mlp import MLP from mlp import MLP
#from rbf import RBF #from rbf import RBF
# TODO need to fix this in a config file.
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
# These are mappings that rely on symbolic (sympy).
from symbolic import Symbolic

View file

@ -1,11 +1,11 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..core.mapping import Mapping from ..core.mapping import Bijective_mapping
from ..core.parameterization import Param from ..core.parameterization import Param
class Linear(Mapping): class Linear(Bijective_mapping):
""" """
Mapping based on a linear model. Mapping based on a linear model.
@ -20,8 +20,8 @@ class Linear(Mapping):
""" """
def __init__(self, input_dim=1, output_dim=1, name='linear_map'): def __init__(self, input_dim=1, output_dim=1, name='linear'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.W = Param('W',np.array((self.input_dim, self.output_dim))) self.W = Param('W',np.array((self.input_dim, self.output_dim)))
self.bias = Param('bias',np.array(self.output_dim)) self.bias = Param('bias',np.array(self.output_dim))
self.add_parameters(self.W, self.bias) self.add_parameters(self.W, self.bias)
@ -29,10 +29,15 @@ class Linear(Mapping):
def f(self, X):
    """Affine map of the inputs: f(X) = X W + bias."""
    linear_part = np.dot(X, self.W)
    return linear_part + self.bias
def g(self, f):
    """Inverse mapping: recover X from outputs f.

    Solves (W^T W) V = W^T and returns (f - bias) V, i.e. the
    least-squares inverse of the affine map f(X) = X W + bias.
    """
    # was np.linalg.solve(np.dot(self.W.T, self.W), W.T): bare `W` is an
    # undefined name and raised NameError -> must be self.W.T
    V = np.linalg.solve(np.dot(self.W.T, self.W), self.W.T)
    return np.dot(f - self.bias, V)
def df_dtheta(self, dL_df, X):
    """Gradient of the objective with respect to the parameters, ordered
    as (W flattened, then bias)."""
    dW = (dL_df[:, :, None] * X[:, None, :]).sum(0).T
    dbias = dL_df.sum(0)
    return np.hstack((dW.flatten(), dbias))
def dL_dX(self, partial, X):
    """The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
    weighted = partial[:, None, :] * self.W[None, :, :]
    return weighted.sum(2)

View file

@ -1,18 +1,21 @@
import numpy as np import numpy as np
from scipy.special import erf, erfcx from scipy.special import erf, erfc, erfcx
import sys import sys
epsilon = sys.float_info.epsilon epsilon = sys.float_info.epsilon
lim_val = -np.log(epsilon) lim_val = -np.log(epsilon)
def logisticln(x):
    """Log of the logistic function, -log(1 + exp(-x)), evaluated stably.

    For x <= -lim the exact limit is x (the previous code returned -x,
    a sign error giving a large *positive* log-probability); for
    x >= lim the result saturates at -log(1 + epsilon).
    """
    eps = sys.float_info.epsilon  # machine epsilon, matches the module-level constant
    lim = -np.log(eps)            # |x| beyond which exp(-x) over/underflows
    return np.where(x < lim, np.where(x > -lim, -np.log(1 + np.exp(-x)), x), -np.log(1 + eps))
def logistic(x):
    """Numerically safe sigmoid 1/(1 + exp(-x)), clipped away from 0 and 1."""
    upper = 1 / (1 + epsilon)          # saturation value for x >= lim_val
    lower = epsilon / (epsilon + 1)    # saturation value for x <= -lim_val
    mid = 1 / (1 + np.exp(-x))
    return np.where(x < lim_val, np.where(x > -lim_val, mid, lower), upper)
def normcdf(x):
    """Standard normal cdf via erfc, with the result clipped into
    (epsilon, 1 - epsilon) so downstream logs stay finite."""
    g = 0.5 * erfc(-x / np.sqrt(2))
    g = np.where(g == 0, epsilon, g)
    return np.where(g == 1, 1 - epsilon, g)
def normcdfln(x):
    """Log of the standard normal cdf, using the scaled complementary
    error function for accuracy in the left tail."""
    root2 = np.sqrt(2)
    left_tail = -.5 * x * x + np.log(.5) + np.log(erfcx(-x / root2))
    return np.where(x < 0, left_tail, np.log(normcdf(x)))
def clip_exp(x):
    """exp(x) with the result clipped into [epsilon, 1/epsilon] to avoid
    overflow/underflow at extreme arguments."""
    ex = np.exp(x)
    clipped_low = np.where(x > -lim_val, ex, epsilon)
    return np.where(x < lim_val, clipped_low, 1 / epsilon)

View file

@ -1,6 +1,56 @@
import sympy as sym
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma,polygamma from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma,polygamma
def stabilise(e):
    """Attempt to find the most numerically stable form of an expression.

    Currently a no-op placeholder: the expression is returned unchanged
    (sym.expand was tried and is left commented as a hint).
    """
    # return sym.expand(e)
    return e
class logistic(Function):
    """The logistic (sigmoid) function as a symbolic function."""
    nargs = 1

    def fdiff(self, argindex=1):
        # sigma'(x) = sigma(x) * (1 - sigma(x))
        arg = self.args[0]
        s = logistic(arg)
        return s * (1 - s)

    @classmethod
    def eval(cls, x):
        if x.is_Number:
            return 1 / (1 + exp(-x))
class logisticln(Function):
    """The log logistic, which can often be computed with more precision
    than simply taking log(logistic(x)) when x is small or large."""
    nargs = 1

    def fdiff(self, argindex=1):
        # d/dx log(sigma(x)) = 1 - sigma(x)
        x = self.args[0]
        return 1 - logistic(x)

    @classmethod
    def eval(cls, x):
        if x.is_Number:
            # was np.log(...): numpy is not imported in this module
            # (NameError at evaluation time); the symbolic log also keeps
            # the result exact
            return -log(1 + exp(-x))
class erfc(Function):
    """The complementary error function, erfc(x) = 1 - erf(x). Used as a
    helper, particularly for erfcx (the scaled complementary error
    function) and the normal distribution's cdf."""
    nargs = 1

    @classmethod
    def eval(cls, arg):
        # unconditionally rewrite in terms of erf
        return 1 - erf(arg)
class erfcx(Function):
    """The scaled complementary error function, erfcx(x) = exp(x**2)*erfc(x)."""
    nargs = 1

    def fdiff(self, argindex=1):
        x = self.args[0]
        # d/dx [exp(x**2)*erfc(x)] = 2*x*erfcx(x) - 2/sqrt(pi)
        # (the previous code dropped the factor of 2 on the first term)
        return 2*x*erfcx(x) - 2/sqrt(pi)

    @classmethod
    def eval(cls, x):
        if x.is_Number:
            return exp(x**2)*erfc(x)
class gammaln(Function): class gammaln(Function):
"""The log of the gamma function, which is often needed instead of log(gamma(x)) for better accuracy for large x."""
nargs = 1 nargs = 1
def fdiff(self, argindex=1): def fdiff(self, argindex=1):
@ -13,22 +63,26 @@ class gammaln(Function):
return log(gamma(x)) return log(gamma(x))
class ln_cum_gaussian(Function): class normcdfln(Function):
"""The log of the normal cdf. Can often be computed with better accuracy than log(normcdf(x)), particularly when x is either small or large."""
nargs = 1 nargs = 1
def fdiff(self, argindex=1): def fdiff(self, argindex=1):
x = self.args[0] x = self.args[0]
return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi) #return -erfcx(-x/sqrt(2))/sqrt(2*pi)
#return exp(-normcdfln(x) - 0.5*x*x)/sqrt(2*pi)
return sqrt(2/pi)*1/erfcx(-x/sqrt(2))
@classmethod @classmethod
def eval(cls, x): def eval(cls, x):
if x.is_Number: if x.is_Number:
return log(cum_gaussian(x)) return log(normcdf(x))
def _eval_is_real(self): def _eval_is_real(self):
return self.args[0].is_real return self.args[0].is_real
class cum_gaussian(Function): class normcdf(Function):
"""The cumulative distribution function of the standard normal. Provided as a convenient helper function. It is computed through 0.5*erfc(-x/sqrt(2))."""
nargs = 1 nargs = 1
def fdiff(self, argindex=1): def fdiff(self, argindex=1):
x = self.args[0] x = self.args[0]
@ -37,12 +91,30 @@ class cum_gaussian(Function):
@classmethod @classmethod
def eval(cls, x): def eval(cls, x):
if x.is_Number: if x.is_Number:
return 0.5*(1+erf(sqrt(2)/2*x)) return 0.5*(erfc(-x/sqrt(2)))
def _eval_is_real(self): def _eval_is_real(self):
return self.args[0].is_real return self.args[0].is_real
class normalln(Function):
    """The log of the standard normal density, log N(x|0, 1)."""
    nargs = 1

    def fdiff(self, argindex=1):
        # d/dx [-0.5*log(2*pi) - 0.5*x**2] = -x
        x = self.args[0]
        return -x

    @classmethod
    def eval(cls, x):
        if x.is_Number:
            # was 0.5*sqrt(2*pi) - 0.5*x*x: the normalising constant of the
            # log-density is -0.5*log(2*pi), not 0.5*sqrt(2*pi)
            return -0.5*log(2*pi) - 0.5*x*x

    def _eval_is_real(self):
        return True
class normal(Function):
"""The standard normal distribution. Provided as a convenience function."""
nargs = 1 nargs = 1
@classmethod @classmethod
def eval(cls, x): def eval(cls, x):
@ -273,17 +345,5 @@ class h(Function):
# *(erf(tprime/l - d_j/2.*l) # *(erf(tprime/l - d_j/2.*l)
# + erf(d_j/2.*l)))) # + erf(d_j/2.*l))))
class erfc(Function):
nargs = 1
@classmethod
def eval(cls, arg):
return 1-erf(arg)
class erfcx(Function):
nargs = 1
@classmethod
def eval(cls, arg):
return erfc(arg)*exp(arg*arg)