mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-21 14:05:14 +02:00
Merge branch 'params' of github.com:SheffieldML/GPy into params
This commit is contained in:
commit
90ba9a44eb
9 changed files with 929 additions and 526 deletions
|
|
@ -2,15 +2,21 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import sys
|
||||
import re
|
||||
from ..core.parameterization import Parameterized
|
||||
import numpy as np
|
||||
import sympy as sym
|
||||
from ..core.parameterization import Param
|
||||
from sympy.utilities.lambdify import lambdastr, _imp_namespace, _get_namespace
|
||||
from sympy.utilities.iterables import numbered_symbols
|
||||
from sympy import exp
|
||||
from numpy import exp
|
||||
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
|
||||
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln
|
||||
def getFromDict(dataDict, mapList):
|
||||
return reduce(lambda d, k: d[k], mapList, dataDict)
|
||||
|
||||
def setInDict(dataDict, mapList, value):
|
||||
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
|
||||
|
||||
class Symbolic_core():
|
||||
"""
|
||||
|
|
@ -21,24 +27,42 @@ class Symbolic_core():
|
|||
# Base class init, do some basic derivatives etc.
|
||||
|
||||
# Func_modules sets up the right mapping for functions.
|
||||
self.func_modules = func_modules
|
||||
self.func_modules += [{'gamma':gamma,
|
||||
'gammaln':gammaln,
|
||||
'erf':erf, 'erfc':erfc,
|
||||
'erfcx':erfcx,
|
||||
'polygamma':polygamma,
|
||||
'normcdf':normcdf,
|
||||
'normcdfln':normcdfln,
|
||||
'logistic':logistic,
|
||||
'logisticln':logisticln},
|
||||
'numpy']
|
||||
func_modules += [{'gamma':gamma,
|
||||
'gammaln':gammaln,
|
||||
'erf':erf, 'erfc':erfc,
|
||||
'erfcx':erfcx,
|
||||
'polygamma':polygamma,
|
||||
'normcdf':normcdf,
|
||||
'normcdfln':normcdfln,
|
||||
'logistic':logistic,
|
||||
'logisticln':logisticln},
|
||||
'numpy']
|
||||
|
||||
self._set_expressions(expressions)
|
||||
self._set_variables(cacheable)
|
||||
self._set_derivatives(derivatives)
|
||||
self._set_parameters(parameters)
|
||||
self.namespace = [globals(), self.__dict__]
|
||||
# Convert the expressions to a list for common sub expression elimination
|
||||
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
|
||||
self.update_expression_list()
|
||||
|
||||
# Apply any global stabilisation operations to expressions.
|
||||
self.global_stabilize()
|
||||
|
||||
# Helper functions to get data in and out of dictionaries.
|
||||
# this code from http://stackoverflow.com/questions/14692690/access-python-nested-dictionary-items-via-a-list-of-keys
|
||||
|
||||
self.extract_sub_expressions()
|
||||
self._gen_code()
|
||||
self._set_namespace(func_modules)
|
||||
|
||||
def _set_namespace(self, namespaces):
|
||||
"""Set the name space for use when calling eval. This needs to contain all the relvant functions for mapping from symbolic python to the numerical python. It also contains variables, cached portions etc."""
|
||||
self.namespace = {}
|
||||
for m in namespaces[::-1]:
|
||||
buf = _get_namespace(m)
|
||||
self.namespace.update(buf)
|
||||
self.namespace.update(self.__dict__)
|
||||
|
||||
def _set_expressions(self, expressions):
|
||||
"""Extract expressions and variables from the user provided expressions."""
|
||||
|
|
@ -79,7 +103,7 @@ class Symbolic_core():
|
|||
|
||||
# Do symbolic work to compute derivatives.
|
||||
for key, func in self.expressions.items():
|
||||
self.expressions[key]['derivative'] = {theta.name : sym.diff(func['function'],theta) for theta in derivative_arguments}
|
||||
self.expressions[key]['derivative'] = {theta.name : self.stabilize(sym.diff(func['function'],theta)) for theta in derivative_arguments}
|
||||
|
||||
def _set_parameters(self, parameters):
|
||||
"""Add parameters to the model and initialize with given values."""
|
||||
|
|
@ -92,32 +116,45 @@ class Symbolic_core():
|
|||
# Add parameter.
|
||||
|
||||
self.add_parameters(Param(theta.name, val, None))
|
||||
#setattr(self, theta.name, )
|
||||
#self._set_attribute(theta.name, )
|
||||
|
||||
def eval_parameters_changed(self):
|
||||
# TODO: place checks for inf/nan in here
|
||||
# do all the precomputation codes.
|
||||
for variable, code in sorted(self.code['parameters_change'].iteritems()):
|
||||
setattr(self, variable, eval(code, *self.namespace))
|
||||
self.eval_update_cache()
|
||||
|
||||
def eval_update_cache(self, **kwargs):
|
||||
# TODO: place checks for inf/nan in here
|
||||
# for all provided keywords
|
||||
for variable, value in kwargs.items():
|
||||
|
||||
for var in sorted(self.code['parameters_changed'].keys(), key=lambda x: int(re.findall(r'\d+$', x)[0])):
|
||||
code = self.code['parameters_changed'][var]
|
||||
self._set_attribute(var, eval(code, self.namespace))
|
||||
|
||||
for var, value in kwargs.items():
|
||||
# update their cached values
|
||||
if value is not None:
|
||||
if variable == 'X' or variable == 'F' or variable == 'Mu':
|
||||
for i, theta in enumerate(self.variables[variable]):
|
||||
setattr(self, theta.name, value[:, i][:, None])
|
||||
elif variable.name == 'Z':
|
||||
for i, theta in enumerate(self.variables[variable]):
|
||||
setattr(self, theta.name, value[:, i][None, :])
|
||||
if var == 'X' or var == 'F' or var == 'M':
|
||||
value = np.atleast_2d(value)
|
||||
for i, theta in enumerate(self.variables[var]):
|
||||
self._set_attribute(theta.name, value[:, i][:, None])
|
||||
elif var == 'Y':
|
||||
# Y values can be missing.
|
||||
value = np.atleast_2d(value)
|
||||
for i, theta in enumerate(self.variables[var]):
|
||||
self._set_attribute('missing' + str(i), np.isnan(value[:, i]))
|
||||
self._set_attribute(theta.name, value[:, i][:, None])
|
||||
elif var == 'Z':
|
||||
value = np.atleast_2d(value)
|
||||
for i, theta in enumerate(self.variables[var]):
|
||||
self._set_attribute(theta.name, value[:, i][None, :])
|
||||
else:
|
||||
setattr(self, theta.name, value[:, i])
|
||||
|
||||
for variable, code in sorted(self.code['update_cache'].iteritems()):
|
||||
setattr(self, variable, eval(code, *self.namespace))
|
||||
value = np.atleast_1d(value)
|
||||
for i, theta in enumerate(self.variables[var]):
|
||||
self._set_attribute(theta.name, value[i])
|
||||
for var in sorted(self.code['update_cache'].keys(), key=lambda x: int(re.findall(r'\d+$', x)[0])):
|
||||
code = self.code['update_cache'][var]
|
||||
self._set_attribute(var, eval(code, self.namespace))
|
||||
|
||||
def eval_update_gradients(self, function, partial, **kwargs):
|
||||
# TODO: place checks for inf/nan in here
|
||||
|
|
@ -126,7 +163,7 @@ class Symbolic_core():
|
|||
code = self.code[function]['derivative'][theta.name]
|
||||
setattr(getattr(self, theta.name),
|
||||
'gradient',
|
||||
(partial*eval(code, *self.namespace)).sum())
|
||||
(partial*eval(code, self.namespace)).sum())
|
||||
|
||||
def eval_gradients_X(self, function, partial, **kwargs):
|
||||
if kwargs.has_key('X'):
|
||||
|
|
@ -134,17 +171,17 @@ class Symbolic_core():
|
|||
self.eval_update_cache(**kwargs)
|
||||
for i, theta in enumerate(self.variables['X']):
|
||||
code = self.code[function]['derivative'][theta.name]
|
||||
gradients_X[:, i:i+1] = partial*eval(code, *self.namespace)
|
||||
gradients_X[:, i:i+1] = partial*eval(code, self.namespace)
|
||||
return gradients_X
|
||||
|
||||
def eval_function(self, function, **kwargs):
|
||||
self.eval_update_cache(**kwargs)
|
||||
return eval(self.code[function]['function'], *self.namespace)
|
||||
return eval(self.code[function]['function'], self.namespace)
|
||||
|
||||
def code_parameters_changed(self):
|
||||
# do all the precomputation codes.
|
||||
lcode = ''
|
||||
for variable, code in sorted(self.code['parameters_change'].iteritems()):
|
||||
for variable, code in sorted(self.code['parameters_changed'].iteritems()):
|
||||
lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
|
||||
return lcode
|
||||
|
||||
|
|
@ -159,6 +196,7 @@ class Symbolic_core():
|
|||
else:
|
||||
reorder = ''
|
||||
for i, theta in enumerate(self.variables[var]):
|
||||
lcode+= "\t" + var + '= np.atleast_2d(' + var + ')'
|
||||
lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n"
|
||||
|
||||
for variable, code in sorted(self.code['update_cache'].iteritems()):
|
||||
|
|
@ -173,13 +211,15 @@ class Symbolic_core():
|
|||
lcode += self._print_code(theta.name) + '.gradient = (partial*(' + self._print_code(code) + ')).sum()\n'
|
||||
return lcode
|
||||
|
||||
def code_gradients_X(self, function):
|
||||
lcode = 'gradients_X = np.zeros_like(X)\n'
|
||||
def code_gradients_cacheable(self, function, variable):
|
||||
if variable not in self.cacheable:
|
||||
raise RuntimeError, variable + ' must be a cacheable.'
|
||||
lcode = 'gradients_' + variable + ' = np.zeros_like(' + variable + ')\n'
|
||||
lcode += 'self.update_cache(' + ', '.join(self.cacheable) + ')\n'
|
||||
for i, theta in enumerate(self.variables['X']):
|
||||
for i, theta in enumerate(self.variables[variable]):
|
||||
code = self.code[function]['derivative'][theta.name]
|
||||
lcode += 'gradients_X[:, ' + str(i) + ':' + str(i) + '+1] = partial*' + self._print_code(code) + '\n'
|
||||
lcode += 'return gradients_X\n'
|
||||
lcode += 'gradients_' + variable + '[:, ' + str(i) + ':' + str(i) + '+1] = partial*' + self._print_code(code) + '\n'
|
||||
lcode += 'return gradients_' + variable + '\n'
|
||||
return lcode
|
||||
|
||||
def code_function(self, function):
|
||||
|
|
@ -187,58 +227,21 @@ class Symbolic_core():
|
|||
lcode += 'return ' + self._print_code(self.code[function]['function'])
|
||||
return lcode
|
||||
|
||||
def stabilise(self):
|
||||
def stabilize(self, expr):
|
||||
"""Stabilize the code in the model."""
|
||||
# this code is applied to all expressions in the model in an attempt to sabilize them.
|
||||
# this code is applied to expressions in the model in an attempt to sabilize them.
|
||||
return expr
|
||||
|
||||
def global_stabilize(self):
|
||||
"""Stabilize all code in the model."""
|
||||
pass
|
||||
|
||||
def _gen_namespace(self, modules=None, use_imps=True):
|
||||
"""Gets the relevant namespaces for the given expressions."""
|
||||
from sympy.core.symbol import Symbol
|
||||
|
||||
# If the user hasn't specified any modules, use what is available.
|
||||
module_provided = True
|
||||
if modules is None:
|
||||
module_provided = False
|
||||
# Use either numpy (if available) or python.math where possible.
|
||||
# XXX: This leads to different behaviour on different systems and
|
||||
# might be the reason for irreproducible errors.
|
||||
modules = ["math", "mpmath", "sympy"]
|
||||
|
||||
try:
|
||||
_import("numpy")
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
modules.insert(1, "numpy")
|
||||
def _set_attribute(self, name, value):
|
||||
"""Make sure namespace gets updated when setting attributes."""
|
||||
setattr(self, name, value)
|
||||
self.namespace.update({name: getattr(self, name)})
|
||||
|
||||
|
||||
# Get the needed namespaces.
|
||||
namespaces = []
|
||||
# First find any function implementations
|
||||
if use_imps:
|
||||
for expr in self._expression_list:
|
||||
namespaces.append(_imp_namespace(expr))
|
||||
# Check for dict before iterating
|
||||
if isinstance(modules, (dict, str)) or not hasattr(modules, '__iter__'):
|
||||
namespaces.append(modules)
|
||||
else:
|
||||
namespaces += list(modules)
|
||||
# fill namespace with first having highest priority
|
||||
namespace = {}
|
||||
for m in namespaces[::-1]:
|
||||
buf = _get_namespace(m)
|
||||
namespace.update(buf)
|
||||
for expr in self._expression_list:
|
||||
if hasattr(expr, "atoms"):
|
||||
#Try if you can extract symbols from the expression.
|
||||
#Move on if expr.atoms in not implemented.
|
||||
syms = expr.atoms(Symbol)
|
||||
for term in syms:
|
||||
namespace.update({str(term): term})
|
||||
|
||||
|
||||
return namespace
|
||||
def update_expression_list(self):
|
||||
"""Extract a list of expressions from the dictionary of expressions."""
|
||||
self.expression_list = [] # code arrives in dictionary, but is passed in this list
|
||||
|
|
@ -250,123 +253,141 @@ class Symbolic_core():
|
|||
self.expression_list.append(texpressions)
|
||||
self.expression_keys.append([fname, type])
|
||||
self.expression_order.append(1)
|
||||
self.code[fname] = {type: ''}
|
||||
elif type[-10:] == 'derivative':
|
||||
self.code[fname] = {type:{}}
|
||||
for dtype, expression in texpressions.items():
|
||||
self.expression_list.append(expression)
|
||||
self.expression_keys.append([fname, type, dtype])
|
||||
if type[:-10] == 'first' or type[:-10] == '':
|
||||
if type[:-10] == 'first_' or type[:-10] == '':
|
||||
self.expression_order.append(3) #sym.count_ops(self.expressions[type][dtype]))
|
||||
elif type[:-10] == 'second':
|
||||
elif type[:-10] == 'second_':
|
||||
self.expression_order.append(4) #sym.count_ops(self.expressions[type][dtype]))
|
||||
elif type[:-10] == 'third':
|
||||
elif type[:-10] == 'third_':
|
||||
self.expression_order.append(5) #sym.count_ops(self.expressions[type][dtype]))
|
||||
self.code[fname][type][dtype] = ''
|
||||
else:
|
||||
self.expression_list.append(fexpressions[type])
|
||||
self.expression_keys.append([fname, type])
|
||||
self.expression_order.append(2)
|
||||
self.code[fname][type] = ''
|
||||
|
||||
# This step may be unecessary.
|
||||
# Not 100% sure if the sub expression elimination is order sensitive. This step orders the list with the 'function' code first and derivatives after.
|
||||
self.expression_order, self.expression_list, self.expression_keys = zip(*sorted(zip(self.expression_order, self.expression_list, self.expression_keys)))
|
||||
|
||||
def extract_sub_expressions(self, cache_prefix='cache', sub_prefix='sub', prefix='XoXoXoX'):
|
||||
# Do the common sub expression elimination.
|
||||
common_sub_expressions, expression_substituted_list = sym.cse(self.expression_list, numbered_symbols(prefix=prefix))
|
||||
|
||||
def _gen_code(self, cache_prefix = 'cache', sub_prefix = 'sub', prefix='XoXoXoX'):
|
||||
"""Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""
|
||||
# This is the dictionary that stores all the generated code.
|
||||
self.code = {}
|
||||
self.variables[cache_prefix] = []
|
||||
self.variables[sub_prefix] = []
|
||||
|
||||
# Convert the expressions to a list for common sub expression elimination
|
||||
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
|
||||
self.update_expression_list()
|
||||
|
||||
# Apply any global stabilisation operations to expressions.
|
||||
self.stabilise()
|
||||
|
||||
# Helper functions to get data in and out of dictionaries.
|
||||
# this code from http://stackoverflow.com/questions/14692690/access-python-nested-dictionary-items-via-a-list-of-keys
|
||||
def getFromDict(dataDict, mapList):
|
||||
return reduce(lambda d, k: d[k], mapList, dataDict)
|
||||
def setInDict(dataDict, mapList, value):
|
||||
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
|
||||
|
||||
|
||||
# Do the common sub expression elimination
|
||||
subexpressions, expression_substituted_list = sym.cse(self.expression_list, numbered_symbols(prefix=prefix))
|
||||
cacheable_list = []
|
||||
# Create dictionary of new sub expressions
|
||||
sub_expression_dict = {}
|
||||
for var, void in common_sub_expressions:
|
||||
sub_expression_dict[var.name] = var
|
||||
|
||||
# Sort out any expression that's dependent on something that scales with data size (these are listed in cacheable).
|
||||
self.expressions['parameters_change'] = []
|
||||
self.expressions['update_cache'] = []
|
||||
cache_expressions_list = []
|
||||
sub_expression_list = []
|
||||
for expr in subexpressions:
|
||||
arg_list = [e for e in expr[1].atoms() if e.is_Symbol]
|
||||
cacheable_list = []
|
||||
params_change_list = []
|
||||
# common_sube_expressions contains a list of paired tuples with the new variable and what it equals
|
||||
for var, expr in common_sub_expressions:
|
||||
arg_list = [e for e in expr.atoms() if e.is_Symbol]
|
||||
# List any cacheable dependencies of the sub-expression
|
||||
cacheable_symbols = [e for e in arg_list if e in cacheable_list or e in self.cacheable_vars]
|
||||
if cacheable_symbols:
|
||||
self.expressions['update_cache'].append((expr[0].name, self._expr2code(arg_list, expr[1])))
|
||||
# list which ensures dependencies are cacheable.
|
||||
cacheable_list.append(expr[0])
|
||||
cache_expressions_list.append(expr[0].name)
|
||||
cacheable_list.append(var)
|
||||
else:
|
||||
self.expressions['parameters_change'].append((expr[0].name, self._expr2code(arg_list, expr[1])))
|
||||
sub_expression_list.append(expr[0].name)
|
||||
params_change_list.append(var)
|
||||
|
||||
replace_dict = {}
|
||||
for i, expr in enumerate(cacheable_list):
|
||||
sym_var = sym.var(cache_prefix + str(i))
|
||||
self.variables[cache_prefix].append(sym_var)
|
||||
replace_dict[expr.name] = sym_var
|
||||
|
||||
for i, expr in enumerate(params_change_list):
|
||||
sym_var = sym.var(sub_prefix + str(i))
|
||||
self.variables[sub_prefix].append(sym_var)
|
||||
replace_dict[expr.name] = sym_var
|
||||
|
||||
for replace, void in common_sub_expressions:
|
||||
for expr, keys in zip(expression_substituted_list, self.expression_keys):
|
||||
setInDict(self.expressions, keys, expr.subs(replace, replace_dict[replace.name]))
|
||||
for void, expr in common_sub_expressions:
|
||||
expr = expr.subs(replace, replace_dict[replace.name])
|
||||
|
||||
# Replace original code with code including subexpressions.
|
||||
for expr, keys in zip(expression_substituted_list, self.expression_keys):
|
||||
arg_list = [e for e in expr.atoms() if e.is_Symbol]
|
||||
setInDict(self.code, keys, self._expr2code(arg_list, expr))
|
||||
setInDict(self.expressions, keys, expr)
|
||||
for keys in self.expression_keys:
|
||||
for replace, void in common_sub_expressions:
|
||||
setInDict(self.expressions, keys, getFromDict(self.expressions, keys).subs(replace, replace_dict[replace.name]))
|
||||
|
||||
# Create variable names for cache and sub expression portions
|
||||
cache_dict = {}
|
||||
self.variables[cache_prefix] = []
|
||||
for i, sub in enumerate(cache_expressions_list):
|
||||
name = cache_prefix + str(i)
|
||||
cache_dict[sub] = name
|
||||
self.variables[cache_prefix].append(sym.var(name))
|
||||
self.expressions['parameters_changed'] = {}
|
||||
self.expressions['update_cache'] = {}
|
||||
for var, expr in common_sub_expressions:
|
||||
for replace, void in common_sub_expressions:
|
||||
expr = expr.subs(replace, replace_dict[replace.name])
|
||||
if var in cacheable_list:
|
||||
self.expressions['update_cache'][replace_dict[var.name].name] = expr
|
||||
else:
|
||||
self.expressions['parameters_changed'][replace_dict[var.name].name] = expr
|
||||
|
||||
sub_dict = {}
|
||||
self.variables[sub_prefix] = []
|
||||
for i, sub in enumerate(sub_expression_list):
|
||||
name = sub_prefix + str(i)
|
||||
sub_dict[sub] = name
|
||||
self.variables[sub_prefix].append(sym.var(name))
|
||||
|
||||
# Replace sub expressions in main code with either cacheN or subN.
|
||||
for key, val in cache_dict.iteritems():
|
||||
for keys in self.expression_keys:
|
||||
setInDict(self.code, keys,
|
||||
getFromDict(self.code,keys).replace(key, val))
|
||||
|
||||
for key, val in sub_dict.iteritems():
|
||||
for keys in self.expression_keys:
|
||||
setInDict(self.code, keys,
|
||||
getFromDict(self.code,keys).replace(key, val))
|
||||
# for var, expr in common_sub_expressions:
|
||||
# if var in list(cacheable_list):
|
||||
# self.expressions['update_cache'].append({var.name: expr.subarg_list = [e for e in sub_expr_pair[1].atoms() if e.is_Symbol]
|
||||
# cacheable_symbols = [e for e in arg_list if e in cacheable_list or e in self.cacheable_vars]
|
||||
# if cacheable_symbols:
|
||||
# self.expressions['update_cache'].append((sub_expr_pair[0].name, self._expr2code(arg_list, sub_expr_pair[1])))
|
||||
# # list which ensures dependencies are cacheable.
|
||||
# cacheable_list.append(sub_expr_pair[0])
|
||||
# cache_expressions_list.append(sub_expr_pair[0].name)
|
||||
# else:
|
||||
# self.expressions['parameters_change'].append((sub_expr_pair[0].name, self._expr2code(arg_list, sub_expr_pair[1])))
|
||||
# sub_expression_list.append(sub_expr_pair[0].name)
|
||||
|
||||
# Set up precompute code as either cacheN or subN.
|
||||
self.code['update_cache'] = {}
|
||||
for key, val in self.expressions['update_cache']:
|
||||
expr = val
|
||||
for key2, val2 in cache_dict.iteritems():
|
||||
expr = expr.replace(key2, val2)
|
||||
for key2, val2 in sub_dict.iteritems():
|
||||
expr = expr.replace(key2, val2)
|
||||
self.code['update_cache'][cache_dict[key]] = expr
|
||||
def _gen_code(self):
|
||||
"""Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""
|
||||
# This is the dictionary that stores all the generated code.
|
||||
|
||||
self.expressions['update_cache'] = dict(self.expressions['update_cache'])
|
||||
self.code['parameters_change'] = {}
|
||||
for key, val in self.expressions['parameters_change']:
|
||||
expr = val
|
||||
for key2, val2 in cache_dict.iteritems():
|
||||
expr = expr.replace(key2, val2)
|
||||
for key2, val2 in sub_dict.iteritems():
|
||||
expr = expr.replace(key2, val2)
|
||||
self.code['parameters_change'][sub_dict[key]] = expr
|
||||
self.expressions['parameters_change'] = dict(self.expressions['parameters_change'])
|
||||
self.code = {}
|
||||
def match_key(expr):
|
||||
if type(expr) is dict:
|
||||
code = {}
|
||||
for key in expr.keys():
|
||||
code[key] = match_key(expr[key])
|
||||
else:
|
||||
arg_list = [e for e in expr.atoms() if e.is_Symbol]
|
||||
code = self._expr2code(arg_list, expr)
|
||||
return code
|
||||
|
||||
self.code = match_key(self.expressions)
|
||||
|
||||
|
||||
# for keys in self.expression_keys:
|
||||
# expr = getFromDict(self.expressions, keys)
|
||||
# arg_list = [e for e in expr.atoms() if e.is_Symbol]
|
||||
# setInDict(self.code, keys, self._expr2code(arg_list, expr))
|
||||
|
||||
# # Set up precompute code as either cacheN or subN.
|
||||
# self.code['update_cache'] = {}
|
||||
# for key, val in self.expressions['update_cache']:
|
||||
# expr = val
|
||||
# for key2, val2 in cache_dict.iteritems():
|
||||
# expr = expr.replace(key2, val2.name)
|
||||
# for key2, val2 in sub_dict.iteritems():
|
||||
# expr = expr.replace(key2, val2.name)
|
||||
# self.code['update_cache'][cache_dict[key]] = expr
|
||||
|
||||
# self.expressions['update_cache'] = dict(self.expressions['update_cache'])
|
||||
# self.code['parameters_change'] = {}
|
||||
# for key, val in self.expressions['parameters_change']:
|
||||
# expr = val
|
||||
# for key2, val2 in cache_dict.iteritems():
|
||||
# expr = expr.replace(key2, val2.name)
|
||||
# for key2, val2 in sub_dict.iteritems():
|
||||
# expr = expr.replace(key2, val2.name)
|
||||
# self.code['parameters_change'][sub_dict[key]] = expr
|
||||
# self.expressions['parameters_change'] = dict(self.expressions['parameters_change'])
|
||||
|
||||
def _expr2code(self, arg_list, expr):
|
||||
"""Convert the given symbolic expression into code."""
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@ except ImportError:
|
|||
if sympy_available:
|
||||
# These are likelihoods that rely on symbolic.
|
||||
from symbolic import Symbolic
|
||||
#from sstudent_t import SstudentT
|
||||
from sstudent_t import SstudentT
|
||||
from negative_binomial import Negative_binomial
|
||||
#from skew_normal import Skew_normal
|
||||
#from skew_exponential import Skew_exponential
|
||||
from skew_normal import Skew_normal
|
||||
from skew_exponential import Skew_exponential
|
||||
#from null_category import Null_category
|
||||
|
|
|
|||
|
|
@ -16,33 +16,33 @@ import link_functions
|
|||
from symbolic import Symbolic
|
||||
from scipy import stats
|
||||
|
||||
if sympy_available:
|
||||
class Negative_binomial(Symbolic):
|
||||
"""
|
||||
Negative binomial
|
||||
class Negative_binomial(Symbolic):
|
||||
"""
|
||||
Negative binomial
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
|
||||
.. math::
|
||||
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
|
||||
|
||||
.. Note::
|
||||
Y takes non zero integer values..
|
||||
link function should have a positive domain, e.g. log (default).
|
||||
.. Note::
|
||||
Y takes non zero integer values..
|
||||
link function should have a positive domain, e.g. log (default).
|
||||
|
||||
.. See also::
|
||||
symbolic.py, for the parent class
|
||||
"""
|
||||
def __init__(self, gp_link=None):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
.. See also::
|
||||
symbolic.py, for the parent class
|
||||
"""
|
||||
def __init__(self, gp_link=None, dispersion=1.0):
|
||||
parameters = {'dispersion':dispersion}
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
|
||||
dispersion = sym.Symbol('dispersion', positive=True, real=True)
|
||||
y = sym.Symbol('y', nonnegative=True, integer=True)
|
||||
f = sym.Symbol('f', positive=True, real=True)
|
||||
gp_link = link_functions.Log()
|
||||
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f)
|
||||
#log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion))
|
||||
super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial')
|
||||
dispersion = sym.Symbol('dispersion', positive=True, real=True)
|
||||
y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
|
||||
f_0 = sym.Symbol('f_0', positive=True, real=True)
|
||||
gp_link = link_functions.Log()
|
||||
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y_0)*sym.log(dispersion+f_0) + gammaln(y_0+dispersion) - gammaln(y_0+1) - gammaln(dispersion) + y_0*sym.log(f_0)
|
||||
#log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion))
|
||||
super(Negative_binomial, self).__init__(log_pdf=log_pdf, parameters=parameters, gp_link=gp_link, name='Negative_binomial')
|
||||
|
||||
# TODO: Check this.
|
||||
self.log_concave = False
|
||||
# TODO: Check this.
|
||||
self.log_concave = False
|
||||
|
||||
|
|
|
|||
|
|
@ -1,316 +1,279 @@
|
|||
# Copyright (c) 2014 GPy Authors
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
try:
|
||||
import sympy as sym
|
||||
sympy_available=True
|
||||
from sympy.utilities.lambdify import lambdify
|
||||
except ImportError:
|
||||
sympy_available=False
|
||||
|
||||
import sympy as sym
|
||||
import numpy as np
|
||||
import link_functions
|
||||
from scipy import stats, integrate
|
||||
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
|
||||
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln
|
||||
from likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.symbolic import Symbolic_core
|
||||
|
||||
|
||||
if sympy_available:
|
||||
class Symbolic(Likelihood):
|
||||
class Symbolic(Likelihood, Symbolic_core):
|
||||
"""
|
||||
Symbolic likelihood.
|
||||
|
||||
Likelihood where the form of the likelihood is provided by a sympy expression.
|
||||
|
||||
"""
|
||||
def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, parameters=None, func_modules=[]):
|
||||
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
|
||||
if log_pdf is None:
|
||||
raise ValueError, "You must provide an argument for the log pdf."
|
||||
|
||||
Likelihood.__init__(self, gp_link, name=name)
|
||||
functions = {'log_pdf':log_pdf}
|
||||
self.cacheable = ['F', 'Y']
|
||||
|
||||
self.missing_data = False
|
||||
if missing_log_pdf:
|
||||
self.missing_data = True
|
||||
functions['missing_log_pdf']=missing_log_pdf
|
||||
|
||||
self.ep_analytic = False
|
||||
if logZ:
|
||||
self.ep_analytic = True
|
||||
functions['logZ'] = logZ
|
||||
self.cacheable += ['M', 'V']
|
||||
|
||||
Symbolic_core.__init__(self, functions, cacheable=self.cacheable, derivatives = ['F', 'theta'], parameters=parameters, func_modules=func_modules)
|
||||
|
||||
# TODO: Is there an easy way to check whether the pdf is log
|
||||
self.log_concave = log_concave
|
||||
|
||||
|
||||
|
||||
def _set_derivatives(self, derivatives):
|
||||
# these are arguments for computing derivatives.
|
||||
print "Whoop"
|
||||
Symbolic_core._set_derivatives(self, derivatives)
|
||||
|
||||
# add second and third derivatives for Laplace approximation.
|
||||
derivative_arguments = []
|
||||
if derivatives is not None:
|
||||
for derivative in derivatives:
|
||||
derivative_arguments += self.variables[derivative]
|
||||
exprs = ['log_pdf']
|
||||
if self.missing_data:
|
||||
exprs.append('missing_log_pdf')
|
||||
for expr in exprs:
|
||||
self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative']['f_0'], theta)) for theta in derivative_arguments}
|
||||
self.expressions[expr]['third_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['second_derivative']['f_0'], theta)) for theta in derivative_arguments}
|
||||
if self.ep_analytic:
|
||||
derivative_arguments = [M]
|
||||
# add second derivative for EP
|
||||
exprs = ['logZ']
|
||||
if self.missing_data:
|
||||
exprs.append('missing_logZ')
|
||||
for expr in exprs:
|
||||
self.expressions[expr]['second_derivative'] = {theta.name : self.stabilize(sym.diff(self.expressions[expr]['derivative'], theta)) for theta in derivative_arguments}
|
||||
|
||||
|
||||
def eval_update_cache(self, Y, **kwargs):
|
||||
# TODO: place checks for inf/nan in here
|
||||
# for all provided keywords
|
||||
Symbolic_core.eval_update_cache(self, Y=Y, **kwargs)
|
||||
# Y = np.atleast_2d(Y)
|
||||
# for variable, code in sorted(self.code['parameters_changed'].iteritems()):
|
||||
# self._set_attribute(variable, eval(code, self.namespace))
|
||||
# for i, theta in enumerate(self.variables['Y']):
|
||||
# missing = np.isnan(Y[:, i])
|
||||
# self._set_attribute('missing_' + str(i), missing)
|
||||
# self._set_attribute(theta.name, value[missing, i][:, None])
|
||||
# for variable, value in kwargs.items():
|
||||
# # update their cached values
|
||||
# if value is not None:
|
||||
# if variable == 'F' or variable == 'M' or variable == 'V' or variable == 'Y_metadata':
|
||||
# for i, theta in enumerate(self.variables[variable]):
|
||||
# self._set_attribute(theta.name, value[:, i][:, None])
|
||||
# else:
|
||||
# self._set_attribute(theta.name, value[:, i])
|
||||
# for variable, code in sorted(self.code['update_cache'].iteritems()):
|
||||
# self._set_attribute(variable, eval(code, self.namespace))
|
||||
|
||||
|
||||
def parameters_changed(self):
|
||||
pass
|
||||
|
||||
def update_gradients(self, grads):
|
||||
"""
|
||||
Symbolic likelihood.
|
||||
Pull out the gradients, be careful as the order must match the order
|
||||
in which the parameters are added
|
||||
"""
|
||||
# The way the Laplace approximation is run requires the
|
||||
# covariance function to compute the true gradient (because it
|
||||
# is dependent on the mode). This means we actually compute
|
||||
# the gradient outside this object. This function would
|
||||
# normally ask the object to update its gradients internally,
|
||||
# but here it provides them externally, because they are
|
||||
# computed in the inference code. TODO: Thought: How does this
|
||||
# effect EP? Shouldn't this be done by a separate
|
||||
# Laplace-approximation specific call?
|
||||
for theta, grad in zip(self.variables['theta'], grads):
|
||||
parameter = getattr(self, theta.name)
|
||||
setattr(parameter, 'gradient', grad)
|
||||
|
||||
Likelihood where the form of the likelihood is provided by a sympy expression.
|
||||
def pdf_link(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given inverse link of f.
|
||||
|
||||
:param f: inverse link of latent variables.
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
return np.exp(self.logpdf_link(f, y, Y_metadata=None))
|
||||
|
||||
def logpdf_link(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given inverse link of latent variables.
|
||||
|
||||
:param f: latent variables (inverse link of f)
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
"""
|
||||
def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, param=None, func_modules=[]):
|
||||
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
|
||||
if self.missing_data:
|
||||
missing_flag = np.isnan(y)
|
||||
not_missing_flag = np.logical_not(missing_flag)
|
||||
ll = self.eval_function('missing_log_pdf', F=f[missing_flag]).sum()
|
||||
ll += self.eval_function('log_pdf', F=f[not_missing_flag], Y=y[not_missing_flag], Y_metadata=Y_metadata[not_missing_flag]).sum()
|
||||
else:
|
||||
ll = self.eval_function('log_pdf', F=f, Y=y, Y_metadata=Y_metadata).sum()
|
||||
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
return ll
|
||||
|
||||
if log_pdf is None:
|
||||
raise ValueError, "You must provide an argument for the log pdf."
|
||||
def dlogpdf_dlink(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of log likelihood with respect to the inverse link function.
|
||||
|
||||
self.func_modules = func_modules
|
||||
self.func_modules += [{'gamma':gamma,
|
||||
'gammaln':gammaln,
|
||||
'erf':erf, 'erfc':erfc,
|
||||
'erfcx':erfcx,
|
||||
'polygamma':polygamma,
|
||||
'normcdf':normcdf,
|
||||
'normcdfln':normcdfln,
|
||||
'logistic':logistic,
|
||||
'logisticln':logisticln},
|
||||
'numpy']
|
||||
:param f: latent variables (inverse link of f)
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata
|
||||
:returns: gradient of likelihood with respect to each point.
|
||||
:rtype: Nx1 array
|
||||
|
||||
super(Symbolic, self).__init__(gp_link, name=name)
|
||||
self.missing_data = False
|
||||
self._sym_log_pdf = log_pdf
|
||||
if missing_log_pdf:
|
||||
self.missing_data = True
|
||||
self._sym_missing_log_pdf = missing_log_pdf
|
||||
"""
|
||||
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
|
||||
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y),
|
||||
eval(self.code['missing_log_pdf']['derivative']['f_0'], self.namespace),
|
||||
eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
|
||||
else:
|
||||
return np.where(np.isnan(y),
|
||||
0.,
|
||||
eval(self.code['log_pdf']['derivative']['f_0'], self.namespace))
|
||||
|
||||
# pull the variable names out of the symbolic pdf
|
||||
sym_vars = [e for e in self._sym_log_pdf.atoms() if e.is_Symbol]
|
||||
self._sym_f = [e for e in sym_vars if e.name=='f']
|
||||
if not self._sym_f:
|
||||
raise ValueError('No variable f in log pdf.')
|
||||
self._sym_y = [e for e in sym_vars if e.name=='y']
|
||||
if not self._sym_y:
|
||||
raise ValueError('No variable y in log pdf.')
|
||||
self._sym_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name)
|
||||
def d2logpdf_dlink2(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
|
||||
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
|
||||
|
||||
theta_names = [theta.name for theta in self._sym_theta]
|
||||
|
||||
:param f: inverse link of the latent variables.
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Returns diagonal of Hessian, since every where else it is
|
||||
0, as the likelihood factorizes over cases (the
|
||||
distribution for y_i depends only on link(f_i) not on
|
||||
link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
|
||||
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y),
|
||||
eval(self.code['missing_log_pdf']['second_derivative']['f_0'], self.namespace),
|
||||
eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
|
||||
else:
|
||||
return np.where(np.isnan(y),
|
||||
0.,
|
||||
eval(self.code['log_pdf']['second_derivative']['f_0'], self.namespace))
|
||||
|
||||
def d3logpdf_dlink3(self, f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
|
||||
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y),
|
||||
eval(self.code['missing_log_pdf']['third_derivative']['f_0'], self.namespace),
|
||||
eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
|
||||
else:
|
||||
return np.where(np.isnan(y),
|
||||
0.,
|
||||
eval(self.code['log_pdf']['third_derivative']['f_0'], self.namespace))
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
|
||||
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
|
||||
for i, theta in enumerate(self.variables['theta']):
|
||||
if self.missing_data:
|
||||
# pull the variable names out of missing data
|
||||
sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol]
|
||||
sym_f = [e for e in sym_vars if e.name=='f']
|
||||
if not sym_f:
|
||||
raise ValueError('No variable f in missing data log pdf.')
|
||||
sym_y = [e for e in sym_vars if e.name=='y']
|
||||
if sym_y:
|
||||
raise ValueError('Data is present in missing data portion of likelihood.')
|
||||
# additional missing data parameters
|
||||
missing_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='missing' or e.name in theta_names)],key=lambda e:e.name)
|
||||
self._sym_theta += missing_theta
|
||||
self._sym_theta = sorted(self._sym_theta, key=lambda e:e.name)
|
||||
|
||||
# These are all the arguments need to compute likelihoods.
|
||||
self.arg_list = self._sym_y + self._sym_f + self._sym_theta
|
||||
|
||||
# these are arguments for computing derivatives.
|
||||
derivative_arguments = self._sym_f + self._sym_theta
|
||||
|
||||
# Do symbolic work to compute derivatives.
|
||||
self._log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_log_pdf,theta)) for theta in derivative_arguments}
|
||||
self._log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||
self._log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||
|
||||
if self.missing_data:
|
||||
# Do symbolic work to compute derivatives.
|
||||
self._missing_log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_missing_log_pdf,theta)) for theta in derivative_arguments}
|
||||
self._missing_log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||
self._missing_log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||
|
||||
|
||||
# Add parameters to the model.
|
||||
for theta in self._sym_theta:
|
||||
val = 1.0
|
||||
# TODO: need to decide how to handle user passing values for the se parameter vectors.
|
||||
if param is not None:
|
||||
if param.has_key(theta.name):
|
||||
val = param[theta.name]
|
||||
setattr(self, theta.name, Param(theta.name, val, None))
|
||||
self.add_parameters(getattr(self, theta.name))
|
||||
|
||||
|
||||
# TODO: Is there an easy way to check whether the pdf is log
|
||||
# concave? For the moment, need user to specify.
|
||||
self.log_concave = log_concave
|
||||
|
||||
# initialise code arguments
|
||||
self._arguments = {}
|
||||
|
||||
# generate the code for the pdf and derivatives
|
||||
self._gen_code()
|
||||
|
||||
def list_functions(self):
|
||||
"""Return a list of all symbolic functions in the model and their names."""
|
||||
def _gen_code(self):
|
||||
"""Generate the code from the symbolic parts that will be used for likleihod computation."""
|
||||
# TODO: Check here whether theano is available and set up
|
||||
# functions accordingly.
|
||||
symbolic_functions = [self._sym_log_pdf]
|
||||
deriv_list = [self._log_pdf_derivatives, self._log_pdf_second_derivatives, self._log_pdf_third_derivatives]
|
||||
symbolic_functions += [deriv[key] for key in sorted(deriv.keys()) for deriv in deriv_list]
|
||||
if self.missing_data:
|
||||
symbolic_functions+=[self._sym_missing_log_pdf]
|
||||
deriv_list = [self._missing_log_pdf_derivatives, self._missing_log_pdf_second_derivatives, self._missing_log_pdf_third_derivatives]
|
||||
symbolic_functions += [deriv[key] for key in sorted(deriv.keys()) for deriv in deriv_list]
|
||||
# self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules)
|
||||
|
||||
# # compute code for derivatives
|
||||
# self._derivative_code = {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}
|
||||
# self._second_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}
|
||||
# self._third_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()}
|
||||
|
||||
# if self.missing_data:
|
||||
# self._missing_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}
|
||||
# self._missing_second_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}
|
||||
# self._missing_third_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}
|
||||
|
||||
# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
|
||||
|
||||
def parameters_changed(self):
|
||||
pass
|
||||
|
||||
def update_gradients(self, grads):
|
||||
"""
|
||||
Pull out the gradients, be careful as the order must match the order
|
||||
in which the parameters are added
|
||||
"""
|
||||
# The way the Laplace approximation is run requires the
|
||||
# covariance function to compute the true gradient (because it
|
||||
# is dependent on the mode). This means we actually compute
|
||||
# the gradient outside this object. This function would
|
||||
# normally ask the object to update its gradients internally,
|
||||
# but here it provides them externally, because they are
|
||||
# computed in the inference code. TODO: Thought: How does this
|
||||
# effect EP? Shouldn't this be done by a separate
|
||||
# Laplace-approximation specific call?
|
||||
for grad, theta in zip(grads, self._sym_theta):
|
||||
parameter = getattr(self, theta.name)
|
||||
setattr(parameter, 'gradient', grad)
|
||||
|
||||
def _arguments_update(self, f, y):
|
||||
"""Set up argument lists for the derivatives."""
|
||||
# If we do make use of Theano, then at this point we would
|
||||
# need to do a lot of precomputation to ensure that the
|
||||
# likelihoods and gradients are computed together, then check
|
||||
# for parameter changes before updating.
|
||||
for i, fvar in enumerate(self._sym_f):
|
||||
self._arguments[fvar.name] = f
|
||||
for i, yvar in enumerate(self._sym_y):
|
||||
self._arguments[yvar.name] = y
|
||||
for theta in self._sym_theta:
|
||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
|
||||
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given inverse link of f.
|
||||
|
||||
:param inv_link_f: inverse link of latent variables.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata=None))
|
||||
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given inverse link of latent variables.
|
||||
|
||||
:param inv_inv_link_f: latent variables (inverse link of f)
|
||||
:type inv_inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
if self.missing_data:
|
||||
ll = np.where(np.isnan(y), self._missing_log_pdf_function(**self._missing_arguments), self._log_pdf_function(**self._arguments))
|
||||
g[:, i:i+1] = np.where(np.isnan(y),
|
||||
eval(self.code['missing_log_pdf']['derivative'][theta.name], self.namespace),
|
||||
eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
|
||||
else:
|
||||
ll = np.where(np.isnan(y), 0., self._log_pdf_function(**self._arguments))
|
||||
return np.sum(ll)
|
||||
g[:, i:i+1] = np.where(np.isnan(y),
|
||||
0.,
|
||||
eval(self.code['log_pdf']['derivative'][theta.name], self.namespace))
|
||||
return g.sum(0)
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of log likelihood with respect to the inverse link function.
|
||||
|
||||
:param inv_inv_link_f: latent variables (inverse link of f)
|
||||
:type inv_inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata
|
||||
:returns: gradient of likelihood with respect to each point.
|
||||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
|
||||
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
|
||||
for i, theta in enumerate(self.variables['theta']):
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y), self._missing_derivative_code['f'](**self._missing_argments), self._derivative_code['f'](**self._argments))
|
||||
g[:, i:i+1] = np.where(np.isnan(y),
|
||||
eval(self.code['missing_log_pdf']['second_derivative'][theta.name], self.namespace),
|
||||
eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
|
||||
else:
|
||||
return np.where(np.isnan(y), 0., self._derivative_code['f'](**self._arguments))
|
||||
g[:, i:i+1] = np.where(np.isnan(y),
|
||||
0.,
|
||||
eval(self.code['log_pdf']['second_derivative'][theta.name], self.namespace))
|
||||
return g
|
||||
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
|
||||
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
|
||||
|
||||
|
||||
:param inv_link_f: inverse link of the latent variables.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Returns diagonal of Hessian, since every where else it is
|
||||
0, as the likelihood factorizes over cases (the
|
||||
distribution for y_i depends only on link(f_i) not on
|
||||
link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(f).shape == np.atleast_1d(y).shape
|
||||
self.eval_update_cache(F=f, Y=y, Y_metadata=Y_metadata)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self.variables['theta'])))
|
||||
for i, theta in enumerate(self.variables['theta']):
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y), self._missing_second_derivative_code['f'](**self._missing_argments), self._second_derivative_code['f'](**self._argments))
|
||||
g[:, i:i+1] = np.where(np.isnan(y),
|
||||
eval(self.code['missing_log_pdf']['third_derivative'][theta.name], self.namespace),
|
||||
eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
|
||||
else:
|
||||
return np.where(np.isnan(y), 0., self._second_derivative_code['f'](**self._arguments))
|
||||
g[:, i:i+1] = np.where(np.isnan(y),
|
||||
0.,
|
||||
eval(self.code['log_pdf']['third_derivative'][theta.name], self.namespace))
|
||||
return g
|
||||
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y), self._missing_third_derivative_code['f'](**self._missing_argments), self._third_derivative_code['f'](**self._argments))
|
||||
else:
|
||||
return np.where(np.isnan(y), 0., self._third_derivative_code['f'](**self._arguments))
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||
for i, theta in enumerate(self._sym_theta):
|
||||
if self.missing_data:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), self._missing_derivative_code[theta.name](**self._arguments), self._derivative_code[theta.name](**self._arguments))
|
||||
else:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), 0., self._derivative_code[theta.name](**self._arguments))
|
||||
return g.sum(0)
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||
for i, theta in enumerate(self._sym_theta):
|
||||
if self.missing_data:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), self._missing_second_derivative_code[theta.name](**self._arguments), self._second_derivative_code[theta.name](**self._arguments))
|
||||
else:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), 0., self._second_derivative_code[theta.name](**self._arguments))
|
||||
return g
|
||||
def conditional_mean(self, gp):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||
for i, theta in enumerate(self._sym_theta):
|
||||
if self.missing_data:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), self._missing_third_derivative_code[theta.name](**self._arguments), self._third_derivative_code[theta.name](**self._arguments))
|
||||
else:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), 0., self._third_derivative_code[theta.name](**self._arguments))
|
||||
return g
|
||||
def conditional_variance(self, gp):
|
||||
raise NotImplementedError
|
||||
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def conditional_mean(self, gp):
|
||||
raise NotImplementedError
|
||||
|
||||
def conditional_variance(self, gp):
|
||||
raise NotImplementedError
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class Symbolic(Mapping, Symbolic_core):
|
|||
self.parameters_changed()
|
||||
|
||||
def _initialize_cache(self):
|
||||
self.x_0 = np.random.normal(size=(3, self.input_dim))
|
||||
self._set_attribute('x_0', np.random.normal(size=(3, self.input_dim)))
|
||||
|
||||
|
||||
def parameters_changed(self):
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -12,6 +12,7 @@ For a quick start, you can have a look at one of the tutorials:
|
|||
* `A kernel overview <tuto_kernel_overview.html>`_
|
||||
* `Writing new kernels <tuto_creating_new_kernels.html>`_
|
||||
* `Writing new models <tuto_creating_new_models.html>`_
|
||||
* `Parameterization handles <tuto_parameterized.html>`_
|
||||
|
||||
You may also be interested by some examples in the GPy/examples folder.
|
||||
|
||||
|
|
|
|||
|
|
@ -8,57 +8,44 @@ In GPy all models inherit from the base class :py:class:`~GPy.core.parameterized
|
|||
|
||||
The :py:class:`~GPy.core.model.Model` class provides parameter introspection, objective function and optimization.
|
||||
|
||||
In order to fully use all functionality of :py:class:`~GPy.core.model.Model` some methods need to be implemented / overridden. In order to explain the functionality of those methods we will use a wrapper to the numpy ``rosen`` function, which holds input parameters :math:`\mathbf{X}`. Where :math:`\mathbf{X}\in\mathbb{R}^{N\times 1}`.
|
||||
In order to fully use all functionality of
|
||||
:py:class:`~GPy.core.model.Model` some methods need to be implemented
|
||||
/ overridden. And the model needs to be told its parameters, such
|
||||
that it can provide optimized parameter distribution and handling.
|
||||
In order to explain the functionality of those methods
|
||||
we will use a wrapper to the numpy ``rosen`` function, which holds
|
||||
input parameters :math:`\mathbf{X}`. Where
|
||||
:math:`\mathbf{X}\in\mathbb{R}^{N\times 1}`.
|
||||
|
||||
Obligatory methods
|
||||
==================
|
||||
|
||||
:py:meth:`~GPy.core.model.Model.__init__` :
|
||||
Initialize the model with the given parameters. In our example we have to store shape information of :math:`\mathbf X` and the parameters themselves::
|
||||
Initialize the model with the given parameters. These need to
|
||||
be added to the model by calling
|
||||
`self.add_parameter(<param>)`, where param needs to be a
|
||||
parameter handle (See parameterized_ for details).::
|
||||
|
||||
self.X = X
|
||||
self.num_inputs = self.X.shape[0]
|
||||
assert self.X.ndim == 1, only vector inputs allowed
|
||||
|
||||
:py:meth:`~GPy.core.model.Model._get_params` :
|
||||
Return parameters of the model as a flattened numpy array-like. So, in our example we have to return the input parameters::
|
||||
|
||||
return self.X.flatten()
|
||||
|
||||
:py:meth:`~GPy.core.model.Model._set_params` :
|
||||
Set parameters, which have been fetched through :py:meth:`~GPy.core.model.Model._get_params`. In other words, "invert" the functionality of :py:meth:`~GPy.core.model.Model._get_params`::
|
||||
|
||||
self.X = params[:self.num_inputs*self.input_dim].reshape(self.num_inputs)
|
||||
self.X = GPy.core.Param("input", X)
|
||||
self.add_parameter(self.X)
|
||||
|
||||
:py:meth:`~GPy.core.model.Model.log_likelihood` :
|
||||
Returns the log-likelihood of the new model. For our example this is just the call to ``rosen``::
|
||||
Returns the log-likelihood of the new model. For our example
|
||||
this is just the call to ``rosen`` and as we want to minimize
|
||||
it, we need to negate the objective.::
|
||||
|
||||
return scipy.optimize.rosen(self.X)
|
||||
return -scipy.optimize.rosen(self.X)
|
||||
|
||||
:py:meth:`~GPy.core.model.Model._log_likelihood_gradients` :
|
||||
Returns the gradients with respect to all parameters::
|
||||
:py:meth:`~GPy.core.model.Model.parameters_changed` :
|
||||
Updates the internal state of the model and sets the gradient of
|
||||
each parameter handle in the hierarchy with respect to the
|
||||
log_likelihod. Thus here we need to put the negative derivative of
|
||||
the rosenbrock function:
|
||||
|
||||
return scipy.optimize.rosen_der(self.X)
|
||||
self.X.gradient = -scipy.optimize.rosen_der(self.X)
|
||||
|
||||
|
||||
Optional methods
|
||||
================
|
||||
|
||||
If you want some special functionality please provide the following methods:
|
||||
|
||||
Using the pickle functionality
|
||||
------------------------------
|
||||
|
||||
To be able to use the pickle functionality ``m.pickle(<path>)`` the methods ``getstate(self)`` and ``setstate(self, state)`` have to be provided. The convention for a ``state`` in ``GPy`` is a list of all parameters, which are needed to restore the model. All classes provided in ``GPy`` follow this convention, thus you can just append to the state of the inherited class and call the inherited class' ``setstate`` with the appropriate state.
|
||||
|
||||
:py:meth:`~GPy.core.model.Model.getstate` :
|
||||
This method returns a state of the model, following the memento pattern. As we are inheriting from :py:class:`~GPy.core.model.Model`, we have to return the state of Model as well. In out example we have `X` and `num_inputs` as state::
|
||||
|
||||
return Model.getstate(self) + [self.X, self.num_inputs]
|
||||
|
||||
:py:meth:`~GPy.core.model.Model.setstate` :
|
||||
This method restores this model with the given ``state``::
|
||||
|
||||
self.num_inputs = state.pop()
|
||||
self.X = state.pop()
|
||||
return Model.setstate(self, state)
|
||||
Currently none.
|
||||
|
|
|
|||
23
doc/tuto_parameterized.rst
Normal file
23
doc/tuto_parameterized.rst
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
.. _parameterized:
|
||||
|
||||
*******************
|
||||
Parameterization handling
|
||||
*******************
|
||||
|
||||
Parameterization in GPy is done through so called parameter handles. The parameter handles are handles to parameters of a model of any kind. A parameter handle can be constrained, fixed, randomized and others. All parameters in GPy have a name, with which they can be accessed in the model. The most common way of accesssing a parameter programmatically though, is by variable name.
|
||||
|
||||
Parameter handles
|
||||
==============
|
||||
|
||||
A parameter handle in GPy is a handle on a parameter, as the name suggests. A parameter can be constrained, fixed, randomized and more (See e.g. `working with models`). This gives the freedom to the model to handle parameter distribution and model updates as efficiently as possible. All parameter handles share a common memory space, which is just a flat numpy array, stored in the highest parent of a model hierarchy.
|
||||
In the following we will introduce and elucidate the different parameter handles which exist in GPy.
|
||||
|
||||
:py:class:`~GPy.core.parameterization.parameterized.Parameterized`
|
||||
==========
|
||||
|
||||
A parameterized object itself holds parameter handles and is just a summarization of the parameters below. It can use those parameters to change the internal state of the model and GPy ensures those parameters to allways hold the right value when in an optimization routine or any other update.
|
||||
|
||||
:py:class:`~GPy.core.parameterization.param.Param`
|
||||
===========
|
||||
|
||||
The lowest level of parameter is a numpy array. This Param class inherits all functionality of a numpy array and can simply be used as if it where a numpy array. These parameters can be accessed in the same way as a numpy array is indexed.
|
||||
Loading…
Add table
Add a link
Reference in a new issue