mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-10 12:32:40 +02:00
Merge branch 'params' of github.com:SheffieldML/GPy into params
This commit is contained in:
commit
b6c4c39261
21 changed files with 955 additions and 587 deletions
|
|
@ -65,6 +65,14 @@ class Mapping(Parameterized):
|
||||||
else:
|
else:
|
||||||
raise NameError, "matplotlib package has not been imported."
|
raise NameError, "matplotlib package has not been imported."
|
||||||
|
|
||||||
|
class Bijective_mapping(Mapping):
    """This is a mapping that is bijective, i.e. you can go from X to f and also back from f to X. The inverse mapping is called g()."""
    def __init__(self, input_dim, output_dim, name='bijective_mapping'):
        """
        :param input_dim: dimensionality of the inputs X.
        :param output_dim: dimensionality of the outputs f.
        :param name: name handed to the parent constructor.
        """
        # The class name passed to super() must be this class; the previous
        # spelling referenced an undefined name and raised NameError.
        # NOTE(review): input_dim and output_dim are not forwarded to the
        # parent constructor -- confirm Mapping.__init__ does not need them.
        super(Bijective_mapping, self).__init__(name=name)

    def g(self, f):
        """Inverse mapping from output domain of the function to the inputs."""
        raise NotImplementedError
|
||||||
|
|
||||||
from model import Model
|
from model import Model
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -184,7 +184,7 @@ class ParameterIndexOperationsView(object):
|
||||||
def remove(self, prop, indices):
|
def remove(self, prop, indices):
|
||||||
removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset)
|
removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset)
|
||||||
if removed.size > 0:
|
if removed.size > 0:
|
||||||
return removed - self._size + 1
|
return removed-self._offset
|
||||||
return removed
|
return removed
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -312,7 +312,8 @@ class Indexable(object):
|
||||||
This does not need to account for shaped parameters, as it
|
This does not need to account for shaped parameters, as it
|
||||||
basically just sums up the parameter sizes which come before param.
|
basically just sums up the parameter sizes which come before param.
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"
|
return 0
|
||||||
|
#raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"
|
||||||
|
|
||||||
def _raveled_index_for(self, param):
|
def _raveled_index_for(self, param):
|
||||||
"""
|
"""
|
||||||
|
|
@ -320,7 +321,8 @@ class Indexable(object):
|
||||||
that is an int array, containing the indexes for the flattened
|
that is an int array, containing the indexes for the flattened
|
||||||
param inside this parameterized logic.
|
param inside this parameterized logic.
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
|
return param._raveled_index()
|
||||||
|
#raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
|
||||||
|
|
||||||
|
|
||||||
class Constrainable(Nameable, Indexable, Observable):
|
class Constrainable(Nameable, Indexable, Observable):
|
||||||
|
|
@ -368,10 +370,10 @@ class Constrainable(Nameable, Indexable, Observable):
|
||||||
if value is not None:
|
if value is not None:
|
||||||
self[:] = value
|
self[:] = value
|
||||||
reconstrained = self.unconstrain()
|
reconstrained = self.unconstrain()
|
||||||
self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning)
|
index = self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning)
|
||||||
rav_i = self._highest_parent_._raveled_index_for(self)
|
self._highest_parent_._set_fixed(self, index)
|
||||||
self._highest_parent_._set_fixed(rav_i)
|
|
||||||
self.notify_observers(self, None if trigger_parent else -np.inf)
|
self.notify_observers(self, None if trigger_parent else -np.inf)
|
||||||
|
return index
|
||||||
fix = constrain_fixed
|
fix = constrain_fixed
|
||||||
|
|
||||||
def unconstrain_fixed(self):
|
def unconstrain_fixed(self):
|
||||||
|
|
@ -379,7 +381,8 @@ class Constrainable(Nameable, Indexable, Observable):
|
||||||
This parameter will no longer be fixed.
|
This parameter will no longer be fixed.
|
||||||
"""
|
"""
|
||||||
unconstrained = self.unconstrain(__fixed__)
|
unconstrained = self.unconstrain(__fixed__)
|
||||||
self._highest_parent_._set_unfixed(unconstrained)
|
self._highest_parent_._set_unfixed(self, unconstrained)
|
||||||
|
return unconstrained
|
||||||
unfix = unconstrain_fixed
|
unfix = unconstrain_fixed
|
||||||
|
|
||||||
def _ensure_fixes(self):
|
def _ensure_fixes(self):
|
||||||
|
|
@ -388,14 +391,16 @@ class Constrainable(Nameable, Indexable, Observable):
|
||||||
# Param: ones(self._realsize_
|
# Param: ones(self._realsize_
|
||||||
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
|
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
|
||||||
|
|
||||||
def _set_fixed(self, index):
|
def _set_fixed(self, param, index):
|
||||||
self._ensure_fixes()
|
self._ensure_fixes()
|
||||||
self._fixes_[index] = FIXED
|
offset = self._offset_for(param)
|
||||||
|
self._fixes_[index+offset] = FIXED
|
||||||
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
|
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
|
||||||
|
|
||||||
def _set_unfixed(self, index):
|
def _set_unfixed(self, param, index):
|
||||||
self._ensure_fixes()
|
self._ensure_fixes()
|
||||||
self._fixes_[index] = UNFIXED
|
offset = self._offset_for(param)
|
||||||
|
self._fixes_[index+offset] = UNFIXED
|
||||||
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
|
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
|
||||||
|
|
||||||
def _connect_fixes(self):
|
def _connect_fixes(self):
|
||||||
|
|
@ -469,8 +474,9 @@ class Constrainable(Nameable, Indexable, Observable):
|
||||||
"""
|
"""
|
||||||
self.param_array[...] = transform.initialize(self.param_array)
|
self.param_array[...] = transform.initialize(self.param_array)
|
||||||
reconstrained = self.unconstrain()
|
reconstrained = self.unconstrain()
|
||||||
self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
|
added = self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
|
||||||
self.notify_observers(self, None if trigger_parent else -np.inf)
|
self.notify_observers(self, None if trigger_parent else -np.inf)
|
||||||
|
return added
|
||||||
|
|
||||||
def unconstrain(self, *transforms):
|
def unconstrain(self, *transforms):
|
||||||
"""
|
"""
|
||||||
|
|
@ -549,7 +555,9 @@ class Constrainable(Nameable, Indexable, Observable):
|
||||||
if warning and reconstrained.size > 0:
|
if warning and reconstrained.size > 0:
|
||||||
# TODO: figure out which parameters have changed and only print those
|
# TODO: figure out which parameters have changed and only print those
|
||||||
print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
|
print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
|
||||||
which.add(what, self._raveled_index())
|
index = self._raveled_index()
|
||||||
|
which.add(what, index)
|
||||||
|
return index
|
||||||
|
|
||||||
def _remove_from_index_operations(self, which, transforms):
|
def _remove_from_index_operations(self, which, transforms):
|
||||||
"""
|
"""
|
||||||
|
|
@ -561,9 +569,10 @@ class Constrainable(Nameable, Indexable, Observable):
|
||||||
removed = np.empty((0,), dtype=int)
|
removed = np.empty((0,), dtype=int)
|
||||||
for t in transforms:
|
for t in transforms:
|
||||||
unconstrained = which.remove(t, self._raveled_index())
|
unconstrained = which.remove(t, self._raveled_index())
|
||||||
|
print unconstrained
|
||||||
removed = np.union1d(removed, unconstrained)
|
removed = np.union1d(removed, unconstrained)
|
||||||
if t is __fixed__:
|
if t is __fixed__:
|
||||||
self._highest_parent_._set_unfixed(unconstrained)
|
self._highest_parent_._set_unfixed(self, unconstrained)
|
||||||
|
|
||||||
return removed
|
return removed
|
||||||
|
|
||||||
|
|
|
||||||
349
GPy/core/symbolic.py
Normal file
349
GPy/core/symbolic.py
Normal file
|
|
@ -0,0 +1,349 @@
|
||||||
|
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from ..core.parameterization import Parameterized
|
||||||
|
import numpy as np
|
||||||
|
import sympy as sym
|
||||||
|
from ..core.parameterization import Param
|
||||||
|
from sympy.utilities.lambdify import lambdastr, _imp_namespace, _get_namespace
|
||||||
|
from sympy.utilities.iterables import numbered_symbols
|
||||||
|
from sympy import exp
|
||||||
|
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
|
||||||
|
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln
|
||||||
|
|
||||||
|
class Symbolic_core():
|
||||||
|
"""
|
||||||
|
Base model symbolic class.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, expression, cacheable, derivatives=None, param=None, func_modules=None):
    """
    Base class init: collect the symbols of the expression, compute the
    requested derivatives, create the parameters and generate the code.

    :param expression: symbolic expression; must provide ``atoms()`` whose
        symbol entries expose ``is_Symbol`` and ``name``.
    :param cacheable: names of the cacheable inputs (e.g. 'X'). A symbol
        belongs to an input when the part of its name before the first
        underscore equals the lower-cased input name.
    :param derivatives: keys of ``self.variables`` whose symbols the
        expression is differentiated with respect to (None for no derivatives).
    :param param: optional dictionary mapping parameter names to initial
        values; parameters not listed start at 1.0.
    :param func_modules: optional list of extra function mappings. The list
        is copied, so neither the caller's list nor a shared default is
        modified.
    :raises ValueError: if an entry of cacheable has no matching symbol in
        the expression.
    """
    # Func_modules sets up the right mapping for functions. Work on a copy:
    # extending the argument in place would leak entries between instances.
    self.func_modules = list(func_modules) if func_modules is not None else []
    self.func_modules += [{'gamma':gamma,
                           'gammaln':gammaln,
                           'erf':erf, 'erfc':erfc,
                           'erfcx':erfcx,
                           'polygamma':polygamma,
                           'normcdf':normcdf,
                           'normcdfln':normcdfln,
                           'logistic':logistic,
                           'logisticln':logisticln},
                          'numpy']

    self.expressions = {}
    self.expressions['function'] = expression
    self.cacheable = cacheable

    # pull the parameters and inputs out of the symbolic pdf
    self.variables = {}
    symbols = [e for e in expression.atoms() if e.is_Symbol]

    # inputs are assumed to be those things that are
    # cacheable. I.e. those things that aren't stored within the
    # object except as cached. For covariance functions this is X
    # and Z, for likelihoods F and for mapping functions X.
    self.cacheable_vars = [] # list of everything that's cacheable
    for var in cacheable:
        self.variables[var] = [e for e in symbols if e.name.split('_')[0]==var.lower()]
        self.cacheable_vars += self.variables[var]
    for var in cacheable:
        if not self.variables[var]:
            raise ValueError('Variable ' + var + ' was specified as cacheable but is not in expression. Expected to find symbols of the form ' + var.lower() + '_0 to represent ' + var)

    # things that aren't cacheable are assumed to be parameters.
    self.variables['theta'] = sorted([e for e in symbols if e not in self.cacheable_vars], key=lambda e: e.name)

    # these are arguments for computing derivatives.
    derivative_arguments = []
    if derivatives is not None:
        for derivative in derivatives:
            derivative_arguments += self.variables[derivative]

    # Do symbolic work to compute derivatives.
    self.expressions['derivative'] = {theta.name : sym.diff(self.expressions['function'],theta) for theta in derivative_arguments}

    # Add parameters to the model.
    for theta in self.variables['theta']:
        val = 1.0
        # TODO: need to decide how to handle user passing values for these parameter vectors.
        if param is not None and theta.name in param:
            val = param[theta.name]
        # Add parameter.
        setattr(self, theta.name, Param(theta.name, val, None))
        self.add_parameters(getattr(self, theta.name))

    # eval() globals and locals used by the eval_* methods.
    self.namespace = [globals(), self.__dict__]
    self._gen_code()
|
||||||
|
|
||||||
|
def eval_parameters_changed(self):
    """
    Evaluate every entry of ``self.code['params_change']``, store each
    result as an attribute of the same name, then call
    ``eval_update_cache()``.
    """
    # TODO: place checks for inf/nan in here
    # do all the precomputation codes.
    # Variable names are a common prefix followed by an index, and later
    # entries may refer to earlier ones. Sorting by (length, name) gives
    # numeric order; a plain string sort would run 'sub10' before 'sub2'.
    ordered = sorted(self.code['params_change'].items(),
                     key=lambda item: (len(item[0]), item[0]))
    for variable, code in ordered:
        # NOTE: eval runs code strings stored in self.code.
        setattr(self, variable, eval(code, *self.namespace))
    self.eval_update_cache()
|
||||||
|
|
||||||
|
def eval_update_cache(self, X=None):
    """
    Store the columns of X (when given) and evaluate every entry of
    ``self.code['update_cache']``.

    :param X: optional array indexed as ``X[:, i]``; column i is stored, with
        a trailing axis added, under the name of the i-th symbol in
        ``self.variables['X']``. When None the stored columns are left alone.
    """
    # TODO: place checks for inf/nan in here
    if X is not None:
        for i, theta in enumerate(self.variables['X']):
            setattr(self, theta.name, X[:, i][:, None])

    # Variable names are a common prefix followed by an index, and later
    # entries may refer to earlier ones. Sorting by (length, name) gives
    # numeric order; a plain string sort would run 'cache10' before 'cache2'.
    ordered = sorted(self.code['update_cache'].items(),
                     key=lambda item: (len(item[0]), item[0]))
    for variable, code in ordered:
        # NOTE: eval runs code strings stored in self.code.
        setattr(self, variable, eval(code, *self.namespace))
|
||||||
|
|
||||||
|
def eval_update_gradients(self, partial, X):
    """
    Set the ``gradient`` attribute of every parameter in
    ``self.variables['theta']``.

    :param partial: factor multiplied with each evaluated derivative before
        the product is summed.
    :param X: not used by this method.
    """
    # TODO: place checks for inf/nan in here
    for symbol in self.variables['theta']:
        derivative_code = self.code['derivative'][symbol.name]
        parameter = getattr(self, symbol.name)
        weighted = partial*eval(derivative_code, *self.namespace)
        parameter.gradient = weighted.sum()
|
||||||
|
|
||||||
|
def eval_gradients_X(self, partial, X):
    """
    Evaluate the derivative code of every symbol in ``self.variables['X']``.

    :param partial: factor multiplied with each evaluated derivative.
    :param X: input array; the cache is refreshed with it first.
    :returns: array shaped like X whose i-th column holds partial times the
        derivative for the i-th X symbol.
    """
    result = np.zeros_like(X)
    self.eval_update_cache(X)
    for col, symbol in enumerate(self.variables['X']):
        derivative_code = self.code['derivative'][symbol.name]
        column = slice(col, col + 1)
        result[:, column] = partial*eval(derivative_code, *self.namespace)
    return result
|
||||||
|
|
||||||
|
def eval_f(self, X):
    """
    Refresh the cache with X and evaluate ``self.code['function']``.

    :param X: input passed on to ``eval_update_cache``.
    :returns: the value of the evaluated function code.
    """
    self.eval_update_cache(X)
    function_code = self.code['function']
    value = eval(function_code, *self.namespace)
    return value
|
||||||
|
|
||||||
|
def code_parameters_changed(self):
    """
    Return, as a string, one assignment line per entry of
    ``self.code['params_change']``.

    Both the assigned name and the right-hand side go through
    ``_print_code``, the same way ``code_update_cache`` writes its lines.
    """
    # do all the precomputation codes.
    # Sort by (length, name) so that e.g. 'sub2' is written before 'sub10';
    # a plain string sort would put later sub-expressions first.
    ordered = sorted(self.code['params_change'].items(),
                     key=lambda item: (len(item[0]), item[0]))
    lines = [self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
             for variable, code in ordered]
    return ''.join(lines)
|
||||||
|
|
||||||
|
def code_update_cache(self):
    """
    Return, as a string, the code that stores the columns of X and then
    assigns every entry of ``self.code['update_cache']``.
    """
    pieces = ['if X is not None:\n']
    for i, theta in enumerate(self.variables['X']):
        pieces.append("\t" + self._print_code(theta.name) + ' = X[:, ' + str(i) + "][:, None]\n")

    # Sort by (length, name) so that e.g. 'cache2' is written before
    # 'cache10'; a plain string sort would put later sub-expressions first.
    ordered = sorted(self.code['update_cache'].items(),
                     key=lambda item: (len(item[0]), item[0]))
    for variable, code in ordered:
        pieces.append(self._print_code(variable) + ' = ' + self._print_code(code) + "\n")

    return ''.join(pieces)
|
||||||
|
|
||||||
|
def code_update_gradients(self):
    """
    Return, as a string, one gradient assignment line per parameter in
    ``self.variables['theta']``.
    """
    lines = []
    for symbol in self.variables['theta']:
        derivative_code = self.code['derivative'][symbol.name]
        target = self._print_code(symbol.name)
        lines.append(target + '.gradient = (partial*(' + self._print_code(derivative_code) + ')).sum()\n')
    return ''.join(lines)
|
||||||
|
|
||||||
|
def code_gradients_X(self):
    """
    Return, as a string, the code that fills ``gradients_X`` column by
    column from the derivative code of each symbol in ``self.variables['X']``.
    """
    pieces = ['gradients_X = np.zeros_like(X)\n', 'self.update_cache(X)\n']
    for col, symbol in enumerate(self.variables['X']):
        derivative_code = self.code['derivative'][symbol.name]
        index = str(col)
        pieces.append('gradients_X[:, ' + index + ':' + index + '+1] = partial*' + self._print_code(derivative_code) + '\n')
    pieces.append('return gradients_X\n')
    return ''.join(pieces)
|
||||||
|
|
||||||
|
def code_f(self):
    """
    Return, as a string, the code that refreshes the cache and returns the
    printed form of ``self.code['function']``.
    """
    function_body = self._print_code(self.code['function'])
    return 'self.update_cache(X)\n' + 'return ' + function_body
|
||||||
|
|
||||||
|
def stabilise(self):
    """Stabilize the code in the model."""
    # Hook applied to all expressions in the model in an attempt to
    # stabilize them; this base implementation does nothing.
    return None
|
||||||
|
|
||||||
|
def _gen_namespace(self, modules=None, use_imps=True):
    """
    Gets the relevant namespaces for the stored expressions.

    :param modules: a module name, a dictionary, or an iterable of those.
        When None, "math", "mpmath" and "sympy" are used, with "numpy"
        inserted in second position if it can be imported.
    :param use_imps: when True, the implementations attached to the
        expressions (via ``_imp_namespace``) are added first.
    :returns: dictionary of names; entries from earlier namespaces take
        priority, and every symbol of the expressions is added last.
    """
    from sympy.core.symbol import Symbol
    # _import is not among the names this file imports at module level.
    from sympy.utilities.lambdify import _import

    # The class stores its expressions as 'expression_list'; an explicitly
    # set '_expression_list' is still honoured if present.
    expressions = getattr(self, '_expression_list', None)
    if expressions is None:
        expressions = self.expression_list

    # If the user hasn't specified any modules, use what is available.
    if modules is None:
        # Use either numpy (if available) or python.math where possible.
        # XXX: This leads to different behaviour on different systems and
        #      might be the reason for irreproducible errors.
        modules = ["math", "mpmath", "sympy"]

        try:
            _import("numpy")
        except ImportError:
            pass
        else:
            modules.insert(1, "numpy")

    # Get the needed namespaces.
    namespaces = []
    # First find any function implementations
    if use_imps:
        for expr in expressions:
            namespaces.append(_imp_namespace(expr))
    # Check for dict before iterating
    if isinstance(modules, (dict, str)) or not hasattr(modules, '__iter__'):
        namespaces.append(modules)
    else:
        namespaces += list(modules)
    # fill namespace with first having highest priority
    namespace = {}
    for m in namespaces[::-1]:
        namespace.update(_get_namespace(m))
    for expr in expressions:
        if hasattr(expr, "atoms"):
            # Try if you can extract symbols from the expression.
            # Move on if expr.atoms is not implemented.
            for term in expr.atoms(Symbol):
                namespace[str(term)] = term

    return namespace
|
||||||
|
def update_expression_list(self):
    """
    Extract a list of expressions from the dictionary of expressions.

    Sets ``self.expression_list`` (the expressions), ``self.expression_keys``
    (for each expression, the list of keys locating it in
    ``self.expressions``) and ``self.expression_order`` (its sort rank), all
    as tuples ordered by rank. Also creates an empty-string slot in
    ``self.code`` for every expression.
    """
    # Rank of derivative entries, keyed by what precedes 'derivative' in the
    # key with any trailing underscore removed ('second_derivative' -> 'second').
    derivative_rank = {'': 3, 'first': 3, 'second': 4, 'third': 5}
    unknown_derivative_rank = 6

    entries = []  # (rank, expression, keys)
    for key in self.expressions.keys():
        if key == 'function':
            entries.append((1, self.expressions[key], [key]))
            self.code[key] = ''
        elif key[-10:] == 'derivative':
            self.code[key] = {}
            rank = derivative_rank.get(key[:-10].rstrip('_'), unknown_derivative_rank)
            for dkey in self.expressions[key].keys():
                entries.append((rank, self.expressions[key][dkey], [key, dkey]))
                self.code[key][dkey] = ''
        else:
            entries.append((2, self.expressions[key], [key]))
            self.code[key] = ''

    # This step may be unnecessary.
    # Not 100% sure if the sub expression elimination is order sensitive. This step orders the list with the 'function' code first and derivatives after.
    # Sort on the rank only, so that ties never compare the expressions.
    entries.sort(key=lambda entry: entry[0])
    if entries:
        self.expression_order, self.expression_list, self.expression_keys = zip(*entries)
    else:
        self.expression_order, self.expression_list, self.expression_keys = (), (), ()
|
||||||
|
|
||||||
|
|
||||||
|
def _gen_code(self, cache_prefix='cache', sub_prefix='sub', prefix='XoXoXoX'):
    """
    Generate code for the list of expressions provided using the common
    sub-expression eliminator to separate out portions that are computed
    multiple times.

    :param cache_prefix: name prefix for sub-expressions that depend on a
        cacheable variable.
    :param sub_prefix: name prefix for the remaining sub-expressions.
    :param prefix: prefix of the temporary symbols created by the common
        sub-expression elimination; they are renamed to the two prefixes above.

    Fills ``self.code`` (including 'update_cache' and 'params_change'),
    replaces the stored expressions by their substituted forms and adds the
    new variable lists to ``self.variables``.
    """
    import re
    from functools import reduce

    # This is the dictionary that stores all the generated code.
    self.code = {}

    # Convert the expressions to a list for common sub expression elimination
    # We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
    self.update_expression_list()

    # Apply any global stabilisation operations to expressions.
    self.stabilise()

    # Helper functions to get data in and out of dictionaries.
    # this code from http://stackoverflow.com/questions/14692690/access-python-nested-dictionary-items-via-a-list-of-keys
    def getFromDict(dataDict, mapList):
        return reduce(lambda d, k: d[k], mapList, dataDict)
    def setInDict(dataDict, mapList, value):
        getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value

    # Do the common sub expression elimination
    subexpressions, expression_substituted_list = sym.cse(self.expression_list, numbered_symbols(prefix=prefix))
    cacheable_list = []

    # Sort out any expression that's dependent on something that scales with data size (these are listed in cacheable).
    self.expressions['params_change'] = []
    self.expressions['update_cache'] = []
    cache_expressions_list = []
    sub_expression_list = []
    for expr in subexpressions:
        arg_list = [e for e in expr[1].atoms() if e.is_Symbol]
        cacheable_symbols = [e for e in arg_list if e in cacheable_list or e in self.cacheable_vars]
        if cacheable_symbols:
            self.expressions['update_cache'].append((expr[0].name, self._expr2code(arg_list, expr[1])))
            # list which ensures dependencies are cacheable.
            cacheable_list.append(expr[0])
            cache_expressions_list.append(expr[0].name)
        else:
            self.expressions['params_change'].append((expr[0].name, self._expr2code(arg_list, expr[1])))
            sub_expression_list.append(expr[0].name)

    # Replace original code with code including subexpressions.
    for expr, keys in zip(expression_substituted_list, self.expression_keys):
        arg_list = [e for e in expr.atoms() if e.is_Symbol]
        setInDict(self.code, keys, self._expr2code(arg_list, expr))
        setInDict(self.expressions, keys, expr)

    # Create variable names for cache and sub expression portions
    # NOTE(review): sym.var is documented to also inject the created symbol
    # into the global namespace -- confirm that side effect is wanted here.
    cache_dict = {}
    self.variables[cache_prefix] = []
    for i, sub in enumerate(cache_expressions_list):
        name = cache_prefix + str(i)
        cache_dict[sub] = name
        self.variables[cache_prefix].append(sym.var(name))

    sub_dict = {}
    self.variables[sub_prefix] = []
    for i, sub in enumerate(sub_expression_list):
        name = sub_prefix + str(i)
        sub_dict[sub] = name
        self.variables[sub_prefix].append(sym.var(name))

    # Rename whole temporary symbols in a single pass. Plain substring
    # replacement would also rewrite the front of a longer name (replacing
    # 'XoXoXoX1' inside 'XoXoXoX10').
    rename = dict(cache_dict)
    rename.update(sub_dict)
    token = re.compile(r'(?<!\w)' + re.escape(prefix) + r'\d+(?!\w)')
    def rename_tokens(text):
        return token.sub(lambda match: rename.get(match.group(0), match.group(0)), text)

    # Replace sub expressions in main code with either cacheN or subN.
    for keys in self.expression_keys:
        setInDict(self.code, keys, rename_tokens(getFromDict(self.code, keys)))

    # Set up precompute code as either cacheN or subN.
    self.code['update_cache'] = {}
    for key, val in self.expressions['update_cache']:
        self.code['update_cache'][cache_dict[key]] = rename_tokens(val)

    self.expressions['update_cache'] = dict(self.expressions['update_cache'])
    self.code['params_change'] = {}
    for key, val in self.expressions['params_change']:
        self.code['params_change'][sub_dict[key]] = rename_tokens(val)
    self.expressions['params_change'] = dict(self.expressions['params_change'])
|
||||||
|
|
||||||
|
def _expr2code(self, arg_list, expr):
    """
    Convert the given symbolic expression into code.

    :param arg_list: symbols passed to ``lambdastr`` as the lambda arguments.
    :param expr: the expression to convert.
    :returns: the text following the first colon of the string produced by
        ``lambdastr``, stripped of surrounding whitespace.
    """
    code = lambdastr(arg_list, expr)
    # Split on the first colon only, so that a colon occurring later in the
    # generated text cannot truncate the result.
    function_code = code.split(':', 1)[1].strip()
    return function_code
|
||||||
|
|
||||||
|
def _print_code(self, code):
    """
    Prepare code for string writing by prefixing every known variable name
    with 'self.'.

    :param code: code string (or a single variable name).
    :returns: the string with each whole occurrence of a name found in
        ``self.variables`` replaced by 'self.' + name.

    Only whole names are rewritten: a variable called 'a' does not touch
    'gamma', and 'sub1' does not touch 'sub10'. Names directly preceded by a
    dot are left alone, so nothing is prefixed twice.
    """
    import re

    names = set()
    for key in self.variables.keys():
        for arg in self.variables[key]:
            names.add(arg.name)
    if not names:
        return code

    # Longest names first so the alternation prefers the full identifier.
    alternatives = '|'.join(re.escape(name) for name in sorted(names, key=len, reverse=True))
    pattern = re.compile(r'(?<![\w.])(?:' + alternatives + r')(?!\w)')
    return pattern.sub(lambda match: 'self.' + match.group(0), code)
|
||||||
|
|
@ -192,17 +192,22 @@ class VarDTC(object):
|
||||||
|
|
||||||
class VarDTCMissingData(object):
|
class VarDTCMissingData(object):
|
||||||
const_jitter = 1e-6
|
const_jitter = 1e-6
|
||||||
def __init__(self, limit=1):
|
def __init__(self, limit=1, inan=None):
|
||||||
from ...util.caching import Cacher
|
from ...util.caching import Cacher
|
||||||
self._Y = Cacher(self._subarray_computations, limit)
|
self._Y = Cacher(self._subarray_computations, limit)
|
||||||
|
self._inan = inan
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def set_limit(self, limit):
|
def set_limit(self, limit):
|
||||||
self._Y.limit = limit
|
self._Y.limit = limit
|
||||||
|
|
||||||
def _subarray_computations(self, Y):
|
def _subarray_computations(self, Y):
|
||||||
|
if self._inan is None:
|
||||||
inan = np.isnan(Y)
|
inan = np.isnan(Y)
|
||||||
has_none = inan.any()
|
has_none = inan.any()
|
||||||
|
else:
|
||||||
|
inan = self._inan
|
||||||
|
has_none = True
|
||||||
if has_none:
|
if has_none:
|
||||||
from ...util.subarray_and_sorting import common_subarrays
|
from ...util.subarray_and_sorting import common_subarrays
|
||||||
self._subarray_indices = []
|
self._subarray_indices = []
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ from _src.rbf import RBF
|
||||||
from _src.linear import Linear, LinearFull
|
from _src.linear import Linear, LinearFull
|
||||||
from _src.static import Bias, White
|
from _src.static import Bias, White
|
||||||
from _src.brownian import Brownian
|
from _src.brownian import Brownian
|
||||||
from _src.symbolic import Symbolic
|
|
||||||
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||||
from _src.mlp import MLP
|
from _src.mlp import MLP
|
||||||
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
||||||
|
|
@ -12,3 +11,15 @@ from _src.coregionalize import Coregionalize
|
||||||
from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
|
from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
|
||||||
from _src.ODE_UY import ODE_UY
|
from _src.ODE_UY import ODE_UY
|
||||||
|
|
||||||
|
# TODO: put this in an init file somewhere
|
||||||
|
try:
|
||||||
|
import sympy as sym
|
||||||
|
sympy_available=True
|
||||||
|
except ImportError:
|
||||||
|
sympy_available=False
|
||||||
|
|
||||||
|
if sympy_available:
|
||||||
|
from _src.symbolic import Symbolic
|
||||||
|
#from _src.heat_eqinit import Heat_eqinit
|
||||||
|
#from _src.ode1_eq_lfm import Ode1_eq_lfm
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,17 @@
|
||||||
# Check Matthew Rocklin's blog post.
|
# Check Matthew Rocklin's blog post.
|
||||||
try:
|
try:
|
||||||
import sympy as sp
|
import sympy as sym
|
||||||
sympy_available=True
|
sympy_available=True
|
||||||
from sympy.utilities.lambdify import lambdify
|
from sympy.utilities.lambdify import lambdify
|
||||||
|
from GPy.util.symbolic import stabilise
|
||||||
except ImportError:
|
except ImportError:
|
||||||
sympy_available=False
|
sympy_available=False
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from kern import Kern
|
from kern import Kern
|
||||||
|
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
|
||||||
|
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln, differfln
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from ...core.parameterization.transformations import Logexp
|
|
||||||
|
|
||||||
class Symbolic(Kern):
|
class Symbolic(Kern):
|
||||||
"""
|
"""
|
||||||
|
|
@ -26,28 +28,42 @@ class Symbolic(Kern):
|
||||||
- to handle multiple inputs, call them x_1, z_1, etc
|
- to handle multiple inputs, call them x_1, z_1, etc
|
||||||
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||||
"""
|
"""
|
||||||
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):
|
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None, func_modules=[]):
|
||||||
|
|
||||||
if k is None:
|
if k is None:
|
||||||
raise ValueError, "You must provide an argument for the covariance function."
|
raise ValueError, "You must provide an argument for the covariance function."
|
||||||
super(Sympykern, self).__init__(input_dim, active_dims, name)
|
|
||||||
|
|
||||||
self._sp_k = k
|
self.func_modules = func_modules
|
||||||
|
self.func_modules += [{'gamma':gamma,
|
||||||
|
'gammaln':gammaln,
|
||||||
|
'erf':erf, 'erfc':erfc,
|
||||||
|
'erfcx':erfcx,
|
||||||
|
'polygamma':polygamma,
|
||||||
|
'differfln':differfln,
|
||||||
|
'normcdf':normcdf,
|
||||||
|
'normcdfln':normcdfln,
|
||||||
|
'logistic':logistic,
|
||||||
|
'logisticln':logisticln},
|
||||||
|
'numpy']
|
||||||
|
|
||||||
|
super(Symbolic, self).__init__(input_dim, active_dims, name)
|
||||||
|
|
||||||
|
self._sym_k = k
|
||||||
|
|
||||||
# pull the variable names out of the symbolic covariance function.
|
# pull the variable names out of the symbolic covariance function.
|
||||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
sym_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||||
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
self._sym_x= sorted([e for e in sym_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||||
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
self._sym_z= sorted([e for e in sym_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
||||||
|
|
||||||
# Check that variable names make sense.
|
# Check that variable names make sense.
|
||||||
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
|
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sym_x)])
|
||||||
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
|
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sym_z)])
|
||||||
assert len(self._sp_x)==len(self._sp_z)
|
assert len(self._sym_x)==len(self._sym_z)
|
||||||
x_dim=len(self._sp_x)
|
x_dim=len(self._sym_x)
|
||||||
|
|
||||||
self._sp_kdiag = k
|
self._sym_kdiag = k
|
||||||
for x, z in zip(self._sp_x, self._sp_z):
|
for x, z in zip(self._sym_x, self._sym_z):
|
||||||
self._sp_kdiag = self._sp_kdiag.subs(z, x)
|
self._sym_kdiag = self._sym_kdiag.subs(z, x)
|
||||||
|
|
||||||
# If it is a multi-output covariance, add an input for indexing the outputs.
|
# If it is a multi-output covariance, add an input for indexing the outputs.
|
||||||
self._real_input_dim = x_dim
|
self._real_input_dim = x_dim
|
||||||
|
|
@ -56,22 +72,22 @@ class Symbolic(Kern):
|
||||||
self.output_dim = output_dim
|
self.output_dim = output_dim
|
||||||
|
|
||||||
# extract parameter names from the covariance
|
# extract parameter names from the covariance
|
||||||
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
thetas = sorted([e for e in sym_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||||
|
|
||||||
# Look for parameters with index (subscripts), they are associated with different outputs.
|
# Look for parameters with index (subscripts), they are associated with different outputs.
|
||||||
if self.output_dim>1:
|
if self.output_dim>1:
|
||||||
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
self._sym_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||||
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
self._sym_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
||||||
|
|
||||||
# Make sure parameter appears with both indices!
|
# Make sure parameter appears with both indices!
|
||||||
assert len(self._sp_theta_i)==len(self._sp_theta_j)
|
assert len(self._sym_theta_i)==len(self._sym_theta_j)
|
||||||
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
|
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j)])
|
||||||
|
|
||||||
# Extract names of shared parameters (those without a subscript)
|
# Extract names of shared parameters (those without a subscript)
|
||||||
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
|
self._sym_theta = [theta for theta in thetas if theta not in self._sym_theta_i and theta not in self._sym_theta_j]
|
||||||
|
|
||||||
self.num_split_params = len(self._sp_theta_i)
|
self.num_split_params = len(self._sym_theta_i)
|
||||||
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
|
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sym_theta_i]
|
||||||
# Add split parameters to the model.
|
# Add split parameters to the model.
|
||||||
for theta in self._split_theta_names:
|
for theta in self._split_theta_names:
|
||||||
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
|
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
|
||||||
|
|
@ -79,18 +95,18 @@ class Symbolic(Kern):
|
||||||
self.add_parameter(getattr(self, theta))
|
self.add_parameter(getattr(self, theta))
|
||||||
|
|
||||||
|
|
||||||
self.num_shared_params = len(self._sp_theta)
|
self.num_shared_params = len(self._sym_theta)
|
||||||
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
|
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
|
||||||
self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
|
self._sym_kdiag = self._sym_kdiag.subs(theta_j, theta_i)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.num_split_params = 0
|
self.num_split_params = 0
|
||||||
self._split_theta_names = []
|
self._split_theta_names = []
|
||||||
self._sp_theta = thetas
|
self._sym_theta = thetas
|
||||||
self.num_shared_params = len(self._sp_theta)
|
self.num_shared_params = len(self._sym_theta)
|
||||||
|
|
||||||
# Add parameters to the model.
|
# Add parameters to the model.
|
||||||
for theta in self._sp_theta:
|
for theta in self._sym_theta:
|
||||||
val = 1.0
|
val = 1.0
|
||||||
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
|
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
|
||||||
if param is not None:
|
if param is not None:
|
||||||
|
|
@ -100,25 +116,25 @@ class Symbolic(Kern):
|
||||||
self.add_parameters(getattr(self, theta.name))
|
self.add_parameters(getattr(self, theta.name))
|
||||||
|
|
||||||
# Differentiate with respect to parameters.
|
# Differentiate with respect to parameters.
|
||||||
derivative_arguments = self._sp_x + self._sp_theta
|
derivative_arguments = self._sym_x + self._sym_theta
|
||||||
if self.output_dim > 1:
|
if self.output_dim > 1:
|
||||||
derivative_arguments += self._sp_theta_i
|
derivative_arguments += self._sym_theta_i
|
||||||
|
|
||||||
self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
|
self.derivatives = {theta.name : stabilise(sym.diff(self._sym_k,theta)) for theta in derivative_arguments}
|
||||||
self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
|
self.diag_derivatives = {theta.name : stabilise(sym.diff(self._sym_kdiag,theta)) for theta in derivative_arguments}
|
||||||
|
|
||||||
# This gives the parameters for the arg list.
|
# This gives the parameters for the arg list.
|
||||||
self.arg_list = self._sp_x + self._sp_z + self._sp_theta
|
self.arg_list = self._sym_x + self._sym_z + self._sym_theta
|
||||||
self.diag_arg_list = self._sp_x + self._sp_theta
|
self.diag_arg_list = self._sym_x + self._sym_theta
|
||||||
if self.output_dim > 1:
|
if self.output_dim > 1:
|
||||||
self.arg_list += self._sp_theta_i + self._sp_theta_j
|
self.arg_list += self._sym_theta_i + self._sym_theta_j
|
||||||
self.diag_arg_list += self._sp_theta_i
|
self.diag_arg_list += self._sym_theta_i
|
||||||
|
|
||||||
# Check if there are additional linear operators on the covariance.
|
# Check if there are additional linear operators on the covariance.
|
||||||
self._sp_operators = operators
|
self._sym_operators = operators
|
||||||
# TODO: Deal with linear operators
|
# TODO: Deal with linear operators
|
||||||
#if self._sp_operators:
|
#if self._sym_operators:
|
||||||
# for operator in self._sp_operators:
|
# for operator in self._sym_operators:
|
||||||
|
|
||||||
# psi_stats aren't yet implemented.
|
# psi_stats aren't yet implemented.
|
||||||
if False:
|
if False:
|
||||||
|
|
@ -128,17 +144,14 @@ class Symbolic(Kern):
|
||||||
self._gen_code()
|
self._gen_code()
|
||||||
|
|
||||||
def __add__(self,other):
|
def __add__(self,other):
|
||||||
return spkern(self._sp_k+other._sp_k)
|
return spkern(self._sym_k+other._sym_k)
|
||||||
|
|
||||||
def _gen_code(self):
|
def _gen_code(self):
|
||||||
#fn_theano = theano_function([self.arg_lists], [self._sp_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
|
#fn_theano = theano_function([self.arg_lists], [self._sym_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
|
||||||
self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy')
|
self._K_function = lambdify(self.arg_list, self._sym_k, self.func_modules)
|
||||||
for key in self.derivatives.keys():
|
self._K_derivatives_code = {key: lambdify(self.arg_list, self.derivatives[key], self.func_modules) for key in self.derivatives.keys()}
|
||||||
setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy'))
|
self._Kdiag_function = lambdify(self.diag_arg_list, self._sym_kdiag, self.func_modules)
|
||||||
|
self._Kdiag_derivatives_code = {key: lambdify(self.diag_arg_list, self.diag_derivatives[key], self.func_modules) for key in self.diag_derivatives.keys()}
|
||||||
self._Kdiag_function = lambdify(self.diag_arg_list, self._sp_kdiag, 'numpy')
|
|
||||||
for key in self.derivatives.keys():
|
|
||||||
setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
|
|
||||||
|
|
||||||
def K(self,X,X2=None):
|
def K(self,X,X2=None):
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
|
|
@ -156,9 +169,9 @@ class Symbolic(Kern):
|
||||||
def gradients_X(self, dL_dK, X, X2=None):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
gradients_X = np.zeros((X.shape[0], X.shape[1]))
|
gradients_X = np.zeros_like(X)
|
||||||
for i, x in enumerate(self._sp_x):
|
for i, x in enumerate(self._sym_x):
|
||||||
gf = getattr(self, '_K_diff_' + x.name)
|
gf = self._K_derivatives_code[x.name]
|
||||||
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
|
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
gradients_X *= 2
|
gradients_X *= 2
|
||||||
|
|
@ -167,25 +180,25 @@ class Symbolic(Kern):
|
||||||
def gradients_X_diag(self, dL_dK, X):
|
def gradients_X_diag(self, dL_dK, X):
|
||||||
self._K_computations(X)
|
self._K_computations(X)
|
||||||
dX = np.zeros_like(X)
|
dX = np.zeros_like(X)
|
||||||
for i, x in enumerate(self._sp_x):
|
for i, x in enumerate(self._sym_x):
|
||||||
gf = getattr(self, '_Kdiag_diff_' + x.name)
|
gf = self._Kdiag_derivatives_code[x.name]
|
||||||
dX[:, i] = gf(**self._diag_arguments)*dL_dK
|
dX[:, i] = gf(**self._diag_arguments)*dL_dK
|
||||||
return dX
|
return dX
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
# Need to extract parameters to local variables first
|
# Need to extract parameters to local variables first
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
for theta in self._sp_theta:
|
for theta in self._sym_theta:
|
||||||
parameter = getattr(self, theta.name)
|
parameter = getattr(self, theta.name)
|
||||||
gf = getattr(self, '_K_diff_' + theta.name)
|
gf = self._K_derivatives_code[theta.name]
|
||||||
gradient = (gf(**self._arguments)*dL_dK).sum()
|
gradient = (gf(**self._arguments)*dL_dK).sum()
|
||||||
if X2 is not None:
|
if X2 is not None:
|
||||||
gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
|
gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
|
||||||
setattr(parameter, 'gradient', gradient)
|
setattr(parameter, 'gradient', gradient)
|
||||||
if self.output_dim>1:
|
if self.output_dim>1:
|
||||||
for theta in self._sp_theta_i:
|
for theta in self._sym_theta_i:
|
||||||
parameter = getattr(self, theta.name[:-2])
|
parameter = getattr(self, theta.name[:-2])
|
||||||
gf = getattr(self, '_K_diff_' + theta.name)
|
gf = self._K_derivatives_code[theta.name]
|
||||||
A = gf(**self._arguments)*dL_dK
|
A = gf(**self._arguments)*dL_dK
|
||||||
gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
|
gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
|
||||||
for i in np.arange(self.output_dim)])
|
for i in np.arange(self.output_dim)])
|
||||||
|
|
@ -200,14 +213,14 @@ class Symbolic(Kern):
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dKdiag, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
self._K_computations(X)
|
self._K_computations(X)
|
||||||
for theta in self._sp_theta:
|
for theta in self._sym_theta:
|
||||||
parameter = getattr(self, theta.name)
|
parameter = getattr(self, theta.name)
|
||||||
gf = getattr(self, '_Kdiag_diff_' + theta.name)
|
gf = self._Kdiag_derivatives_code[theta.name]
|
||||||
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
|
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
|
||||||
if self.output_dim>1:
|
if self.output_dim>1:
|
||||||
for theta in self._sp_theta_i:
|
for theta in self._sym_theta_i:
|
||||||
parameter = getattr(self, theta.name[:-2])
|
parameter = getattr(self, theta.name[:-2])
|
||||||
gf = getattr(self, '_Kdiag_diff_' + theta.name)
|
gf = self._Kdiag_derivatives_code[theta.name]
|
||||||
a = gf(**self._diag_arguments)*dL_dKdiag
|
a = gf(**self._diag_arguments)*dL_dKdiag
|
||||||
setattr(parameter, 'gradient',
|
setattr(parameter, 'gradient',
|
||||||
np.asarray([a[np.where(self._output_ind==i)].sum()
|
np.asarray([a[np.where(self._output_ind==i)].sum()
|
||||||
|
|
@ -220,40 +233,40 @@ class Symbolic(Kern):
|
||||||
# parameter updates here.
|
# parameter updates here.
|
||||||
self._arguments = {}
|
self._arguments = {}
|
||||||
self._diag_arguments = {}
|
self._diag_arguments = {}
|
||||||
for i, x in enumerate(self._sp_x):
|
for i, x in enumerate(self._sym_x):
|
||||||
self._arguments[x.name] = X[:, i][:, None]
|
self._arguments[x.name] = X[:, i][:, None]
|
||||||
self._diag_arguments[x.name] = X[:, i][:, None]
|
self._diag_arguments[x.name] = X[:, i][:, None]
|
||||||
if self.output_dim > 1:
|
if self.output_dim > 1:
|
||||||
self._output_ind = np.asarray(X[:, -1], dtype='int')
|
self._output_ind = np.asarray(X[:, -1], dtype='int')
|
||||||
for i, theta in enumerate(self._sp_theta_i):
|
for i, theta in enumerate(self._sym_theta_i):
|
||||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
|
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
|
||||||
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
||||||
for theta in self._sp_theta:
|
for theta in self._sym_theta:
|
||||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
|
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
|
||||||
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
||||||
|
|
||||||
if X2 is not None:
|
if X2 is not None:
|
||||||
for i, z in enumerate(self._sp_z):
|
for i, z in enumerate(self._sym_z):
|
||||||
self._arguments[z.name] = X2[:, i][None, :]
|
self._arguments[z.name] = X2[:, i][None, :]
|
||||||
if self.output_dim > 1:
|
if self.output_dim > 1:
|
||||||
self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
|
self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
|
||||||
for i, theta in enumerate(self._sp_theta_j):
|
for i, theta in enumerate(self._sym_theta_j):
|
||||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
|
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
|
||||||
else:
|
else:
|
||||||
for z in self._sp_z:
|
for z in self._sym_z:
|
||||||
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
|
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
|
||||||
if self.output_dim > 1:
|
if self.output_dim > 1:
|
||||||
self._output_ind2 = self._output_ind
|
self._output_ind2 = self._output_ind
|
||||||
for theta in self._sp_theta_j:
|
for theta in self._sym_theta_j:
|
||||||
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
|
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
|
||||||
if X2 is not None:
|
if X2 is not None:
|
||||||
# These arguments are needed in gradients when X2 is not equal to X.
|
# These arguments are needed in gradients when X2 is not equal to X.
|
||||||
self._reverse_arguments = self._arguments
|
self._reverse_arguments = self._arguments
|
||||||
for x, z in zip(self._sp_x, self._sp_z):
|
for x, z in zip(self._sym_x, self._sym_z):
|
||||||
self._reverse_arguments[x.name] = self._arguments[z.name].T
|
self._reverse_arguments[x.name] = self._arguments[z.name].T
|
||||||
self._reverse_arguments[z.name] = self._arguments[x.name].T
|
self._reverse_arguments[z.name] = self._arguments[x.name].T
|
||||||
if self.output_dim > 1:
|
if self.output_dim > 1:
|
||||||
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
|
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
|
||||||
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
|
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
|
||||||
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
|
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
|
||||||
|
|
||||||
|
|
@ -265,7 +278,7 @@ if False:
|
||||||
def __init__(self, subkerns, operations, name='sympy_combine'):
|
def __init__(self, subkerns, operations, name='sympy_combine'):
|
||||||
super(Symcombine, self).__init__(subkerns, name)
|
super(Symcombine, self).__init__(subkerns, name)
|
||||||
for subkern, operation in zip(subkerns, operations):
|
for subkern, operation in zip(subkerns, operations):
|
||||||
self._sp_k += self._k_double_operate(subkern._sp_k, operation)
|
self._sym_k += self._k_double_operate(subkern._sym_k, operation)
|
||||||
|
|
||||||
#def _double_operate(self, k, operation):
|
#def _double_operate(self, k, operation):
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ try:
|
||||||
import sympy as sym
|
import sympy as sym
|
||||||
sympy_available=True
|
sympy_available=True
|
||||||
from sympy.utilities.lambdify import lambdify
|
from sympy.utilities.lambdify import lambdify
|
||||||
from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian
|
from GPy.util.symbolic import gammaln, logisticln
|
||||||
except ImportError:
|
except ImportError:
|
||||||
sympy_available=False
|
sympy_available=False
|
||||||
|
|
||||||
|
|
@ -33,12 +33,14 @@ if sympy_available:
|
||||||
"""
|
"""
|
||||||
def __init__(self, gp_link=None):
|
def __init__(self, gp_link=None):
|
||||||
if gp_link is None:
|
if gp_link is None:
|
||||||
gp_link = link_functions.Log()
|
gp_link = link_functions.Identity()
|
||||||
|
|
||||||
dispersion = sym.Symbol('dispersion', positive=True, real=True)
|
dispersion = sym.Symbol('dispersion', positive=True, real=True)
|
||||||
y = sym.Symbol('y', nonnegative=True, integer=True)
|
y = sym.Symbol('y', nonnegative=True, integer=True)
|
||||||
f = sym.Symbol('f', positive=True, real=True)
|
f = sym.Symbol('f', positive=True, real=True)
|
||||||
|
gp_link = link_functions.Log()
|
||||||
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f)
|
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f)
|
||||||
|
#log_pdf= -(dispersion+y)*logisticln(f-sym.log(dispersion)) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*(f-sym.log(dispersion))
|
||||||
super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial')
|
super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial')
|
||||||
|
|
||||||
# TODO: Check this.
|
# TODO: Check this.
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,8 @@ except ImportError:
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import link_functions
|
import link_functions
|
||||||
from scipy import stats, integrate
|
from scipy import stats, integrate
|
||||||
from scipy.special import gammaln, gamma, erf, polygamma
|
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
|
||||||
from GPy.util.functions import cum_gaussian, ln_cum_gaussian
|
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln
|
||||||
from likelihood import Likelihood
|
from likelihood import Likelihood
|
||||||
from ..core.parameterization import Param
|
from ..core.parameterization import Param
|
||||||
|
|
||||||
|
|
@ -33,7 +33,17 @@ if sympy_available:
|
||||||
if log_pdf is None:
|
if log_pdf is None:
|
||||||
raise ValueError, "You must provide an argument for the log pdf."
|
raise ValueError, "You must provide an argument for the log pdf."
|
||||||
|
|
||||||
self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy']
|
self.func_modules = func_modules
|
||||||
|
self.func_modules += [{'gamma':gamma,
|
||||||
|
'gammaln':gammaln,
|
||||||
|
'erf':erf, 'erfc':erfc,
|
||||||
|
'erfcx':erfcx,
|
||||||
|
'polygamma':polygamma,
|
||||||
|
'normcdf':normcdf,
|
||||||
|
'normcdfln':normcdfln,
|
||||||
|
'logistic':logistic,
|
||||||
|
'logisticln':logisticln},
|
||||||
|
'numpy']
|
||||||
|
|
||||||
super(Symbolic, self).__init__(gp_link, name=name)
|
super(Symbolic, self).__init__(gp_link, name=name)
|
||||||
self.missing_data = False
|
self.missing_data = False
|
||||||
|
|
@ -58,7 +68,7 @@ if sympy_available:
|
||||||
sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol]
|
sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol]
|
||||||
sym_f = [e for e in sym_vars if e.name=='f']
|
sym_f = [e for e in sym_vars if e.name=='f']
|
||||||
if not sym_f:
|
if not sym_f:
|
||||||
raise ValueError('No variable f in missing log pdf.')
|
raise ValueError('No variable f in missing data log pdf.')
|
||||||
sym_y = [e for e in sym_vars if e.name=='y']
|
sym_y = [e for e in sym_vars if e.name=='y']
|
||||||
if sym_y:
|
if sym_y:
|
||||||
raise ValueError('Data is present in missing data portion of likelihood.')
|
raise ValueError('Data is present in missing data portion of likelihood.')
|
||||||
|
|
@ -74,15 +84,15 @@ if sympy_available:
|
||||||
derivative_arguments = self._sym_f + self._sym_theta
|
derivative_arguments = self._sym_f + self._sym_theta
|
||||||
|
|
||||||
# Do symbolic work to compute derivatives.
|
# Do symbolic work to compute derivatives.
|
||||||
self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments}
|
self._log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_log_pdf,theta)) for theta in derivative_arguments}
|
||||||
self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
self._log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||||
self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
self._log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||||
|
|
||||||
if self.missing_data:
|
if self.missing_data:
|
||||||
# Do symbolic work to compute derivatives.
|
# Do symbolic work to compute derivatives.
|
||||||
self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments}
|
self._missing_log_pdf_derivatives = {theta.name : stabilise(sym.diff(self._sym_missing_log_pdf,theta)) for theta in derivative_arguments}
|
||||||
self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
self._missing_log_pdf_second_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||||
self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
self._missing_log_pdf_third_derivatives = {theta.name : stabilise(sym.diff(self._missing_log_pdf_second_derivatives['f'],theta)) for theta in derivative_arguments}
|
||||||
|
|
||||||
|
|
||||||
# Add parameters to the model.
|
# Add parameters to the model.
|
||||||
|
|
@ -96,7 +106,7 @@ if sympy_available:
|
||||||
self.add_parameters(getattr(self, theta.name))
|
self.add_parameters(getattr(self, theta.name))
|
||||||
|
|
||||||
|
|
||||||
# Is there some way to check whether the pdf is log
|
# TODO: Is there an easy way to check whether the pdf is log
|
||||||
# concave? For the moment, need user to specify.
|
# concave? For the moment, need user to specify.
|
||||||
self.log_concave = log_concave
|
self.log_concave = log_concave
|
||||||
|
|
||||||
|
|
@ -106,22 +116,30 @@ if sympy_available:
|
||||||
# generate the code for the pdf and derivatives
|
# generate the code for the pdf and derivatives
|
||||||
self._gen_code()
|
self._gen_code()
|
||||||
|
|
||||||
|
def list_functions(self):
|
||||||
|
"""Return a list of all symbolic functions in the model and their names."""
|
||||||
def _gen_code(self):
|
def _gen_code(self):
|
||||||
"""Generate the code from the symbolic parts that will be used for likelihood computation."""
|
"""Generate the code from the symbolic parts that will be used for likelihood computation."""
|
||||||
# TODO: Check here whether theano is available and set up
|
# TODO: Check here whether theano is available and set up
|
||||||
# functions accordingly.
|
# functions accordingly.
|
||||||
self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules)
|
symbolic_functions = [self._sym_log_pdf]
|
||||||
|
deriv_list = [self._log_pdf_derivatives, self._log_pdf_second_derivatives, self._log_pdf_third_derivatives]
|
||||||
# compute code for derivatives (for implicit likelihood terms
|
symbolic_functions += [deriv[key] for key in sorted(deriv.keys()) for deriv in deriv_list]
|
||||||
# we need up to 3rd derivatives)
|
|
||||||
setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()})
|
|
||||||
setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()})
|
|
||||||
setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()})
|
|
||||||
|
|
||||||
if self.missing_data:
|
if self.missing_data:
|
||||||
setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()})
|
symbolic_functions+=[self._sym_missing_log_pdf]
|
||||||
setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()})
|
deriv_list = [self._missing_log_pdf_derivatives, self._missing_log_pdf_second_derivatives, self._missing_log_pdf_third_derivatives]
|
||||||
setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()})
|
symbolic_functions += [deriv[key] for key in sorted(deriv.keys()) for deriv in deriv_list]
|
||||||
|
# self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules)
|
||||||
|
|
||||||
|
# # compute code for derivatives
|
||||||
|
# self._derivative_code = {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}
|
||||||
|
# self._second_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}
|
||||||
|
# self._third_derivative_code = {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()}
|
||||||
|
|
||||||
|
# if self.missing_data:
|
||||||
|
# self._missing_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}
|
||||||
|
# self._missing_second_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}
|
||||||
|
# self._missing_third_derivative_code = {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}
|
||||||
|
|
||||||
# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
|
# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
|
||||||
|
|
||||||
|
|
@ -210,9 +228,9 @@ if sympy_available:
|
||||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
self._arguments_update(inv_link_f, y)
|
self._arguments_update(inv_link_f, y)
|
||||||
if self.missing_data:
|
if self.missing_data:
|
||||||
return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._missing_argments), self._first_derivative_code['f'](**self._argments))
|
return np.where(np.isnan(y), self._missing_derivative_code['f'](**self._missing_argments), self._derivative_code['f'](**self._argments))
|
||||||
else:
|
else:
|
||||||
return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments))
|
return np.where(np.isnan(y), 0., self._derivative_code['f'](**self._arguments))
|
||||||
|
|
||||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -255,9 +273,9 @@ if sympy_available:
|
||||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||||
for i, theta in enumerate(self._sym_theta):
|
for i, theta in enumerate(self._sym_theta):
|
||||||
if self.missing_data:
|
if self.missing_data:
|
||||||
g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments))
|
g[:, i:i+1] = np.where(np.isnan(y), self._missing_derivative_code[theta.name](**self._arguments), self._derivative_code[theta.name](**self._arguments))
|
||||||
else:
|
else:
|
||||||
g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments))
|
g[:, i:i+1] = np.where(np.isnan(y), 0., self._derivative_code[theta.name](**self._arguments))
|
||||||
return g.sum(0)
|
return g.sum(0)
|
||||||
|
|
||||||
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,17 @@
|
||||||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from kernel import Kernel
|
from kernel import Kernel
|
||||||
from linear import Linear
|
from linear import Linear
|
||||||
from mlp import MLP
|
from mlp import MLP
|
||||||
#from rbf import RBF
|
#from rbf import RBF
|
||||||
|
# TODO need to fix this in a config file.
|
||||||
|
try:
|
||||||
|
import sympy as sym
|
||||||
|
sympy_available=True
|
||||||
|
except ImportError:
|
||||||
|
sympy_available=False
|
||||||
|
|
||||||
|
if sympy_available:
|
||||||
|
# These are mappings that rely on symbolic.
|
||||||
|
from symbolic import Symbolic
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ..core.mapping import Mapping
|
from ..core.mapping import Bijective_mapping
|
||||||
from ..core.parameterization import Param
|
from ..core.parameterization import Param
|
||||||
|
|
||||||
class Linear(Mapping):
|
class Linear(Bijective_mapping):
|
||||||
"""
|
"""
|
||||||
Mapping based on a linear model.
|
Mapping based on a linear model.
|
||||||
|
|
||||||
|
|
@ -20,8 +20,8 @@ class Linear(Mapping):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, input_dim=1, output_dim=1, name='linear_map'):
|
def __init__(self, input_dim=1, output_dim=1, name='linear'):
|
||||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||||
self.W = Param('W',np.array((self.input_dim, self.output_dim)))
|
self.W = Param('W',np.array((self.input_dim, self.output_dim)))
|
||||||
self.bias = Param('bias',np.array(self.output_dim))
|
self.bias = Param('bias',np.array(self.output_dim))
|
||||||
self.add_parameters(self.W, self.bias)
|
self.add_parameters(self.W, self.bias)
|
||||||
|
|
@ -29,10 +29,15 @@ class Linear(Mapping):
|
||||||
def f(self, X):
|
def f(self, X):
|
||||||
return np.dot(X,self.W) + self.bias
|
return np.dot(X,self.W) + self.bias
|
||||||
|
|
||||||
|
def g(self, f):
|
||||||
|
V = np.linalg.solve(np.dot(self.W.T, self.W), W.T)
|
||||||
|
return np.dot(f-self.bias, V)
|
||||||
|
|
||||||
def df_dtheta(self, dL_df, X):
|
def df_dtheta(self, dL_df, X):
|
||||||
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
|
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
|
||||||
df_dbias = (dL_df.sum(0))
|
df_dbias = (dL_df.sum(0))
|
||||||
return np.hstack((df_dW.flatten(), df_dbias))
|
return np.hstack((df_dW.flatten(), df_dbias))
|
||||||
|
|
||||||
def dL_dX(self, dL_df, X):
|
def dL_dX(self, partial, X):
|
||||||
return (dL_df[:, None, :]*self.W[None, :, :]).sum(2)
|
"""The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
|
||||||
|
return (partial[:, None, :]*self.W[None, :, :]).sum(2)
|
||||||
|
|
|
||||||
56
GPy/mappings/symbolic.py
Normal file
56
GPy/mappings/symbolic.py
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
# Copyright (c) 2014 GPy Authors
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import sympy as sym
|
||||||
|
from ..core.mapping import Mapping, Bijective_mapping
|
||||||
|
from ..core.symbolic import Symbolic_core
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class Symbolic(Mapping, Symbolic_core):
|
||||||
|
"""
|
||||||
|
Symbolic mapping
|
||||||
|
|
||||||
|
Mapping where the form of the mapping is provided by a sympy expression.
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, input_dim, output_dim, f=None, name='symbolic', param=None, func_modules=[]):
|
||||||
|
|
||||||
|
|
||||||
|
if f is None:
|
||||||
|
raise ValueError, "You must provide an argument for the function."
|
||||||
|
|
||||||
|
Mapping.__init__(self, input_dim, output_dim, name=name)
|
||||||
|
Symbolic_core.__init__(self, f, ['X'], derivatives = ['X', 'theta'], param=param, func_modules=func_modules)
|
||||||
|
|
||||||
|
self._initialize_cache()
|
||||||
|
self.parameters_changed()
|
||||||
|
|
||||||
|
def _initialize_cache(self):
|
||||||
|
self.x_0 = np.random.normal(size=(3, self.input_dim))
|
||||||
|
|
||||||
|
|
||||||
|
def parameters_changed(self):
|
||||||
|
self.eval_parameters_changed()
|
||||||
|
|
||||||
|
def update_cache(self, X):
|
||||||
|
self.eval_update_cache(X)
|
||||||
|
|
||||||
|
def update_gradients(self, partial, X):
|
||||||
|
self.eval_update_gradients(partial, X)
|
||||||
|
|
||||||
|
def gradients_X(self, partial, X):
|
||||||
|
return self.eval_gradients_X(partial, X)
|
||||||
|
|
||||||
|
def f(self, X):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
return self.eval_f(X)
|
||||||
|
|
||||||
|
|
||||||
|
def df_dX(self, X):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def df_dtheta(self, X):
|
||||||
|
pass
|
||||||
|
|
@ -24,12 +24,14 @@ class Test(unittest.TestCase):
|
||||||
self.assertDictEqual(self.param_index._properties, {})
|
self.assertDictEqual(self.param_index._properties, {})
|
||||||
|
|
||||||
def test_remove(self):
|
def test_remove(self):
|
||||||
self.param_index.remove(three, np.r_[3:10])
|
removed = self.param_index.remove(three, np.r_[3:10])
|
||||||
|
self.assertListEqual(removed.tolist(), [4, 7])
|
||||||
self.assertListEqual(self.param_index[three].tolist(), [2])
|
self.assertListEqual(self.param_index[three].tolist(), [2])
|
||||||
self.param_index.remove(one, [1])
|
removed = self.param_index.remove(one, [1])
|
||||||
|
self.assertListEqual(removed.tolist(), [])
|
||||||
self.assertListEqual(self.param_index[one].tolist(), [3])
|
self.assertListEqual(self.param_index[one].tolist(), [3])
|
||||||
self.assertListEqual(self.param_index.remove('not in there', []).tolist(), [])
|
self.assertListEqual(self.param_index.remove('not in there', []).tolist(), [])
|
||||||
self.param_index.remove(one, [9])
|
removed = self.param_index.remove(one, [9])
|
||||||
self.assertListEqual(self.param_index[one].tolist(), [3])
|
self.assertListEqual(self.param_index[one].tolist(), [3])
|
||||||
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
|
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
|
||||||
|
|
||||||
|
|
@ -78,6 +80,13 @@ class Test(unittest.TestCase):
|
||||||
self.assertEqual(i, i2)
|
self.assertEqual(i, i2)
|
||||||
self.assertTrue(np.all(v == v2))
|
self.assertTrue(np.all(v == v2))
|
||||||
|
|
||||||
|
def test_indexview_remove(self):
|
||||||
|
removed = self.view.remove(two, [3])
|
||||||
|
self.assertListEqual(removed.tolist(), [3])
|
||||||
|
removed = self.view.remove(three, np.r_[:5])
|
||||||
|
self.assertListEqual(removed.tolist(), [0, 2])
|
||||||
|
|
||||||
|
|
||||||
def test_misc(self):
|
def test_misc(self):
|
||||||
for k,v in self.param_index.copy()._properties.iteritems():
|
for k,v in self.param_index.copy()._properties.iteritems():
|
||||||
self.assertListEqual(self.param_index[k].tolist(), v.tolist())
|
self.assertListEqual(self.param_index[k].tolist(), v.tolist())
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,6 @@ from GPy.core.parameterization.param import Param
|
||||||
|
|
||||||
verbose = 0
|
verbose = 0
|
||||||
|
|
||||||
np.random.seed(50)
|
|
||||||
|
|
||||||
|
|
||||||
class Kern_check_model(GPy.core.Model):
|
class Kern_check_model(GPy.core.Model):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,18 @@ class ParameterizedTest(unittest.TestCase):
|
||||||
self.testmodel.randomize()
|
self.testmodel.randomize()
|
||||||
np.testing.assert_equal(variances, self.testmodel['.*var'].values())
|
np.testing.assert_equal(variances, self.testmodel['.*var'].values())
|
||||||
|
|
||||||
|
def test_fix_unfix(self):
|
||||||
|
fixed = self.testmodel.kern.lengthscale.fix()
|
||||||
|
self.assertListEqual(fixed.tolist(), [0])
|
||||||
|
unfixed = self.testmodel.kern.lengthscale.unfix()
|
||||||
|
self.testmodel.kern.lengthscale.constrain_positive()
|
||||||
|
self.assertListEqual(unfixed.tolist(), [0])
|
||||||
|
|
||||||
|
fixed = self.testmodel.kern.fix()
|
||||||
|
self.assertListEqual(fixed.tolist(), [0,1])
|
||||||
|
unfixed = self.testmodel.kern.unfix()
|
||||||
|
self.assertListEqual(unfixed.tolist(), [0,1])
|
||||||
|
|
||||||
def test_printing(self):
|
def test_printing(self):
|
||||||
print self.test1
|
print self.test1
|
||||||
print self.param
|
print self.param
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -106,9 +106,30 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
|
||||||
raise ValueError('Tried url ' + url + suffix + ' and received client error ' + str(response.code))
|
raise ValueError('Tried url ' + url + suffix + ' and received client error ' + str(response.code))
|
||||||
elif response.code > 499:
|
elif response.code > 499:
|
||||||
raise ValueError('Tried url ' + url + suffix + ' and received server error ' + str(response.code))
|
raise ValueError('Tried url ' + url + suffix + ' and received server error ' + str(response.code))
|
||||||
# if we wanted to get more sophisticated maybe we should check the response code here again even for successes.
|
|
||||||
with open(save_name, 'wb') as f:
|
with open(save_name, 'wb') as f:
|
||||||
f.write(response.read())
|
meta = response.info()
|
||||||
|
file_size = int(meta.getheaders("Content-Length")[0])
|
||||||
|
status = ""
|
||||||
|
file_size_dl = 0
|
||||||
|
block_sz = 8192
|
||||||
|
line_length=30
|
||||||
|
while True:
|
||||||
|
buff = response.read(block_sz)
|
||||||
|
if not buff:
|
||||||
|
break
|
||||||
|
file_size_dl += len(buff)
|
||||||
|
f.write(buff)
|
||||||
|
sys.stdout.write(" "*(len(status)) + "\r")
|
||||||
|
status = r"[{perc: <{ll}}] {dl:7.3f}/{full:.3f}MB".format(dl=file_size_dl/(1.*1e6),
|
||||||
|
full=file_size/(1.*1e6), ll=line_length,
|
||||||
|
perc="="*int(line_length*float(file_size_dl)/file_size))
|
||||||
|
sys.stdout.write(status)
|
||||||
|
sys.stdout.flush()
|
||||||
|
sys.stdout.write(" "*(len(status)) + "\r")
|
||||||
|
print status
|
||||||
|
# if we wanted to get more sophisticated maybe we should check the response code here again even for successes.
|
||||||
|
#with open(save_name, 'wb') as f:
|
||||||
|
# f.write(response.read())
|
||||||
|
|
||||||
#urllib.urlretrieve(url+suffix, save_name, reporthook)
|
#urllib.urlretrieve(url+suffix, save_name, reporthook)
|
||||||
|
|
||||||
|
|
@ -304,8 +325,8 @@ def football_data(season='1314', data_set='football_data'):
|
||||||
data_set_season = data_set + '_' + season
|
data_set_season = data_set + '_' + season
|
||||||
data_resources[data_set_season] = copy.deepcopy(data_resources[data_set])
|
data_resources[data_set_season] = copy.deepcopy(data_resources[data_set])
|
||||||
data_resources[data_set_season]['urls'][0]+=season + '/'
|
data_resources[data_set_season]['urls'][0]+=season + '/'
|
||||||
start_year = int(year[0:2])
|
start_year = int(season[0:2])
|
||||||
end_year = int(year[2:4])
|
end_year = int(season[2:4])
|
||||||
files = ['E0.csv', 'E1.csv', 'E2.csv', 'E3.csv']
|
files = ['E0.csv', 'E1.csv', 'E2.csv', 'E3.csv']
|
||||||
if start_year>4 and start_year < 93:
|
if start_year>4 and start_year < 93:
|
||||||
files += ['EC.csv']
|
files += ['EC.csv']
|
||||||
|
|
@ -552,6 +573,151 @@ def swiss_roll_generated(num_samples=1000, sigma=0.0):
|
||||||
c = c[so, :]
|
c = c[so, :]
|
||||||
return {'Y':Y, 't':t, 'colors':c}
|
return {'Y':Y, 't':t, 'colors':c}
|
||||||
|
|
||||||
|
def hapmap3(data_set='hapmap3'):
|
||||||
|
"""
|
||||||
|
The HapMap phase three SNP dataset - 1184 samples out of 11 populations.
|
||||||
|
|
||||||
|
SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:
|
||||||
|
Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then
|
||||||
|
|
||||||
|
/ 1, iff SNPij==(B1,B1)
|
||||||
|
Aij = | 0, iff SNPij==(B1,B2)
|
||||||
|
\ -1, iff SNPij==(B2,B2)
|
||||||
|
|
||||||
|
The SNP data and the meta information (such as iid, sex and phenotype) are
|
||||||
|
stored in the dataframe datadf, index is the Individual ID,
|
||||||
|
with following columns for metainfo:
|
||||||
|
|
||||||
|
* family_id -> Family ID
|
||||||
|
* paternal_id -> Paternal ID
|
||||||
|
* maternal_id -> Maternal ID
|
||||||
|
* sex -> Sex (1=male; 2=female; other=unknown)
|
||||||
|
* phenotype -> Phenotype (-9, or 0 for unknown)
|
||||||
|
* population -> Population string (e.g. 'ASW' - 'YRI')
|
||||||
|
* rest are SNP rs (ids)
|
||||||
|
|
||||||
|
More information is given in infodf:
|
||||||
|
|
||||||
|
* Chromosome:
|
||||||
|
- autosomal chromosemes -> 1-22
|
||||||
|
- X X chromosome -> 23
|
||||||
|
- Y Y chromosome -> 24
|
||||||
|
- XY Pseudo-autosomal region of X -> 25
|
||||||
|
- MT Mitochondrial -> 26
|
||||||
|
* Relative Positon (to Chromosome) [base pairs]
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from pandas import read_pickle, DataFrame
|
||||||
|
from sys import stdout
|
||||||
|
import bz2
|
||||||
|
except ImportError as i:
|
||||||
|
raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset"
|
||||||
|
if not data_available(data_set):
|
||||||
|
download_data(data_set)
|
||||||
|
dirpath = os.path.join(data_path,'hapmap3')
|
||||||
|
hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly'
|
||||||
|
preprocessed_data_paths = [os.path.join(dirpath,hapmap_file_name + file_name) for file_name in \
|
||||||
|
['.snps.pickle',
|
||||||
|
'.info.pickle',
|
||||||
|
'.nan.pickle']]
|
||||||
|
if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)):
|
||||||
|
if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB "
|
||||||
|
"of memory and can take a (very) long time, continue? [Y/n]"):
|
||||||
|
print "Preprocessing required for further usage."
|
||||||
|
return
|
||||||
|
status = "Preprocessing data, please be patient..."
|
||||||
|
print status
|
||||||
|
def write_status(message, progress, status):
|
||||||
|
stdout.write(" "*len(status)); stdout.write("\r"); stdout.flush()
|
||||||
|
status = r"[{perc: <{ll}}] {message: <13s}".format(message=message, ll=20,
|
||||||
|
perc="="*int(20.*progress/100.))
|
||||||
|
stdout.write(status); stdout.flush()
|
||||||
|
return status
|
||||||
|
unpacked_files = [os.path.join(dirpath, hapmap_file_name+ending) for ending in ['.ped', '.map']]
|
||||||
|
if not reduce(lambda a,b: a and b, map(os.path.exists, unpacked_files)):
|
||||||
|
status=write_status('unpacking...', 0, '')
|
||||||
|
curr = 0
|
||||||
|
for newfilepath in unpacked_files:
|
||||||
|
if not os.path.exists(newfilepath):
|
||||||
|
filepath = newfilepath + '.bz2'
|
||||||
|
file_size = os.path.getsize(filepath)
|
||||||
|
with open(newfilepath, 'wb') as new_file, open(filepath, 'rb') as f:
|
||||||
|
decomp = bz2.BZ2Decompressor()
|
||||||
|
file_processed = 0
|
||||||
|
buffsize = 100 * 1024
|
||||||
|
for data in iter(lambda : f.read(buffsize), b''):
|
||||||
|
new_file.write(decomp.decompress(data))
|
||||||
|
file_processed += len(data)
|
||||||
|
status=write_status('unpacking...', curr+12.*file_processed/(file_size), status)
|
||||||
|
curr += 12
|
||||||
|
status=write_status('unpacking...', curr, status)
|
||||||
|
status=write_status('reading .ped...', 25, status)
|
||||||
|
# Preprocess data:
|
||||||
|
snpstrnp = np.loadtxt(unpacked_files[0], dtype=str)
|
||||||
|
status=write_status('reading .map...', 33, status)
|
||||||
|
mapnp = np.loadtxt(unpacked_files[1], dtype=str)
|
||||||
|
status=write_status('reading relationships.txt...', 42, status)
|
||||||
|
# and metainfo:
|
||||||
|
infodf = DataFrame.from_csv(os.path.join(dirpath,'./relationships_w_pops_121708.txt'), header=0, sep='\t')
|
||||||
|
infodf.set_index('IID', inplace=1)
|
||||||
|
status=write_status('filtering nan...', 45, status)
|
||||||
|
snpstr = snpstrnp[:,6:].astype('S1').reshape(snpstrnp.shape[0], -1, 2)
|
||||||
|
inan = snpstr[:,:,0] == '0'
|
||||||
|
status=write_status('filtering reference alleles...', 55, status)
|
||||||
|
ref = np.array(map(lambda x: np.unique(x)[-2:], snpstr.swapaxes(0,1)[:,:,:]))
|
||||||
|
status=write_status('encoding snps...', 70, status)
|
||||||
|
# Encode the information for each gene in {-1,0,1}:
|
||||||
|
status=write_status('encoding snps...', 73, status)
|
||||||
|
snps = (snpstr==ref[None,:,:])
|
||||||
|
status=write_status('encoding snps...', 76, status)
|
||||||
|
snps = (snps*np.array([1,-1])[None,None,:])
|
||||||
|
status=write_status('encoding snps...', 78, status)
|
||||||
|
snps = snps.sum(-1)
|
||||||
|
status=write_status('encoding snps...', 81, status)
|
||||||
|
snps = snps.astype('i8')
|
||||||
|
status=write_status('marking nan values...', 88, status)
|
||||||
|
# put in nan values (masked as -128):
|
||||||
|
snps[inan] = -128
|
||||||
|
status=write_status('setting up meta...', 94, status)
|
||||||
|
# get meta information:
|
||||||
|
metaheader = np.r_[['family_id', 'iid', 'paternal_id', 'maternal_id', 'sex', 'phenotype']]
|
||||||
|
metadf = DataFrame(columns=metaheader, data=snpstrnp[:,:6])
|
||||||
|
metadf.set_index('iid', inplace=1)
|
||||||
|
metadf = metadf.join(infodf.population)
|
||||||
|
metadf.to_pickle(preprocessed_data_paths[1])
|
||||||
|
# put everything together:
|
||||||
|
status=write_status('setting up snps...', 96, status)
|
||||||
|
snpsdf = DataFrame(index=metadf.index, data=snps, columns=mapnp[:,1])
|
||||||
|
with open(preprocessed_data_paths[0], 'wb') as f:
|
||||||
|
pickle.dump(f, snpsdf, protocoll=-1)
|
||||||
|
status=write_status('setting up snps...', 98, status)
|
||||||
|
inandf = DataFrame(index=metadf.index, data=inan, columns=mapnp[:,1])
|
||||||
|
inandf.to_pickle(preprocessed_data_paths[2])
|
||||||
|
status=write_status('done :)', 100, status)
|
||||||
|
print ''
|
||||||
|
else:
|
||||||
|
print "loading snps..."
|
||||||
|
snpsdf = read_pickle(preprocessed_data_paths[0])
|
||||||
|
print "loading metainfo..."
|
||||||
|
metadf = read_pickle(preprocessed_data_paths[1])
|
||||||
|
print "loading nan entries..."
|
||||||
|
inandf = read_pickle(preprocessed_data_paths[2])
|
||||||
|
snps = snpsdf.values
|
||||||
|
populations = metadf.population.values.astype('S3')
|
||||||
|
hapmap = dict(name=data_set,
|
||||||
|
description='The HapMap phase three SNP dataset - '
|
||||||
|
'1184 samples out of 11 populations. inan is a '
|
||||||
|
'boolean array, containing wheather or not the '
|
||||||
|
'given entry is nan (nans are masked as '
|
||||||
|
'-128 in snps).',
|
||||||
|
snpsdf=snpsdf,
|
||||||
|
metadf=metadf,
|
||||||
|
snps=snps,
|
||||||
|
inan=inandf.values,
|
||||||
|
inandf=inandf,
|
||||||
|
populations=populations)
|
||||||
|
return hapmap
|
||||||
|
|
||||||
def swiss_roll_1000():
|
def swiss_roll_1000():
|
||||||
return swiss_roll(num_samples=1000)
|
return swiss_roll(num_samples=1000)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'],
|
||||||
'size' : 1100584},
|
'size' : 1100584},
|
||||||
'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'],
|
'cmu_mocap_full' : {'urls' : ['http://mocap.cs.cmu.edu'],
|
||||||
'files' : [['allasfamc.zip']],
|
'files' : [['allasfamc.zip']],
|
||||||
'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.
|
'citation' : """Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.'
|
||||||
The database was created with funding from NSF EIA-0196217.""",
|
'The database was created with funding from NSF EIA-0196217.""",
|
||||||
'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
|
'details' : """CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.""",
|
||||||
'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
|
'license' : """From http://mocap.cs.cmu.edu. This data is free for use in research projects. You may include this data in commercially-sold products, but you may not resell this data directly, even in converted form. If you publish results obtained using this data, we would appreciate it if you would send the citation to your published paper to jkh+mocap@cs.cmu.edu, and also would add this text to your acknowledgments section: The data used in this project was obtained from mocap.cs.cmu.edu. The database was created with funding from NSF EIA-0196217.""",
|
||||||
'size' : None},
|
'size' : None},
|
||||||
|
|
@ -120,8 +120,49 @@ The database was created with funding from NSF EIA-0196217.""",
|
||||||
'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
|
'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""",
|
||||||
'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
|
'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005',
|
||||||
'license' : None,
|
'license' : None,
|
||||||
'size' : 3410}
|
'size' : 3410},
|
||||||
|
'hapmap3' : {'urls' : ['http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/latest_phaseIII_ncbi_b36/plink_format/'],
|
||||||
|
'files' : [['hapmap3_r2_b36_fwd.consensus.qc.poly.map.bz2', 'hapmap3_r2_b36_fwd.consensus.qc.poly.ped.bz2', 'relationships_w_pops_121708.txt']],
|
||||||
|
'details' : """
|
||||||
|
HapMap Project: Single Nucleotide Polymorphism sequenced in all human populations.
|
||||||
|
The HapMap phase three SNP dataset - 1184 samples out of 11 populations.
|
||||||
|
See http://www.nature.com/nature/journal/v426/n6968/abs/nature02168.html for details.
|
||||||
|
|
||||||
|
SNP_matrix (A) encoding [see Paschou et all. 2007 (PCA-Correlated SNPs...)]:
|
||||||
|
Let (B1,B2) be the alphabetically sorted bases, which occur in the j-th SNP, then
|
||||||
|
|
||||||
|
/ 1, iff SNPij==(B1,B1)
|
||||||
|
Aij = | 0, iff SNPij==(B1,B2)
|
||||||
|
\ -1, iff SNPij==(B2,B2)
|
||||||
|
|
||||||
|
The SNP data and the meta information (such as iid, sex and phenotype) are
|
||||||
|
stored in the dataframe datadf, index is the Individual ID,
|
||||||
|
with following columns for metainfo:
|
||||||
|
|
||||||
|
* family_id -> Family ID
|
||||||
|
* paternal_id -> Paternal ID
|
||||||
|
* maternal_id -> Maternal ID
|
||||||
|
* sex -> Sex (1=male; 2=female; other=unknown)
|
||||||
|
* phenotype -> Phenotype (-9, or 0 for unknown)
|
||||||
|
* population -> Population string (e.g. 'ASW' - 'YRI')
|
||||||
|
* rest are SNP rs (ids)
|
||||||
|
|
||||||
|
More information is given in infodf:
|
||||||
|
|
||||||
|
* Chromosome:
|
||||||
|
- autosomal chromosemes -> 1-22
|
||||||
|
- X X chromosome -> 23
|
||||||
|
- Y Y chromosome -> 24
|
||||||
|
- XY Pseudo-autosomal region of X -> 25
|
||||||
|
- MT Mitochondrial -> 26
|
||||||
|
* Relative Positon (to Chromosome) [base pairs]
|
||||||
|
|
||||||
|
""",
|
||||||
|
'citation': """Gibbs, Richard A., et al. "The international HapMap project." Nature 426.6968 (2003): 789-796.""",
|
||||||
|
'license' : """International HapMap Project Public Access License (http://hapmap.ncbi.nlm.nih.gov/cgi-perl/registration#licence)""",
|
||||||
|
'size' : 2*1729092237 + 62265},
|
||||||
}
|
}
|
||||||
|
|
||||||
with open('data_resources.json', 'w') as file:
|
with open('data_resources.json', 'w') as f:
|
||||||
json.dump(data_resources, file)
|
print "writing data_resources"
|
||||||
|
json.dump(data_resources, f)
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,25 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.special import erf, erfcx
|
from scipy.special import erf, erfc, erfcx
|
||||||
import sys
|
import sys
|
||||||
epsilon = sys.float_info.epsilon
|
epsilon = sys.float_info.epsilon
|
||||||
lim_val = -np.log(epsilon)
|
lim_val = -np.log(epsilon)
|
||||||
|
|
||||||
def cum_gaussian(x):
|
def logisticln(x):
|
||||||
g=0.5*(1+erf(x/np.sqrt(2)))
|
return np.where(x<lim_val, np.where(x>-lim_val, -np.log(1+np.exp(-x)), -x), -np.log(1+epsilon))
|
||||||
|
|
||||||
|
def logistic(x):
|
||||||
|
return np.where(x<lim_val, np.where(x>-lim_val, 1/(1+np.exp(-x)), epsilon/(epsilon+1)), 1/(1+epsilon))
|
||||||
|
|
||||||
|
def normcdf(x):
|
||||||
|
g=0.5*erfc(-x/np.sqrt(2))
|
||||||
return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g))
|
return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g))
|
||||||
|
|
||||||
def ln_cum_gaussian(x):
|
def normcdfln(x):
|
||||||
return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x)))
|
return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-x/np.sqrt(2))), np.log(normcdf(x)))
|
||||||
|
|
||||||
def clip_exp(x):
|
def clip_exp(x):
|
||||||
if any(x>=lim_val) or any(x<=-lim_val):
|
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), epsilon), 1/epsilon)
|
||||||
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))
|
|
||||||
else:
|
def differfln(x0, x1):
|
||||||
return np.exp(x)
|
# this is a, hopefully!, a numerically more stable variant of log(erf(x0)-erf(x1)) = log(erfc(x1)-erfc(x0)).
|
||||||
|
return np.where(x0>x1, -x1*x1 + np.log(erfcx(x1)-np.exp(-x0**2+x1**2)*erfcx(x0)), -x0*x0 + np.log(np.exp(-x1**2+x0**2)*erfcx(x1) - erfcx(x0)))
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,54 @@
|
||||||
|
import sys
|
||||||
|
import numpy as np
|
||||||
|
import sympy as sym
|
||||||
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
|
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
|
||||||
|
|
||||||
|
class logistic(Function):
|
||||||
|
"""The logistic function as a symbolic function."""
|
||||||
|
nargs = 1
|
||||||
|
def fdiff(self, argindex=1):
|
||||||
|
x = self.args[0]
|
||||||
|
return logistic(x)*(1-logistic(x))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
if x.is_Number:
|
||||||
|
return 1/(1+exp(-x))
|
||||||
|
|
||||||
|
class logisticln(Function):
|
||||||
|
"""The log logistic, which can often be computed with more precision than the simply taking log(logistic(x)) when x is small or large."""
|
||||||
|
nargs = 1
|
||||||
|
|
||||||
|
def fdiff(self, argindex=1):
|
||||||
|
x = self.args[0]
|
||||||
|
return 1-logistic(x)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
if x.is_Number:
|
||||||
|
return -np.log(1+exp(-x))
|
||||||
|
|
||||||
|
class erfc(Function):
|
||||||
|
"""The complementary error function, erfc(x) = 1-erf(x). Used as a helper function, particularly for erfcx, the scaled complementary error function. and the normal distributions cdf."""
|
||||||
|
nargs = 1
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, arg):
|
||||||
|
return 1-erf(arg)
|
||||||
|
|
||||||
|
class erfcx(Function):
|
||||||
|
nargs = 1
|
||||||
|
def fdiff(self, argindex=1):
|
||||||
|
x = self.args[0]
|
||||||
|
return x*erfcx(x)-2/sqrt(pi)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
if x.is_Number:
|
||||||
|
return exp(x**2)*erfc(x)
|
||||||
|
|
||||||
class gammaln(Function):
|
class gammaln(Function):
|
||||||
|
"""The log of the gamma function, which is often needed instead of log(gamma(x)) for better accuracy for large x."""
|
||||||
nargs = 1
|
nargs = 1
|
||||||
|
|
||||||
def fdiff(self, argindex=1):
|
def fdiff(self, argindex=1):
|
||||||
|
|
@ -13,22 +61,26 @@ class gammaln(Function):
|
||||||
return log(gamma(x))
|
return log(gamma(x))
|
||||||
|
|
||||||
|
|
||||||
class ln_cum_gaussian(Function):
|
class normcdfln(Function):
|
||||||
|
"""The log of the normal cdf. Can often be computed with better accuracy than log(normcdf(x)), particulary when x is either small or large."""
|
||||||
nargs = 1
|
nargs = 1
|
||||||
|
|
||||||
def fdiff(self, argindex=1):
|
def fdiff(self, argindex=1):
|
||||||
x = self.args[0]
|
x = self.args[0]
|
||||||
return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi)
|
#return -erfcx(-x/sqrt(2))/sqrt(2*pi)
|
||||||
|
#return exp(-normcdfln(x) - 0.5*x*x)/sqrt(2*pi)
|
||||||
|
return sqrt(2/pi)*1/erfcx(-x/sqrt(2))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def eval(cls, x):
|
def eval(cls, x):
|
||||||
if x.is_Number:
|
if x.is_Number:
|
||||||
return log(cum_gaussian(x))
|
return log(normcdf(x))
|
||||||
|
|
||||||
def _eval_is_real(self):
|
def _eval_is_real(self):
|
||||||
return self.args[0].is_real
|
return self.args[0].is_real
|
||||||
|
|
||||||
class cum_gaussian(Function):
|
class normcdf(Function):
|
||||||
|
"""The cumulative distribution function of the standard normal. Provided as a convenient helper function. It is computed throught -0.5*erfc(-x/sqrt(2))."""
|
||||||
nargs = 1
|
nargs = 1
|
||||||
def fdiff(self, argindex=1):
|
def fdiff(self, argindex=1):
|
||||||
x = self.args[0]
|
x = self.args[0]
|
||||||
|
|
@ -37,12 +89,30 @@ class cum_gaussian(Function):
|
||||||
@classmethod
|
@classmethod
|
||||||
def eval(cls, x):
|
def eval(cls, x):
|
||||||
if x.is_Number:
|
if x.is_Number:
|
||||||
return 0.5*(1+erf(sqrt(2)/2*x))
|
return 0.5*(erfc(-x/sqrt(2)))
|
||||||
|
|
||||||
def _eval_is_real(self):
|
def _eval_is_real(self):
|
||||||
return self.args[0].is_real
|
return self.args[0].is_real
|
||||||
|
|
||||||
class gaussian(Function):
|
class normalln(Function):
|
||||||
|
"""The log of the standard normal distribution."""
|
||||||
|
nargs = 1
|
||||||
|
def fdiff(self, argindex=1):
|
||||||
|
x = self.args[0]
|
||||||
|
return -x
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
if x.is_Number:
|
||||||
|
return 0.5*sqrt(2*pi) - 0.5*x*x
|
||||||
|
|
||||||
|
|
||||||
|
def _eval_is_real(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
class normal(Function):
|
||||||
|
"""The standard normal distribution. Provided as a convenience function."""
|
||||||
nargs = 1
|
nargs = 1
|
||||||
@classmethod
|
@classmethod
|
||||||
def eval(cls, x):
|
def eval(cls, x):
|
||||||
|
|
@ -51,23 +121,23 @@ class gaussian(Function):
|
||||||
def _eval_is_real(self):
|
def _eval_is_real(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
class ln_diff_erf(Function):
|
class differfln(Function):
|
||||||
nargs = 2
|
nargs = 2
|
||||||
|
|
||||||
def fdiff(self, argindex=2):
|
def fdiff(self, argindex=2):
|
||||||
if argindex == 2:
|
if argindex == 2:
|
||||||
x0, x1 = self.args
|
x0, x1 = self.args
|
||||||
return -2*exp(-x1**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
|
return -2/(sqrt(pi)*(erfcx(x1)-exp(x1**2-x0**2)*erfcx(x0)))
|
||||||
elif argindex == 1:
|
elif argindex == 1:
|
||||||
x0, x1 = self.args
|
x0, x1 = self.args
|
||||||
return 2.*exp(-x0**2)/(sqrt(pi)*(erf(x0)-erf(x1)))
|
return 2/(sqrt(pi)*(exp(x0**2-x1**2)*erfcx(x1)-erfcx(x0)))
|
||||||
else:
|
else:
|
||||||
raise ArgumentIndexError(self, argindex)
|
raise ArgumentIndexError(self, argindex)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def eval(cls, x0, x1):
|
def eval(cls, x0, x1):
|
||||||
if x0.is_Number and x1.is_Number:
|
if x0.is_Number and x1.is_Number:
|
||||||
return log(erf(x0)-erf(x1))
|
return log(erfc(x1)-erfc(x0))
|
||||||
|
|
||||||
class dh_dd_i(Function):
|
class dh_dd_i(Function):
|
||||||
nargs = 5
|
nargs = 5
|
||||||
|
|
@ -232,7 +302,6 @@ class h(Function):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def eval(cls, t, tprime, d_i, d_j, l):
|
def eval(cls, t, tprime, d_i, d_j, l):
|
||||||
# putting in the is_Number stuff forces it to look for a fdiff method for derivative. If it's left out, then when asking for self.diff, it just does the diff on the eval symbolic terms directly. We want to avoid that because we are looking to ensure everything is numerically stable. Maybe it's because of the if statement that this happens?
|
|
||||||
if (t.is_Number
|
if (t.is_Number
|
||||||
and tprime.is_Number
|
and tprime.is_Number
|
||||||
and d_i.is_Number
|
and d_i.is_Number
|
||||||
|
|
@ -273,17 +342,5 @@ class h(Function):
|
||||||
# *(erf(tprime/l - d_j/2.*l)
|
# *(erf(tprime/l - d_j/2.*l)
|
||||||
# + erf(d_j/2.*l))))
|
# + erf(d_j/2.*l))))
|
||||||
|
|
||||||
class erfc(Function):
|
|
||||||
nargs = 1
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def eval(cls, arg):
|
|
||||||
return 1-erf(arg)
|
|
||||||
|
|
||||||
class erfcx(Function):
|
|
||||||
nargs = 1
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def eval(cls, arg):
|
|
||||||
return erfc(arg)*exp(arg*arg)
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue