[GPU] GPU version of varDTC is ready

Zhenwen Dai 2014-04-07 10:25:16 +01:00
commit bbcba2553c
59 changed files with 2012 additions and 1186 deletions

View file

@@ -4,6 +4,7 @@
 from model import *
 from parameterization.parameterized import adjust_name_for_printing, Parameterizable
 from parameterization.param import Param, ParamConcatenation
+from parameterization.observable_array import ObsAr
 from gp import GP
 from sparse_gp import SparseGP

View file

@@ -208,27 +208,9 @@ class GP(Model):
         from ..plotting.matplot_dep import models_plots
         return models_plots.plot_fit(self,*args,**kwargs)

-    def _getstate(self):
-        """
-        Get the current state of the class, here we return everything that is
-        needed to recompute the model.
-        """
-        return Model._getstate(self) + [self.X,
-                                        self.num_data,
-                                        self.input_dim,
-                                        self.kern,
-                                        self.likelihood,
-                                        self.output_dim,
-                                        ]
-
-    def _setstate(self, state):
-        self.output_dim = state.pop()
-        self.likelihood = state.pop()
-        self.kern = state.pop()
-        self.input_dim = state.pop()
-        self.num_data = state.pop()
-        self.X = state.pop()
-        Model._setstate(self, state)
+    def input_sensitivity(self):
+        """
+        Returns the sensitivity for each dimension of this model
+        """
+        return self.kern.input_sensitivity()
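For orientation, a small usage sketch of the new delegation (not part of this commit; model and kernel constructor names are illustrative and vary between GPy versions):

    import numpy as np
    import GPy  # assumes GPy is importable

    X = np.random.randn(50, 3)
    Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(50, 1)
    # an ARD kernel keeps one lengthscale per input dimension
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(3, ARD=True))
    m.optimize()
    # GP.input_sensitivity() now simply forwards to the kernel
    print(m.input_sensitivity())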

View file

@ -34,7 +34,7 @@ class Mapping(Parameterized):
raise NotImplementedError raise NotImplementedError
def df_dtheta(self, dL_df, X): def df_dtheta(self, dL_df, X):
"""The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters. """The gradient of the outputs of the mapping with respect to each of the parameters.
:param dL_df: gradient of the objective with respect to the function. :param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim) :type dL_df: ndarray (num_data x output_dim)
@ -50,7 +50,7 @@ class Mapping(Parameterized):
""" """
Plots the mapping associated with the model. Plots the mapping associated with the model.
- In one dimension, the function is plotted. - In one dimension, the function is plotted.
- In two dimsensions, a contour-plot shows the function - In two dimensions, a contour-plot shows the function
- In higher dimensions, we've not implemented this yet !TODO! - In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions Can plot only part of the data and part of the posterior functions

View file

@@ -24,37 +24,9 @@ class Model(Parameterized):
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"

     def _log_likelihood_gradients(self):
         return self.gradient

-    def _getstate(self):
-        """
-        Get the current state of the class.
-        Inherited from Parameterized, so add those parameters to the state
-
-        :return: list of states from the model.
-        """
-        return Parameterized._getstate(self) + \
-            [self.priors, self.optimization_runs,
-             self.sampling_runs, self.preferred_optimizer]
-
-    def _setstate(self, state):
-        """
-        set state from previous call to _getstate
-        call Parameterized with the rest of the state
-
-        :param state: the state of the model.
-        :type state: list as returned from _getstate.
-        """
-        self.preferred_optimizer = state.pop()
-        self.sampling_runs = state.pop()
-        self.optimization_runs = state.pop()
-        self.priors = state.pop()
-        Parameterized._setstate(self, state)

     def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
         """
         Perform random restarts of the model, and set the model to the best
@@ -142,13 +114,60 @@ class Model(Parameterized):
         """
         raise DeprecationWarning, 'parameters now have default constraints'

-    def input_sensitivity(self):
-        """
-        Returns the sensitivity for each dimension of this kernel.
-        """
-        return self.kern.input_sensitivity()
-
-    def objective_function(self, x):
+    def objective_function(self):
+        """
+        The objective function for the given algorithm.
+
+        This function is the true objective, which wants to be minimized.
+        Note that all parameters are already set and in place, so you just need
+        to return the objective function here.
+
+        For probabilistic models this is the negative log_likelihood
+        (including the MAP prior), so we return it here. If your model is not
+        probabilistic, just return your objective here!
+        """
+        return -float(self.log_likelihood()) - self.log_prior()
+
+    def objective_function_gradients(self):
+        """
+        The gradients for the objective function for the given algorithm.
+        You can find the gradient for the parameters in self.gradient at all times.
+        This is the place, where gradients get stored for parameters.
+
+        This function is the true objective, which wants to be minimized.
+        Note that all parameters are already set and in place, so you just need
+        to return the gradient here.
+
+        For probabilistic models this is the gradient of the negative log_likelihood
+        (including the MAP prior), so we return it here. If your model is not
+        probabilistic, just return your gradient here!
+        """
+        return -(self._log_likelihood_gradients() + self._log_prior_gradients())
+
+    def _grads(self, x):
+        """
+        Gets the gradients from the likelihood and the priors.
+
+        Failures are handled robustly. The algorithm will try several times to
+        return the gradients, and will raise the original exception if
+        the objective cannot be computed.
+
+        :param x: the parameters of the model.
+        :type x: np.array
+        """
+        try:
+            self._set_params_transformed(x)
+            obj_grads = self._transform_gradients(self.objective_function_gradients())
+            self._fail_count = 0
+        except (LinAlgError, ZeroDivisionError, ValueError):
+            if self._fail_count >= self._allowed_failures:
+                raise
+            self._fail_count += 1
+            obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
+        return obj_grads
+
+    def _objective(self, x):
         """
         The objective function passed to the optimizer. It combines
         the likelihood and the priors.
@@ -162,55 +181,26 @@ class Model(Parameterized):
         """
         try:
             self._set_params_transformed(x)
+            obj = self.objective_function()
             self._fail_count = 0
-        except (LinAlgError, ZeroDivisionError, ValueError) as e:
+        except (LinAlgError, ZeroDivisionError, ValueError):
            if self._fail_count >= self._allowed_failures:
-                raise e
+                raise
            self._fail_count += 1
            return np.inf
-        return -float(self.log_likelihood()) - self.log_prior()
+        return obj

-    def objective_function_gradients(self, x):
-        """
-        Gets the gradients from the likelihood and the priors.
-
-        Failures are handled robustly. The algorithm will try several times to
-        return the gradients, and will raise the original exception if
-        the objective cannot be computed.
-
-        :param x: the parameters of the model.
-        :type x: np.array
-        """
-        try:
-            self._set_params_transformed(x)
-            obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
-            self._fail_count = 0
-        except (LinAlgError, ZeroDivisionError, ValueError) as e:
-            if self._fail_count >= self._allowed_failures:
-                raise e
-            self._fail_count += 1
-            obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
-        return obj_grads
-
-    def objective_and_gradients(self, x):
-        """
-        Compute the objective function of the model and the gradient of the model at the point given by x.
-
-        :param x: the point at which gradients are to be computed.
-        :type x: np.array
-        """
+    def _objective_grads(self, x):
         try:
             self._set_params_transformed(x)
-            obj_f = -float(self.log_likelihood()) - self.log_prior()
-            obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
+            obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
             self._fail_count = 0
-        except (LinAlgError, ZeroDivisionError, ValueError) as e:
+        except (LinAlgError, ZeroDivisionError, ValueError):
             if self._fail_count >= self._allowed_failures:
-                raise e
+                raise
             self._fail_count += 1
             obj_f = np.inf
-            obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
+            obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
         return obj_f, obj_grads

     def optimize(self, optimizer=None, start=None, **kwargs):
@@ -241,7 +231,7 @@ class Model(Parameterized):
             optimizer = optimization.get_optimizer(optimizer)
             opt = optimizer(start, model=self, **kwargs)

-        opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
+        opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)

         self.optimization_runs.append(opt)
@@ -289,19 +279,22 @@ class Model(Parameterized):
             # just check the global ratio
             dx = np.zeros(x.shape)
-            dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
+            dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)

             # evaulate around the point x
-            f1 = self.objective_function(x + dx)
-            f2 = self.objective_function(x - dx)
-            gradient = self.objective_function_gradients(x)
+            f1 = self._objective(x + dx)
+            f2 = self._objective(x - dx)
+            gradient = self._grads(x)

             dx = dx[transformed_index]
             gradient = gradient[transformed_index]

             denominator = (2 * np.dot(dx, gradient))
             global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
-            return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance
+            global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
+            if global_ratio is np.nan:
+                global_ratio = 0
+            return np.abs(1. - global_ratio) < tolerance or global_diff
         else:
             # check the gradient of each parameter individually, and do some pretty printing
             try:
@@ -337,15 +330,15 @@ class Model(Parameterized):
                 print "No free parameters to check"
                 return

-            gradient = self.objective_function_gradients(x).copy()
+            gradient = self._grads(x).copy()
             np.where(gradient == 0, 1e-312, gradient)
             ret = True
             for nind, xind in itertools.izip(param_index, transformed_index):
                 xx = x.copy()
                 xx[xind] += step
-                f1 = self.objective_function(xx)
+                f1 = self._objective(xx)
                 xx[xind] -= 2.*step
-                f2 = self.objective_function(xx)
+                f2 = self._objective(xx)
                 numerical_gradient = (f1 - f2) / (2 * step)
                 if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
                 else: ratio = (f1 - f2) / (2 * step * gradient[xind])
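The net effect of the model.py changes is a clean split: subclasses override the parameter-free pair objective_function() / objective_function_gradients(), while the underscored wrappers _objective, _grads and _objective_grads are what the optimizer actually calls — they write x into the transformed parameter array and handle numerical failures. A toy mirror of that pattern (the class and its quadratic objective are illustrative, not GPy code):

    import numpy as np

    class QuadraticToy(object):
        # stands in for a Model: parameters live in an array, and the
        # optimizer only ever talks to the underscored wrappers
        def __init__(self):
            self.params = np.zeros(2)

        def objective_function(self):
            # parameter-free: reads the already-set parameters
            return float(((self.params - 3.0) ** 2).sum())

        def objective_function_gradients(self):
            return 2.0 * (self.params - 3.0)

        def _objective(self, x):
            self.params[:] = x  # stands in for _set_params_transformed(x)
            return self.objective_function()

        def _grads(self, x):
            self.params[:] = x
            return self.objective_function_gradients()

        def _objective_grads(self, x):
            self.params[:] = x
            return self.objective_function(), self.objective_function_gradients()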

View file

@@ -24,12 +24,6 @@ class ParameterIndexOperations(object):
             for t, i in constraints.iteritems():
                 self.add(t, i)

-    def __getstate__(self):
-        return self._properties
-
-    def __setstate__(self, state):
-        self._properties = state
-
     def iteritems(self):
         return self._properties.iteritems()
@@ -92,13 +86,18 @@ class ParameterIndexOperations(object):
         for i, v in parameter_index_view.iteritems():
             self.add(i, v+offset)

     def copy(self):
+        return self.__deepcopy__(None)
+
+    def __deepcopy__(self, memo):
         return ParameterIndexOperations(dict(self.iteritems()))

     def __getitem__(self, prop):
         return self._properties[prop]

+    def __delitem__(self, prop):
+        del self._properties[prop]
+
     def __str__(self, *args, **kwargs):
         import pprint
         return pprint.pformat(dict(self._properties))
@@ -183,7 +182,7 @@ class ParameterIndexOperationsView(object):
     def remove(self, prop, indices):
-        removed = self._param_index_ops.remove(prop, indices+self._offset)
+        removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset)
         if removed.size > 0:
             return removed - self._size + 1
         return removed
@@ -193,6 +192,9 @@ class ParameterIndexOperationsView(object):
         ind = self._filter_index(self._param_index_ops[prop])
         return ind

+    def __delitem__(self, prop):
+        self.remove(prop, self[prop])
+
     def __str__(self, *args, **kwargs):
         import pprint
         return pprint.pformat(dict(self.iteritems()))
@@ -203,6 +205,9 @@ class ParameterIndexOperationsView(object):
     def copy(self):
+        return self.__deepcopy__(None)
+
+    def __deepcopy__(self, memo):
         return ParameterIndexOperations(dict(self.iteritems()))
     pass
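Both index-operation classes now route copy() through __deepcopy__, so copy.deepcopy and .copy() share one code path. The idiom in isolation (class and field names illustrative):

    class IndexOps(object):
        def __init__(self, properties=None):
            self._properties = dict(properties or {})

        def copy(self):
            # delegate, so deepcopy and copy cannot diverge
            return self.__deepcopy__(None)

        def __deepcopy__(self, memo):
            # a fresh dict suffices here; index arrays are rebuilt by the caller
            return IndexOps(dict(self._properties))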

View file

@@ -5,6 +5,7 @@ Created on 27 Feb 2014
 '''
 from collections import defaultdict
+import weakref

 def intarray_default_factory():
     import numpy as np
@@ -28,4 +29,90 @@ class ArrayList(list):
                 return True
         return False

+    def index(self, item):
+        index = 0
+        for el in self:
+            if el is item:
+                return index
+            index += 1
+        raise ValueError, "{} is not in list".format(item)
+    pass
+
+class ObservablesList(object):
+    def __init__(self):
+        self._poc = []
+
+    def __getitem__(self, ind):
+        p,o,c = self._poc[ind]
+        return p, o(), c
+
+    def remove(self, priority, observable, callble):
+        """
+        """
+        self.flush()
+        for i in range(len(self) - 1, -1, -1):
+            p,o,c = self[i]
+            if priority==p and observable==o and callble==c:
+                del self._poc[i]
+
+    def __repr__(self):
+        return self._poc.__repr__()
+
+    def add(self, priority, observable, callble):
+        ins = 0
+        for pr, _, _ in self:
+            if priority > pr:
+                break
+            ins += 1
+        self._poc.insert(ins, (priority, weakref.ref(observable), callble))
+
+    def __str__(self):
+        ret = []
+        curr_p = None
+        for p, o, c in self:
+            curr = ''
+            if curr_p != p:
+                pre = "{!s}: ".format(p)
+                curr_pre = pre
+            else: curr_pre = " "*len(pre)
+            curr_p = p
+            curr += curr_pre
+            ret.append(curr + ", ".join(map(repr, [o,c])))
+        return '\n'.join(ret)
+
+    def flush(self):
+        self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]
+
+    def __iter__(self):
+        self.flush()
+        for p, o, c in self._poc:
+            if o() is not None:
+                yield p, o(), c
+
+    def __len__(self):
+        self.flush()
+        return self._poc.__len__()
+
+    def __deepcopy__(self, memo):
+        self.flush()
+        s = ObservablesList()
+        import copy
+        s._poc = copy.deepcopy(self._poc, memo)
+        return s
+
+    def __getstate__(self):
+        self.flush()
+        from ...util.caching import Cacher
+        obs = []
+        for p, o, c in self:
+            if (getattr(o, c.__name__, None) is not None
+                and not isinstance(o, Cacher)):
+                obs.append((p,o,c.__name__))
+        return obs
+
+    def __setstate__(self, state):
+        self._poc = []
+        for p, o, c in state:
+            self.add(p,o,getattr(o, c))
     pass
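The key design point of ObservablesList is that observers are held through weakref.ref, so an observer that dies elsewhere is silently dropped instead of being kept alive by its callbacks. A stripped-down mirror of that mechanism (names illustrative, not GPy code):

    import weakref

    class TinyObservers(object):
        def __init__(self):
            self._entries = []  # (priority, weak ref to observer, callback)

        def add(self, priority, observable, callback):
            ins = 0
            for pr, _, _ in self._entries:  # keep highest priority first
                if priority > pr:
                    break
                ins += 1
            self._entries.insert(ins, (priority, weakref.ref(observable), callback))

        def flush(self):
            # drop entries whose observer has been garbage collected
            self._entries = [(p, o, c) for p, o, c in self._entries if o() is not None]

        def __iter__(self):
            self.flush()
            for p, o, c in self._entries:
                if o() is not None:
                    yield p, o(), c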

View file

@@ -1,12 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-__updated__ = '2014-03-21'
+__updated__ = '2014-03-31'

 import numpy as np
-from parameter_core import Observable
+from parameter_core import Observable, Pickleable

-class ObsAr(np.ndarray, Observable):
+class ObsAr(np.ndarray, Pickleable, Observable):
     """
     An ndarray which reports changes to its observers.
     The observers can add themselves with a callable, which
@@ -25,37 +25,34 @@ class ObsAr(np.ndarray, Observable):
     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return
-        self._observer_callables_ = getattr(obj, '_observer_callables_', None)
+        self.observers = getattr(obj, 'observers', None)

     def __array_wrap__(self, out_arr, context=None):
         return out_arr.view(np.ndarray)

+    def copy(self):
+        memo = {}
+        memo[id(self)] = self
+        return self.__deepcopy__(memo)
+
+    def __deepcopy__(self, memo):
+        s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
+        memo[id(self)] = s
+        import copy
+        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        return s
+
     def __reduce__(self):
-        func, args, state = np.ndarray.__reduce__(self)
-        return func, args, (state, Observable._getstate(self))
+        func, args, state = super(ObsAr, self).__reduce__()
+        return func, args, (state, Pickleable.__getstate__(self))

     def __setstate__(self, state):
         np.ndarray.__setstate__(self, state[0])
-        Observable._setstate(self, state[1])
-
-    def _s_not_empty(self, s):
-        # this checks whether there is something picked by this slice.
-        return True
-        # TODO: disarmed, for performance increase,
-        if not isinstance(s, (list,tuple,np.ndarray)):
-            return True
-        if isinstance(s, (list,tuple)):
-            return len(s)!=0
-        if isinstance(s, np.ndarray):
-            if s.dtype is bool:
-                return np.all(s)
-            else:
-                return s.size != 0
+        Pickleable.__setstate__(self, state[1])

     def __setitem__(self, s, val):
-        if self._s_not_empty(s):
-            super(ObsAr, self).__setitem__(s, val)
-            self.notify_observers()
+        super(ObsAr, self).__setitem__(s, val)
+        self.notify_observers()

     def __getslice__(self, start, stop):
         return self.__getitem__(slice(start, stop))
@@ -63,12 +60,6 @@ class ObsAr(np.ndarray, Pickleable, Observable):
     def __setslice__(self, start, stop, val):
         return self.__setitem__(slice(start, stop), val)

-    def __copy__(self, *args):
-        return ObsAr(self.view(np.ndarray).copy())
-
-    def copy(self, *args):
-        return self.__copy__(*args)
-
     def __ilshift__(self, *args, **kwargs):
         r = np.ndarray.__ilshift__(self, *args, **kwargs)
         self.notify_observers()
@@ -144,76 +135,3 @@ class ObsAr(np.ndarray, Pickleable, Observable):
         r = np.ndarray.__imul__(self, *args, **kwargs)
         self.notify_observers()
         return r
-
-#     def __rrshift__(self, *args, **kwargs):
-#         r = np.ndarray.__rrshift__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __ror__(self, *args, **kwargs):
-#         r = np.ndarray.__ror__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rxor__(self, *args, **kwargs):
-#         r = np.ndarray.__rxor__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rdivmod__(self, *args, **kwargs):
-#         r = np.ndarray.__rdivmod__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __radd__(self, *args, **kwargs):
-#         r = np.ndarray.__radd__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rdiv__(self, *args, **kwargs):
-#         r = np.ndarray.__rdiv__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rtruediv__(self, *args, **kwargs):
-#         r = np.ndarray.__rtruediv__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rshift__(self, *args, **kwargs):
-#         r = np.ndarray.__rshift__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rmul__(self, *args, **kwargs):
-#         r = np.ndarray.__rmul__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rpow__(self, *args, **kwargs):
-#         r = np.ndarray.__rpow__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rsub__(self, *args, **kwargs):
-#         r = np.ndarray.__rsub__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r
-
-#     def __rfloordiv__(self, *args, **kwargs):
-#         r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
-#         self.notify_observers()
-#         return r

View file

@@ -4,7 +4,7 @@
 import itertools
 import numpy
 from parameter_core import OptimizationHandlable, adjust_name_for_printing
-from array_core import ObsAr
+from observable_array import ObsAr

 ###### printing
 __constraints_name__ = "Constraint"
@@ -43,14 +43,13 @@ class Param(OptimizationHandlable, ObsAr):
     _fixes_ = None
     _parameters_ = []

     def __new__(cls, name, input_array, default_constraint=None):
-        obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array, name=name, default_constraint=default_constraint))
+        obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
         cls.__name__ = "Param"
         obj._current_slice_ = (slice(obj.shape[0]),)
         obj._realshape_ = obj.shape
         obj._realsize_ = obj.size
         obj._realndim_ = obj.ndim
         obj._original_ = True
-        obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64)
         return obj

     def __init__(self, name, input_array, default_constraint=None, *a, **kw):
@@ -60,7 +59,7 @@ class Param(OptimizationHandlable, ObsAr):
         import pydot
         node = pydot.Node(id(self), shape='record', label=self.name)
         G.add_node(node)
-        for o in self._observer_callables_.keys():
+        for o in self.observers.keys():
             label = o.name if hasattr(o, 'name') else str(o)
             observed_node = pydot.Node(id(o), label=label)
             G.add_node(observed_node)
@@ -87,74 +86,24 @@ class Param(OptimizationHandlable, ObsAr):
         self.priors = getattr(obj, 'priors', None)

     @property
-    def _param_array_(self):
+    def param_array(self):
         return self

     @property
     def gradient(self):
+        """
+        Return a view on the gradient, which is in the same shape as this parameter is.
+        Note: this is not the real gradient array, it is just a view on it.
+
+        To work on the real gradient array use: self.full_gradient
+        """
+        if getattr(self, '_gradient_array_', None) is None:
+            self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64)
         return self._gradient_array_[self._current_slice_]

     @gradient.setter
     def gradient(self, val):
-        self.gradient[:] = val
+        self._gradient_array_[self._current_slice_] = val

-    #===========================================================================
-    # Pickling operations
-    #===========================================================================
-    def __reduce__(self):
-        func, args, state = super(Param, self).__reduce__()
-        return func, args, (state,
-                            (self._name,
-                             self._parent_,
-                             self._parent_index_,
-                             self._default_constraint_,
-                             self._current_slice_,
-                             self._realshape_,
-                             self._realsize_,
-                             self._realndim_,
-                             self.constraints,
-                             self.priors
-                             )
-                            )
-
-    def __setstate__(self, state):
-        super(Param, self).__setstate__(state[0])
-        state = list(state[1])
-        self.priors = state.pop()
-        self.constraints = state.pop()
-        self._realndim_ = state.pop()
-        self._realsize_ = state.pop()
-        self._realshape_ = state.pop()
-        self._current_slice_ = state.pop()
-        self._default_constraint_ = state.pop()
-        self._parent_index_ = state.pop()
-        self._parent_ = state.pop()
-        self._name = state.pop()
-
-    def copy(self, *args):
-        constr = self.constraints.copy()
-        priors = self.priors.copy()
-        p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_)
-        p.constraints = constr
-        p.priors = priors
-        return p
-
-    #===========================================================================
-    # get/set parameters
-    #===========================================================================
-#     def _set_params(self, param, trigger_parent=True):
-#         self.flat = param
-#         if trigger_parent: min_priority = None
-#         else: min_priority = -numpy.inf
-#         self.notify_observers(None, min_priority)
-#
-#     def _get_params(self):
-#         return self.flat
-#
-#     def _collect_gradient(self, target):
-#         target += self.gradient.flat
-#
-#     def _set_gradient(self, g):
-#         self.gradient = g.reshape(self._realshape_)
-#===========================================================================
     #===========================================================================
     # Array operations -> done
@@ -172,24 +121,6 @@ class Param(OptimizationHandlable, ObsAr):
     def __setitem__(self, s, val):
         super(Param, self).__setitem__(s, val)

-    #===========================================================================
-    # Index Operations:
-    #===========================================================================
-    #def _internal_offset(self):
-    #    internal_offset = 0
-    #    extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
-    #    for i, si in enumerate(self._current_slice_[:self._realndim_]):
-    #        if numpy.all(si == Ellipsis):
-    #            continue
-    #        if isinstance(si, slice):
-    #            a = si.indices(self._realshape_[i])[0]
-    #        elif isinstance(si, (list,numpy.ndarray,tuple)):
-    #            a = si[0]
-    #        else: a = si
-    #        if a < 0:
-    #            a = self._realshape_[i] + a
-    #        internal_offset += a * extended_realshape[i]
-    #    return internal_offset
-
     def _raveled_index(self, slice_index=None):
         # return an index array on the raveled array, which is formed by the current_slice
@@ -235,13 +166,21 @@ class Param(OptimizationHandlable, ObsAr):
     def is_fixed(self):
         from transformations import __fixed__
         return self.constraints[__fixed__].size == self.size

-    #def round(self, decimals=0, out=None):
-    #    view = super(Param, self).round(decimals, out).view(Param)
-    #    view.__array_finalize__(self)
-    #    return view
-    #round.__doc__ = numpy.round.__doc__
-
     def _get_original(self, param):
         return self

+    #===========================================================================
+    # Pickling and copying
+    #===========================================================================
+    def __deepcopy__(self, memo):
+        s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
+        memo[id(self)] = s
+        import copy
+        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        return s
+
     #===========================================================================
     # Printing -> done
     #===========================================================================
@@ -250,7 +189,8 @@ class Param(OptimizationHandlable, ObsAr):
         if self.size <= 1:
             return [str(self.view(numpy.ndarray)[0])]
         else: return [str(self.shape)]

-    def parameter_names(self, add_self=False, adjust_for_printing=False):
+    def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
+        # this is just overwriting the parameterized calls to parameter names, in order to maintain OOP
         if adjust_for_printing:
             return [adjust_name_for_printing(self.name)]
         return [self.name]
@@ -261,6 +201,9 @@ class Param(OptimizationHandlable, ObsAr):
     def parameter_shapes(self):
         return [self.shape]

     @property
+    def num_params(self):
+        return 0
+
+    @property
     def _constraints_str(self):
         return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]

     @property
@@ -282,8 +225,8 @@ class Param(OptimizationHandlable, ObsAr):
         if isinstance(slice_index, (tuple, list)):
             clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
             for i in range(self._realndim_-len(clean_curr_slice)):
-                i+=len(clean_curr_slice)
-                clean_curr_slice += range(self._realshape_[i])
+                i+=1
+                clean_curr_slice += [range(self._realshape_[i])]
             if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
                 and len(set(map(len, clean_curr_slice))) <= 1):
                 return numpy.fromiter(itertools.izip(*clean_curr_slice),
@@ -368,7 +311,7 @@ class ParamConcatenation(object):
     #===========================================================================
     def __getitem__(self, s):
         ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
-        params = [p._param_array_[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._param_array_[ind[ps]])]
+        params = [p.param_array[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p.param_array[ind[ps]])]
         if len(params)==1: return params[0]
         return ParamConcatenation(params)

     def __setitem__(self, s, val, update=True):
@@ -381,7 +324,7 @@ class ParamConcatenation(object):
         if update:
             self.update_all_params()

     def values(self):
-        return numpy.hstack([p._param_array_ for p in self.params])
+        return numpy.hstack([p.param_array.flat for p in self.params])

     #===========================================================================
     # parameter operations:
     #===========================================================================
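Param's gradient is now a lazily allocated view: the backing array is created at the parameter's original shape on first access, and both getter and setter go through the current slice, so sliced parameter views write into the same memory. A minimal mirror of that idea (names illustrative):

    import numpy as np

    class SlicedGradient(object):
        def __init__(self, realshape, current_slice):
            self._realshape_ = realshape
            self._current_slice_ = current_slice
            self._gradient_array_ = None

        @property
        def gradient(self):
            if self._gradient_array_ is None:
                # allocated lazily, always at the original (unsliced) shape
                self._gradient_array_ = np.zeros(self._realshape_)
            return self._gradient_array_[self._current_slice_]

        @gradient.setter
        def gradient(self, val):
            # write through the view; the getter ensures allocation
            self.gradient[...] = val

    g = SlicedGradient((4,), (slice(1, 3),))
    g.gradient = 7.0
    print(list(g._gradient_array_))  # [0.0, 7.0, 7.0, 0.0]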

View file

@@ -13,10 +13,10 @@ Observable Pattern for patameterization
 """

-from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
+from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np

-__updated__ = '2014-03-24'
+__updated__ = '2014-03-31'

 class HierarchyError(Exception):
     """
@@ -31,71 +31,8 @@ def adjust_name_for_printing(name):
         return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@", '_at_')
     return ''

-class InterfacePickleFunctions(object):
-    def __init__(self, *a, **kw):
-        super(InterfacePickleFunctions, self).__init__()
-
-    def _getstate(self):
-        """
-        Returns the state of this class in a memento pattern.
-        The state must be a list-like structure of all the fields
-        this class needs to run.
-
-        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
-        """
-        raise NotImplementedError, "To be able to use pickling you need to implement this method"
-
-    def _setstate(self, state):
-        """
-        Set the state (memento pattern) of this class to the given state.
-        Usually this is just the counterpart to _getstate, such that
-        an object is a copy of another when calling
-
-            copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
-
-        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
-        """
-        raise NotImplementedError, "To be able to use pickling you need to implement this method"
-
-class Pickleable(InterfacePickleFunctions):
-    """
-    Make an object pickleable (See python doc 'pickling').
-
-    This class allows for pickling support by Memento pattern.
-    _getstate returns a memento of the class, which gets pickled.
-    _setstate(<memento>) (re-)sets the state of the class to the memento
-    """
-    def __init__(self, *a, **kw):
-        super(Pickleable, self).__init__()
-
-    #===========================================================================
-    # Pickling operations
-    #===========================================================================
-    def pickle(self, f, protocol=-1):
-        """
-        :param f: either filename or open file object to write to.
-                  if it is an open buffer, you have to make sure to close
-                  it properly.
-        :param protocol: pickling protocol to use, python-pickle for details.
-        """
-        import cPickle
-        if isinstance(f, str):
-            with open(f, 'w') as f:
-                cPickle.dump(self, f, protocol)
-        else:
-            cPickle.dump(self, f, protocol)
-
-    def __getstate__(self):
-        if self._has_get_set_state():
-            return self._getstate()
-        return self.__dict__
-
-    def __setstate__(self, state):
-        if self._has_get_set_state():
-            self._setstate(state)
-            # TODO: maybe parameters_changed() here?
-            return
-        self.__dict__ = state
-
-    def _has_get_set_state(self):
-        return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
-
-class Observable(Pickleable):
+class Observable(object):
     """
     Observable pattern for parameterization.
@@ -105,23 +42,25 @@ class Observable(Pickleable):
     """
     _updated = True
     def __init__(self, *args, **kwargs):
-        super(Observable, self).__init__(*args, **kwargs)
-        self._observer_callables_ = []
+        super(Observable, self).__init__()
+        from lists_and_dicts import ObservablesList
+        self.observers = ObservablesList()

     def add_observer(self, observer, callble, priority=0):
-        self._insert_sorted(priority, observer, callble)
+        self.observers.add(priority, observer, callble)

     def remove_observer(self, observer, callble=None):
         to_remove = []
-        for p, obs, clble in self._observer_callables_:
+        for poc in self.observers:
+            _, obs, clble = poc
             if callble is not None:
                 if (obs == observer) and (callble == clble):
-                    to_remove.append((p, obs, clble))
+                    to_remove.append(poc)
             else:
                 if obs is observer:
-                    to_remove.append((p, obs, clble))
+                    to_remove.append(poc)
         for r in to_remove:
-            self._observer_callables_.remove(r)
+            self.observers.remove(*r)

     def notify_observers(self, which=None, min_priority=None):
         """
@@ -136,32 +75,18 @@ class Observable(object):
         if which is None:
             which = self
         if min_priority is None:
-            [callble(self, which=which) for _, _, callble in self._observer_callables_]
+            [callble(self, which=which) for _, _, callble in self.observers]
         else:
-            for p, _, callble in self._observer_callables_:
+            for p, _, callble in self.observers:
                 if p <= min_priority:
                     break
                 callble(self, which=which)

-    def _insert_sorted(self, p, o, c):
-        ins = 0
-        for pr, _, _ in self._observer_callables_:
-            if p > pr:
-                break
-            ins += 1
-        self._observer_callables_.insert(ins, (p, o, c))
-
-    def _getstate(self):
-        return [self._observer_callables_]
-
-    def _setstate(self, state):
-        self._observer_callables_ = state.pop()
-
 #===============================================================================
 # Foundation framework for parameterized and param objects:
 #===============================================================================

-class Parentable(Observable):
+class Parentable(object):
     """
     Enable an Object to have a parent.
@@ -171,7 +96,7 @@ class Parentable(object):
     _parent_ = None
     _parent_index_ = None

     def __init__(self, *args, **kwargs):
-        super(Parentable, self).__init__(*args, **kwargs)
+        super(Parentable, self).__init__()

     def has_parent(self):
         """
@@ -207,7 +132,84 @@ class Parentable(object):
         """
         pass

-class Gradcheckable(Parentable):
+class Pickleable(object):
+    """
+    Make an object pickleable (See python doc 'pickling').
+
+    This class allows for pickling support by Memento pattern.
+    _getstate returns a memento of the class, which gets pickled.
+    _setstate(<memento>) (re-)sets the state of the class to the memento
+    """
+    def __init__(self, *a, **kw):
+        super(Pickleable, self).__init__()
+
+    #===========================================================================
+    # Pickling operations
+    #===========================================================================
+    def pickle(self, f, protocol=-1):
+        """
+        :param f: either filename or open file object to write to.
+                  if it is an open buffer, you have to make sure to close
+                  it properly.
+        :param protocol: pickling protocol to use, python-pickle for details.
+        """
+        import cPickle as pickle
+        import pickle #TODO: cPickle
+        if isinstance(f, str):
+            with open(f, 'w') as f:
+                pickle.dump(self, f, protocol)
+        else:
+            pickle.dump(self, f, protocol)
+
+    #===========================================================================
+    # copy and pickling
+    #===========================================================================
+    def copy(self):
+        """Returns a (deep) copy of the current model"""
+        #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
+        import copy
+        memo = {}
+        memo[id(self._parent_)] = None
+        memo[id(self.gradient)] = None
+        memo[id(self.param_array)] = None
+        memo[id(self._fixes_)] = None
+        c = copy.deepcopy(self, memo)
+        c._parent_index_ = None
+        return c
+
+    def __deepcopy__(self, memo):
+        s = self.__new__(self.__class__)
+        memo[id(self)] = s
+        import copy
+        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        return s
+
+    def __getstate__(self):
+        ignore_list = ([#'_parent_', '_parent_index_',
+                        #'observers',
+                        '_param_array_', '_gradient_array_', '_fixes_',
+                        '_Cacher_wrap__cachers']
+                       #+ self.parameter_names(recursive=False)
+                       )
+        dc = dict()
+        for k,v in self.__dict__.iteritems():
+            if k not in ignore_list:
+                #if hasattr(v, "__getstate__"):
+                #    dc[k] = v.__getstate__()
+                #else:
+                dc[k] = v
+        return dc
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        return self
+
+    #def __getstate__(self, memo):
+    #    raise NotImplementedError, "get state must be implemented to be able to pickle objects"
+    #def __setstate__(self, memo):
+    #    raise NotImplementedError, "set state must be implemented to be able to pickle objects"
+
+class Gradcheckable(Pickleable, Parentable):
     """
     Adds the functionality for an object to be gradcheckable.
     It is just a thin wrapper of a call to the highest parent for now.
@@ -312,7 +314,7 @@ class Indexable(object):
         raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"

-class Constrainable(Nameable, Indexable):
+class Constrainable(Nameable, Indexable, Observable):
     """
     Make an object constrainable with Priors and Transformations.
     TODO: Mappings!!
@@ -394,6 +396,7 @@ class Constrainable(Nameable, Indexable, Observable):
             self._fixes_[fixed_indices] = FIXED
         else:
             self._fixes_ = None
+            del self.constraints[__fixed__]

     def _has_fixes(self):
         return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size
@@ -429,14 +432,14 @@ class Constrainable(Nameable, Indexable, Observable):
     def log_prior(self):
         """evaluate the prior"""
         if self.priors.size > 0:
-            x = self._param_array_
+            x = self.param_array
             return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
         return 0.

     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
         if self.priors.size > 0:
-            x = self._param_array_
+            x = self.param_array
             ret = np.zeros(x.size)
             [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
             return ret
@@ -455,7 +458,7 @@ class Constrainable(Nameable, Indexable, Observable):
         Constrain the parameter to the given
         :py:class:`GPy.core.transformations.Transformation`.
         """
-        self._param_array_[:] = transform.initialize(self._param_array_)
+        self.param_array[:] = transform.initialize(self.param_array)
         reconstrained = self.unconstrain()
         self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
         self.notify_observers(self, None if trigger_parent else -np.inf)
@@ -565,14 +568,14 @@ class OptimizationHandlable(Constrainable):
         super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)

     def transform(self):
-        [np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+        [np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]

     def untransform(self):
-        [np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+        [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]

     def _get_params_transformed(self):
         # transformed parameters (apply transformation rules)
-        p = self._param_array_.copy()
+        p = self.param_array.copy()
         [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
         if self.has_parent() and self.constraints[__fixed__].size != 0:
             fixes = np.ones(self.size).astype(bool)
@@ -583,14 +586,14 @@ class OptimizationHandlable(Constrainable):
         return p

     def _set_params_transformed(self, p):
-        if p is self._param_array_:
+        if p is self.param_array:
             p = p.copy()
         if self.has_parent() and self.constraints[__fixed__].size != 0:
             fixes = np.ones(self.size).astype(bool)
             fixes[self.constraints[__fixed__]] = FIXED
-            self._param_array_.flat[fixes] = p
-        elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p
-        else: self._param_array_.flat = p
+            self.param_array.flat[fixes] = p
+        elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
+        else: self.param_array.flat = p
         self.untransform()
         self._trigger_params_changed()
@@ -600,36 +603,29 @@ class OptimizationHandlable(Constrainable):
     def _size_transformed(self):
         return self.size - self.constraints[__fixed__].size

-#
-#     def _untransform_params(self, p):
-#         # inverse apply transformations for parameters
-#         #p = p.copy()
-#         if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
-#         [np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-#         return p
-#
-#     def _get_params(self):
-#         """
-#         get all parameters
-#         """
-#         return self._param_array_
-#         p = np.empty(self.size, dtype=np.float64)
-#         if self.size == 0:
-#             return p
-#         [np.put(p, ind, par._get_params()) for ind, par in itertools.izip(self._param)]
-#         return p
-
-#     def _set_params(self, params, trigger_parent=True):
-#         self._param_array_.flat = params
-#         if trigger_parent: min_priority = None
-#         else: min_priority = -np.inf
-#         self.notify_observers(None, min_priority)
-#     don't overwrite this anymore!
-#     raise NotImplementedError, "Abstract superclass: This needs to be implemented in Param and Parameterizable"
-    #===========================================================================
-    # Optimization handles:
-    #===========================================================================
+    @property
+    def num_params(self):
+        """
+        Return the number of parameters of this parameter_handle.
+        Param objects will always return 0.
+        """
+        raise NotImplemented, "Abstract, please implement in respective classes"
+
+    def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
+        """
+        Get the names of all parameters of this model.
+
+        :param bool add_self: whether to add the own name in front of names
+        :param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
+        :param bool recursive: whether to traverse through hierarchy and append leaf node names
+        """
+        if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
+        else: adjust = lambda x: x
+        if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
+        else: names = [adjust(x.name) for x in self._parameters_]
+        if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
+        return names

     def _get_param_names(self):
         n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
         return n
@@ -663,16 +659,30 @@ class OptimizationHandlable(Constrainable):
     # For shared memory arrays. This does nothing in Param, but sets the memory
     # for all parameterized objects
     #===========================================================================
+    @property
+    def full_gradient(self):
+        """
+        Note to users:
+        This does not return the gradient in the right shape! Use self.gradient
+        for the right gradient array.
+
+        To work on the gradient array, use this as the gradient handle.
+        This method exists for in memory use of parameters.
+        When trying to access the true gradient array, use this.
+        """
+        self.gradient # <<< ensure _gradient_array_
+        return self._gradient_array_
+
     def _propagate_param_grad(self, parray, garray):
         pi_old_size = 0
         for pi in self._parameters_:
             pislice = slice(pi_old_size, pi_old_size + pi.size)

-            self._param_array_[pislice] = pi._param_array_.flat  # , requirements=['C', 'W']).flat
-            self._gradient_array_[pislice] = pi._gradient_array_.flat  # , requirements=['C', 'W']).flat
+            self.param_array[pislice] = pi.param_array.flat  # , requirements=['C', 'W']).flat
+            self.full_gradient[pislice] = pi.full_gradient.flat  # , requirements=['C', 'W']).flat

-            pi._param_array_.data = parray[pislice].data
-            pi._gradient_array_.data = garray[pislice].data
+            pi.param_array.data = parray[pislice].data
+            pi.full_gradient.data = garray[pislice].data

             pi._propagate_param_grad(parray[pislice], garray[pislice])
             pi_old_size += pi.size
@@ -681,26 +691,32 @@ class Parameterizable(OptimizationHandlable):
     def __init__(self, *args, **kwargs):
         super(Parameterizable, self).__init__(*args, **kwargs)
         from GPy.core.parameterization.lists_and_dicts import ArrayList
-        _parameters_ = ArrayList()
+        self._parameters_ = ArrayList()
         self.size = 0
-        self._param_array_ = np.empty(self.size, dtype=np.float64)
-        self._gradient_array_ = np.empty(self.size, dtype=np.float64)
         self._added_names_ = set()

-    def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
-        """
-        Get the names of all parameters of this model.
-
-        :param bool add_self: whether to add the own name in front of names
-        :param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
-        :param bool recursive: whether to traverse through hierarchy and append leaf node names
-        """
-        if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
-        else: adjust = lambda x: x
-        if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
-        else: names = [adjust(x.name) for x in self._parameters_]
-        if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
-        return names
+    @property
+    def param_array(self):
+        if not hasattr(self, '_param_array_'):
+            self._param_array_ = np.empty(self.size, dtype=np.float64)
+        return self._param_array_
+
+    @param_array.setter
+    def param_array(self, arr):
+        self._param_array_ = arr
+
+    #=========================================================================
+    # Gradient handling
+    #=========================================================================
+    @property
+    def gradient(self):
+        if not hasattr(self, '_gradient_array_'):
+            self._gradient_array_ = np.empty(self.size, dtype=np.float64)
+        return self._gradient_array_
+
+    @gradient.setter
+    def gradient(self, val):
+        self._gradient_array_[:] = val

     @property
     def num_params(self):
@@ -737,34 +753,6 @@ class Parameterizable(OptimizationHandlable):
             self._remove_parameter_name(None, old_name)
             self._add_parameter_name(param)

-    #=========================================================================
-    # Gradient handling
-    #=========================================================================
-    @property
-    def gradient(self):
-        return self._gradient_array_
-
-    @gradient.setter
-    def gradient(self, val):
-        self._gradient_array_[:] = val
-
-    #===========================================================================
-    # def _collect_gradient(self, target):
-    #     [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
-    #===========================================================================
-
-    #===========================================================================
-    # def _set_params(self, params, trigger_parent=True):
-    #     [p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
-    #     if trigger_parent: min_priority = None
-    #     else: min_priority = -np.inf
-    #     self.notify_observers(None, min_priority)
-    #===========================================================================
-
-    #===========================================================================
-    # def _set_gradient(self, g):
-    #     [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
-    #===========================================================================
-
     def add_parameter(self, param, index=None, _ignore_added_names=False):
         """
         :param parameters: the parameters to add
@@ -864,7 +852,7 @@ class Parameterizable(OptimizationHandlable):
             # no parameters for this class
            return
         old_size = 0
-        self._param_array_ = np.empty(self.size, dtype=np.float64)
+        self.param_array = np.empty(self.size, dtype=np.float64)
         self._gradient_array_ = np.empty(self.size, dtype=np.float64)

         self._param_slices_ = []
@@ -874,15 +862,15 @@ class Parameterizable(OptimizationHandlable):
             pslice = slice(old_size, old_size + p.size)

             # first connect all children
-            p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
+            p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice])

             # then connect children to self
-            self._param_array_[pslice] = p._param_array_.flat  # , requirements=['C', 'W']).ravel(order='C')
-            self._gradient_array_[pslice] = p._gradient_array_.flat  # , requirements=['C', 'W']).ravel(order='C')
+            self.param_array[pslice] = p.param_array.flat  # , requirements=['C', 'W']).ravel(order='C')
+            self.full_gradient[pslice] = p.full_gradient.flat  # , requirements=['C', 'W']).ravel(order='C')

-            if not p._param_array_.flags['C_CONTIGUOUS']:
-                import ipdb;ipdb.set_trace()
-            p._param_array_.data = self._param_array_[pslice].data
-            p._gradient_array_.data = self._gradient_array_[pslice].data
+            if not p.param_array.flags['C_CONTIGUOUS']:
+                raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
+            p.param_array.data = self.param_array[pslice].data
+            p.full_gradient.data = self.full_gradient[pslice].data

             self._param_slices_.append(pslice)
@ -898,41 +886,22 @@ class Parameterizable(OptimizationHandlable):
self.notify_observers(which=which) self.notify_observers(which=which)
#=========================================================================== #===========================================================================
# TODO: not working yet # Pickling
#=========================================================================== #===========================================================================
def __setstate__(self, state):
super(Parameterizable, self).__setstate__(state)
self._connect_parameters()
self._connect_fixes()
self._notify_parent_change()
self.parameters_changed()
def copy(self): def copy(self):
"""Returns a (deep) copy of the current model""" c = super(Parameterizable, self).copy()
raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" c._connect_parameters()
import copy c._connect_fixes()
from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView c._notify_parent_change()
from .lists_and_dicts import ArrayList return c
dc = dict()
for k, v in self.__dict__.iteritems():
if k not in ['_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names(recursive=False):
if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
dc[k] = v.copy()
else:
dc[k] = copy.deepcopy(v)
if k == '_parameters_':
params = [p.copy() for p in v]
dc['_parent_'] = None
dc['_parent_index_'] = None
dc['_observer_callables_'] = []
dc['_parameters_'] = ArrayList()
dc['constraints'].clear()
dc['priors'].clear()
dc['size'] = 0
s = self.__new__(self.__class__)
s.__dict__ = dc
for p in params:
s.add_parameter(p, _ignore_added_names=True)
return s
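The new pickling path keeps only plain state and rebuilds the wiring afterwards: __setstate__ and copy() both call _connect_parameters(), _connect_fixes() and _notify_parent_change() to re-derive the memory views that cannot survive serialization. The same pattern in a self-contained sketch (hypothetical class, standing in for Parameterizable):

import pickle

class Reconnectable(object):
    def __init__(self, values):
        self.values = values
        self._connect()                      # derived state, rebuilt on demand

    def _connect(self):
        # stand-in for _connect_parameters()/_connect_fixes()
        self.total = sum(self.values)

    def __getstate__(self):
        return {'values': self.values}       # keep only the plain state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self._connect()                      # re-derive after unpickling

obj = pickle.loads(pickle.dumps(Reconnectable([1, 2, 3])))
assert obj.total == 6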
#=========================================================================== #===========================================================================
# From being parentable, we have to define the parent_change notification # From being parentable, we have to define the parent_change notification
#=========================================================================== #===========================================================================
View file
@@ -17,7 +17,7 @@ class ParametersChangedMeta(type):
instance.parameters_changed() instance.parameters_changed()
return instance return instance
class Parameterized(Parameterizable, Pickleable): class Parameterized(Parameterizable):
""" """
Parameterized class Parameterized class
@@ -90,7 +90,7 @@ class Parameterized(Parameterizable, Pickleable):
child_node = child.build_pydot(G) child_node = child.build_pydot(G)
G.add_edge(pydot.Edge(node, child_node)) G.add_edge(pydot.Edge(node, child_node))
for o in self._observer_callables_.keys(): for o in self.observers.keys():
label = o.name if hasattr(o, 'name') else str(o) label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label) observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node) G.add_node(observed_node)
@@ -101,48 +101,13 @@ class Parameterized(Parameterizable, Pickleable):
return G return G
return node return node
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
For inheriting from Parameterized:
Always append the state of the inherited object
and call down to the inherited object in _setstate!!
"""
return [
self._fixes_,
self.priors,
self.constraints,
self._parameters_,
self._name,
self._added_names_,
]
def _setstate(self, state):
self._added_names_ = state.pop()
self._name = state.pop()
self._parameters_ = state.pop()
self.constraints = state.pop()
self.priors = state.pop()
self._fixes_ = state.pop()
self._connect_parameters()
self.parameters_changed()
#===========================================================================
# Override copy to handle programmatically added observers
#===========================================================================
def copy(self):
c = super(Pickleable, self).copy()
c.add_observer(c, c._parameters_changed_notification, -100)
return c
#=========================================================================== #===========================================================================
# Gradient control # Gradient control
#=========================================================================== #===========================================================================
def _transform_gradients(self, g): def _transform_gradients(self, g):
if self.has_parent(): if self.has_parent():
return g return g
[numpy.put(g, i, g[i] * c.gradfactor(self._param_array_[i])) for c, i in self.constraints.iteritems() if c != __fixed__] [numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_] if self._has_fixes(): return g[self._fixes_]
return g return g
@@ -206,7 +171,7 @@ class Parameterized(Parameterizable, Pickleable):
def __getitem__(self, name, paramlist=None): def __getitem__(self, name, paramlist=None):
if isinstance(name, (int, slice, tuple, np.ndarray)): if isinstance(name, (int, slice, tuple, np.ndarray)):
return self._param_array_[name] return self.param_array[name]
else: else:
if paramlist is None: if paramlist is None:
paramlist = self.grep_param_names(name) paramlist = self.grep_param_names(name)
@@ -222,7 +187,7 @@ class Parameterized(Parameterizable, Pickleable):
def __setitem__(self, name, value, paramlist=None): def __setitem__(self, name, value, paramlist=None):
if isinstance(name, (slice, tuple, np.ndarray)): if isinstance(name, (slice, tuple, np.ndarray)):
try: try:
self._param_array_[name] = value self.param_array[name] = value
except: except:
raise ValueError, "Setting by slice or index only allowed with array-like" raise ValueError, "Setting by slice or index only allowed with array-like"
self._trigger_params_changed() self._trigger_params_changed()
View file
@@ -66,7 +66,7 @@ class SpikeAndSlabPrior(VariationalPrior):
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0) self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
class VariationalPosterior(Parameterized): class VariationalPosterior(Parameterized):
def __init__(self, means=None, variances=None, name=None, *a, **kw): def __init__(self, means=None, variances=None, name='latent space', *a, **kw):
super(VariationalPosterior, self).__init__(name=name, *a, **kw) super(VariationalPosterior, self).__init__(name=name, *a, **kw)
self.mean = Param("mean", means) self.mean = Param("mean", means)
self.variance = Param("variance", variances, Logexp()) self.variance = Param("variance", variances, Logexp())
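The Logexp() transformation given to the variance keeps it positive during unconstrained optimization. A numpy sketch of a softplus-style transform of this kind; the exact form log(1 + exp(x)) is an assumption about GPy's internals, stated here only for illustration:

import numpy as np

def logexp(x):
    return np.log1p(np.exp(x))               # unconstrained -> positive

def logexp_inverse(f):
    return np.log(np.expm1(f))               # positive -> unconstrained

x = np.linspace(-5., 5., 11)
f = logexp(x)
assert np.all(f > 0)                         # always a valid variance
assert np.allclose(logexp_inverse(f), x)     # round-trips exactly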
@@ -124,6 +124,7 @@ class NormalPosterior(VariationalPosterior):
import sys import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import variational_plots from ...plotting.matplot_dep import variational_plots
import matplotlib
return variational_plots.plot(self,*args) return variational_plots.plot(self,*args)
class SpikeAndSlabPosterior(VariationalPosterior): class SpikeAndSlabPosterior(VariationalPosterior):
View file
@@ -106,15 +106,3 @@ class SparseGP(GP):
return mu, var return mu, var
def _getstate(self):
"""
Get the current state of the class,
"""
return GP._getstate(self) + [
self.Z,
self.num_inducing]
def _setstate(self, state):
self.num_inducing = state.pop()
self.Z = state.pop()
GP._setstate(self, state)
View file
@@ -89,57 +89,57 @@ class SVIGP(GP):
self._param_steplength_trace = [] self._param_steplength_trace = []
self._vb_steplength_trace = [] self._vb_steplength_trace = []
def _getstate(self): # def _getstate(self):
steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength] # steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
return GP._getstate(self) + \ # return GP._getstate(self) + \
[self.get_vb_param(), # [self.get_vb_param(),
self.Z, # self.Z,
self.num_inducing, # self.num_inducing,
self.has_uncertain_inputs, # self.has_uncertain_inputs,
self.X_variance, # self.X_variance,
self.X_batch, # self.X_batch,
self.X_variance_batch, # self.X_variance_batch,
steplength_params, # steplength_params,
self.batchcounter, # self.batchcounter,
self.batchsize, # self.batchsize,
self.epochs, # self.epochs,
self.momentum, # self.momentum,
self.data_prop, # self.data_prop,
self._param_trace, # self._param_trace,
self._param_steplength_trace, # self._param_steplength_trace,
self._vb_steplength_trace, # self._vb_steplength_trace,
self._ll_trace, # self._ll_trace,
self._grad_trace, # self._grad_trace,
self.Y, # self.Y,
self._permutation, # self._permutation,
self.iterations # self.iterations
] # ]
#
def _setstate(self, state): # def _setstate(self, state):
self.iterations = state.pop() # self.iterations = state.pop()
self._permutation = state.pop() # self._permutation = state.pop()
self.Y = state.pop() # self.Y = state.pop()
self._grad_trace = state.pop() # self._grad_trace = state.pop()
self._ll_trace = state.pop() # self._ll_trace = state.pop()
self._vb_steplength_trace = state.pop() # self._vb_steplength_trace = state.pop()
self._param_steplength_trace = state.pop() # self._param_steplength_trace = state.pop()
self._param_trace = state.pop() # self._param_trace = state.pop()
self.data_prop = state.pop() # self.data_prop = state.pop()
self.momentum = state.pop() # self.momentum = state.pop()
self.epochs = state.pop() # self.epochs = state.pop()
self.batchsize = state.pop() # self.batchsize = state.pop()
self.batchcounter = state.pop() # self.batchcounter = state.pop()
steplength_params = state.pop() # steplength_params = state.pop()
(self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params # (self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
self.X_variance_batch = state.pop() # self.X_variance_batch = state.pop()
self.X_batch = state.pop() # self.X_batch = state.pop()
self.X_variance = state.pop() # self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop() # self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop() # self.num_inducing = state.pop()
self.Z = state.pop() # self.Z = state.pop()
vb_param = state.pop() # vb_param = state.pop()
GP._setstate(self, state) # GP._setstate(self, state)
self.set_vb_param(vb_param) # self.set_vb_param(vb_param)
def _compute_kernel_matrices(self): def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
View file
@@ -277,6 +277,8 @@ def bgplvm_simulation(optimize=True, verbose=1,
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.) #k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
m.likelihood.variance = .1
if optimize: if optimize:
print "Optimizing model:" print "Optimizing model:"
@@ -304,7 +306,8 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
m.inference_method = VarDTCMissingData() m.inference_method = VarDTCMissingData()
m.Y[inan] = _np.nan m.Y[inan] = _np.nan
m.X.variance *= .1 m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
m.likelihood.variance = .01
m.parameters_changed() m.parameters_changed()
m.Yreal = Y m.Yreal = Y
@@ -321,18 +324,15 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy import kern from GPy import kern
from GPy.models import MRD from GPy.models import MRD
from GPy.likelihoods import Gaussian
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
#Ylist = [Ylist[0]] #Ylist = [Ylist[0]]
k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))] k = kern.Linear(Q, ARD=True)
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw) m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw)
m['.*noise'] = [Y.var()/500. for Y in Ylist] m['.*noise'] = [Y.var()/40. for Y in Ylist]
#for i, Y in enumerate(Ylist):
# m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500.
if optimize: if optimize:
print "Optimizing Model:" print "Optimizing Model:"
View file
@@ -32,7 +32,8 @@ class ExactGaussianInference(object):
return Y return Y
else: else:
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
raise NotImplementedError, 'TODO' #TODO print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
return Y
def inference(self, kern, X, likelihood, Y, Y_metadata=None): def inference(self, kern, X, likelihood, Y, Y_metadata=None):
""" """
View file
@@ -1,4 +1,4 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
# #
#Parts of this file were influenced by the Matlab GPML framework written by #Parts of this file were influenced by the Matlab GPML framework written by
@@ -91,7 +91,11 @@ class Laplace(object):
iteration = 0 iteration = 0
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter: while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata) W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata) grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
if np.any(np.isnan(grad)):
raise ValueError('One or more element(s) of grad is NaN')
W_f = W*f W_f = W*f
@@ -141,25 +145,30 @@ class Laplace(object):
""" """
#At this point get the hessian matrix (or vector as W is diagonal) #At this point get the hessian matrix (or vector as W is diagonal)
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave) K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
#compute vital matrices #compute vital matrices
C = np.dot(LiW12, K) C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C) #Could this be wrong? Ki_W_i = K - C.T.dot(C)
#compute the log marginal #compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
#Compute vival matrices for derivatives # Compute matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9. if np.any(np.isnan(dW_df)):
raise ValueError('One or more element(s) of dW_df is NaN')
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9.
#BiK, _ = dpotrs(L, K, lower=1) #BiK, _ = dpotrs(L, K, lower=1)
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i) I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
#################### ####################
#compute dL_dK# # compute dL_dK #
#################### ####################
if kern.size > 0 and not kern.is_fixed: if kern.size > 0 and not kern.is_fixed:
#Explicit #Explicit
@@ -202,12 +211,12 @@ class Laplace(object):
def _compute_B_statistics(self, K, W, log_concave): def _compute_B_statistics(self, K, W, log_concave):
""" """
Rasmussen suggests the use of a numerically stable positive definite matrix B Rasmussen suggests the use of a numerically stable positive definite matrix B
Which has a positive diagonal element and can be easyily inverted Which has positive diagonal elements and can be easily inverted
:param K: Prior Covariance matrix evaluated at locations X :param K: Prior Covariance matrix evaluated at locations X
:type K: NxN matrix :type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix) :param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of hessian (1xN) :type W: Vector of diagonal values of Hessian (1xN)
:returns: (W12BiW12, L_B, Li_W12) :returns: (W12BiW12, L_B, Li_W12)
""" """
if not log_concave: if not log_concave:
@@ -218,7 +227,8 @@ class Laplace(object):
# If the likelihood is non-log-concave, we want to say that there is a negative variance # If the likelihood is non-log-concave, we want to say that there is a negative variance
# To cause the posterior to become less certain than the prior and likelihood, # To cause the posterior to become less certain than the prior and likelihood,
# This is a property only held by non-log-concave likelihoods # This is a property only held by non-log-concave likelihoods
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')
#W is diagonal so its sqrt is just the sqrt of the diagonal elements #W is diagonal so its sqrt is just the sqrt of the diagonal elements
W_12 = np.sqrt(W) W_12 = np.sqrt(W)
B = np.eye(K.shape[0]) + W_12*K*W_12.T B = np.eye(K.shape[0]) + W_12*K*W_12.T
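The B matrix here follows Rasmussen & Williams: B = I + W^(1/2) K W^(1/2) has eigenvalues bounded below by one, so its Cholesky factorization is numerically safe. A small numpy check of that construction, mirroring the line above (toy K and W):

import numpy as np

N = 4
K = np.eye(N) + 0.5 * np.ones((N, N))   # any positive definite covariance
W = np.random.rand(N, 1)                # diagonal of the negative Hessian

W_12 = np.sqrt(W)
B = np.eye(N) + W_12 * K * W_12.T       # B = I + W^1/2 K W^1/2
L = np.linalg.cholesky(B)               # stable: eigenvalues of B are >= 1
assert np.allclose(L.dot(L.T), B)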
View file
@@ -23,6 +23,7 @@ class VarDTC(object):
def __init__(self, limit=1): def __init__(self, limit=1):
#self._YYTfactor_cache = caching.cache() #self._YYTfactor_cache = caching.cache()
from ...util.caching import Cacher from ...util.caching import Cacher
self.limit = limit
self.get_trYYT = Cacher(self._get_trYYT, limit) self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
@@ -33,6 +34,17 @@ class VarDTC(object):
def _get_trYYT(self, Y): def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y))) return param_to_array(np.sum(np.square(Y)))
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
return self.limit
def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled.
self.limit = state
from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
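The Cacher objects close over bound methods, so they cannot be pickled; the override therefore reduces the state to the plain limit and rebuilds the caches on load. The same pattern in miniature, with a hypothetical Cached class standing in for VarDTC:

import pickle

class Cached(object):
    def __init__(self, limit=1):
        self.limit = limit
        self._build_cache()

    def _build_cache(self):
        # stand-in for Cacher(self._get_trYYT, limit): a dict memoizer
        self._cache = {}

    def expensive(self, x):
        if x not in self._cache:
            self._cache[x] = x ** 2
        return self._cache[x]

    def __getstate__(self):
        return self.limit              # drop the caches, like VarDTC

    def __setstate__(self, state):
        self.limit = state
        self._build_cache()            # rebuilt fresh after unpickling

c = pickle.loads(pickle.dumps(Cached(limit=3)))
assert c.expensive(4) == 16 and c.limit == 3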
def _get_YYTfactor(self, Y): def _get_YYTfactor(self, Y):
""" """
find a matrix L which satisfies LLT = YYT. find a matrix L which satisfies LLT = YYT.
@@ -179,6 +191,7 @@ class VarDTC(object):
return post, log_marginal, grad_dict return post, log_marginal, grad_dict
class VarDTCMissingData(object): class VarDTCMissingData(object):
const_jitter = 1e-6
def __init__(self, limit=1): def __init__(self, limit=1):
from ...util.caching import Cacher from ...util.caching import Cacher
self._Y = Cacher(self._subarray_computations, limit) self._Y = Cacher(self._subarray_computations, limit)
@@ -250,7 +263,7 @@ class VarDTCMissingData(object):
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
if het_noise: beta = beta_all[ind] if het_noise: beta = beta_all[ind]
else: beta = beta_all[0] else: beta = beta_all
VVT_factor = (beta*y) VVT_factor = (beta*y)
VVT_factor_all[v, ind].flat = VVT_factor.flat VVT_factor_all[v, ind].flat = VVT_factor.flat
@@ -311,7 +324,7 @@ class VarDTCMissingData(object):
het_noise, uncertain_inputs, LB, het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta, psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT) data_fit, num_data, output_dim, trYYT, Y)
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
else: else:
View file
@@ -1,9 +1,9 @@
from _src.kern import Kern from _src.kern import Kern
from _src.rbf import RBF from _src.rbf import RBF
from _src.linear import Linear from _src.linear import Linear, LinearFull
from _src.static import Bias, White from _src.static import Bias, White
from _src.brownian import Brownian from _src.brownian import Brownian
from _src.sympykern import Sympykern from _src.symbolic import Symbolic
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
View file
@@ -23,7 +23,6 @@ class Add(CombinationKernel):
If a list of parts (of this kernel!) `which_parts` is given, only If a list of parts (of this kernel!) `which_parts` is given, only
the parts of the list are taken to compute the covariance. the parts of the list are taken to compute the covariance.
""" """
assert X.shape[1] > max(np.r_[self.active_dims])
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)): elif not isinstance(which_parts, (list, tuple)):
@@ -33,7 +32,6 @@ class Add(CombinationKernel):
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None): def Kdiag(self, X, which_parts=None):
assert X.shape[1] > max(np.r_[self.active_dims])
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)): elif not isinstance(which_parts, (list, tuple)):
@@ -160,21 +158,13 @@ class Add(CombinationKernel):
target_S += b target_S += b
return target_mu, target_S return target_mu, target_S
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return super(Add, self)._getstate()
def _setstate(self, state):
super(Add, self)._setstate(state)
def add(self, other, name='sum'): def add(self, other, name='sum'):
if isinstance(other, Add): if isinstance(other, Add):
other_params = other._parameters_[:] other_params = other._parameters_[:]
for p in other_params: for p in other_params:
other.remove_parameter(p) other.remove_parameter(p)
self.add_parameters(*other_params) self.add_parameters(*other_params)
else: self.add_parameter(other) else:
self.add_parameter(other)
self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
return self return self
View file
@@ -54,85 +54,93 @@ class IndependentOutputs(CombinationKernel):
self.kern = kernels self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name) super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim self.index_dim = index_dim
self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
def K(self,X ,X2=None): def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None: if X2 is None:
target = np.zeros((X.shape[0], X.shape[0])) target = np.zeros((X.shape[0], X.shape[0]))
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)] [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
return target return target
def Kdiag(self,X): def Kdiag(self,X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target return target
def update_gradients_full(self,dL_dK,X,X2=None): def update_gradients_full(self,dL_dK,X,X2=None):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
if self.single_kern: target = np.zeros(self.kern.size) if self.single_kern:
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] target = np.zeros(self.kern.size)
kerns = itertools.repeat(self.kern)
else:
kerns = self.kern
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
def collate_grads(kern, i, dL, X, X2): def collate_grads(kern, i, dL, X, X2):
kern.update_gradients_full(dL,X,X2) kern.update_gradients_full(dL,X,X2)
if self.single_kern: target[:] += kern.gradient if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient else: target[i][:] += kern.gradient
if X2 is None: if X2 is None:
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))] [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))] [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
if self.single_kern: kern.gradient = target if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None: if X2 is None:
# TODO: make use of index_to_slices # TODO: make use of index_to_slices
values = np.unique(X[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values] slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(self.kerns, slices)] for kern, s in zip(kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim]) #slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] # for s in slices_i] for kern, slices_i in zip(kerns, slices)]
#import ipdb;ipdb.set_trace() #import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]), #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss])) # np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)] # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)]
else: else:
values = np.unique(X[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values] slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values] slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])) [target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(self.kerns, slices, slices2)] for kern, s, s2 in zip(kerns, slices, slices2)]
# TODO: make work with index_to_slices # TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim]) #slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim]) #slices2 = index_to_slices(X2[:,self.index_dim])
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape) target = np.zeros(X.shape)
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target return target
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if self.single_kern: target = np.zeros(self.kern.size) if self.single_kern: target = np.zeros(self.kern.size)
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
def collate_grads(kern, i, dL, X): def collate_grads(kern, i, dL, X):
kern.update_gradients_diag(dL,X) kern.update_gradients_diag(dL,X)
if self.single_kern: target[:] += kern.gradient if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))] [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
if self.single_kern: kern.gradient = target if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
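Throughout these methods the index column of X decides which output, and therefore which kernel, each row belongs to; index_to_slices turns that column into per-output slices so every sub-kernel only sees its own rows. A toy numpy version of the blocking logic (boolean masks instead of slices, no GPy imports):

import numpy as np

def block_K(X, index_dim, kern_fns):
    # each row's integer label in column index_dim picks its kernel
    labels = X[:, index_dim].astype(int)
    K = np.zeros((X.shape[0], X.shape[0]))
    for i, k in enumerate(kern_fns):
        mask = labels == i
        Xi = X[mask, :index_dim]               # drop the index column
        K[np.ix_(mask, mask)] = k(Xi, Xi)      # fill the diagonal block
    return K

rbf = lambda A, B: np.exp(-0.5 * ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1))
lin = lambda A, B: A.dot(B.T)
X = np.hstack([np.random.randn(6, 2), np.repeat([[0], [1]], 3, axis=0)])
K = block_K(X, index_dim=2, kern_fns=[rbf, lin])
assert np.allclose(K[:3, 3:], 0.0)             # outputs stay independent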
class Hierarchical(CombinationKernel): class Hierarchical(CombinationKernel):
""" """
@@ -148,30 +156,30 @@ class Hierarchical(CombinationKernel):
def __init__(self, kern, name='hierarchy'): def __init__(self, kern, name='hierarchy'):
assert all([k.input_dim==kerns[0].input_dim for k in kerns]) assert all([k.input_dim==kerns[0].input_dim for k in kerns])
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
self.kerns = kerns kerns = kerns
self.add_parameters(self.kerns) self.add_parameters(kerns)
def K(self,X ,X2=None): def K(self,X ,X2=None):
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)] X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
K = self.kerns[0].K(X, X2) K = kerns[0].K(X, X2)
if X2 is None: if X2 is None:
[[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)] [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
else: else:
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)] [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(kerns[1:], slices, slices2)]
return target return target
def Kdiag(self,X): def Kdiag(self,X):
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)] X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
K = self.kerns[0].K(X, X2) K = kerns[0].K(X, X2)
[[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)] [[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
return target return target
def update_gradients_full(self,dL_dK,X,X2=None): def update_gradients_full(self,dL_dK,X,X2=None):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: if X2 is None:
self.kerns[0].update_gradients_full(dL_dK, X, None) kerns[0].update_gradients_full(dL_dK, X, None)
for k, slices_k in zip(self.kerns[1:], slices): for k, slices_k in zip(kerns[1:], slices):
target = np.zeros(k.size) target = np.zeros(k.size)
def collate_grads(dL, X, X2): def collate_grads(dL, X, X2):
k.update_gradients_full(dL,X,X2) k.update_gradients_full(dL,X,X2)
@@ -180,8 +188,8 @@ class Hierarchical(CombinationKernel):
k._set_gradient(target) k._set_gradient(target)
else: else:
X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1]) X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
self.kerns[0].update_gradients_full(dL_dK, X, None) kerns[0].update_gradients_full(dL_dK, X, None)
for k, slices_k in zip(self.kerns[1:], slices): for k, slices_k in zip(kerns[1:], slices):
target = np.zeros(k.size) target = np.zeros(k.size)
def collate_grads(dL, X, X2): def collate_grads(dL, X, X2):
k.update_gradients_full(dL,X,X2) k.update_gradients_full(dL,X,X2)
View file
@@ -15,9 +15,8 @@ class Kern(Parameterized):
# found in kernel_slice_operations # found in kernel_slice_operations
__metaclass__ = KernCallsViaSlicerMeta __metaclass__ = KernCallsViaSlicerMeta
#=========================================================================== #===========================================================================
_debug=False
_support_GPU=False _support_GPU=False
def __init__(self, input_dim, active_dims, name, useGPU=False,*a, **kw): def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
""" """
The base class for a kernel: a positive definite function The base class for a kernel: a positive definite function
which forms a covariance function (kernel). which forms a covariance function (kernel).
@@ -178,22 +177,6 @@ class Kern(Parameterized):
#else: kernels.append(other) #else: kernels.append(other)
return Prod([self, other], name) return Prod([self, other], name)
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return super(Kern, self)._getstate() + [
self.active_dims,
self.input_dim,
self._sliced_X]
def _setstate(self, state):
self._sliced_X = state.pop()
self.input_dim = state.pop()
self.active_dims = state.pop()
super(Kern, self)._setstate(state)
class CombinationKernel(Kern): class CombinationKernel(Kern):
""" """
Abstract super class for combination kernels. Abstract super class for combination kernels.
@@ -211,9 +194,7 @@ class CombinationKernel(Kern):
:param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on :param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
""" """
assert all([isinstance(k, Kern) for k in kernels]) assert all([isinstance(k, Kern) for k in kernels])
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
input_dim = active_dims.max()+1 + len(extra_dims)
active_dims = slice(active_dims.max()+1+len(extra_dims))
# initialize the kernel with the full input_dim # initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name) super(CombinationKernel, self).__init__(input_dim, active_dims, name)
self.extra_dims = extra_dims self.extra_dims = extra_dims
@@ -223,6 +204,12 @@ class CombinationKernel(Kern):
def parts(self): def parts(self):
return self._parameters_ return self._parameters_
def get_input_dim_active_dims(self, kernels, extra_dims = None):
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
input_dim = active_dims.max()+1 + (len(np.r_[extra_dims]) if extra_dims is not None else 0)
active_dims = slice(0, input_dim, 1)
return input_dim, active_dims
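The helper unions the children's active dimensions and adds room for any extra (index) dimensions. The same computation in isolation:

import numpy as np
from functools import reduce

# active dims of two sub-kernels, plus one extra (index) dimension
active_per_kernel = [np.array([0, 1]), np.array([1, 2])]
extra_dims = [3]

active = reduce(np.union1d, active_per_kernel, np.array([], dtype=int))
input_dim = active.max() + 1 + len(np.r_[extra_dims])    # -> 4
active_dims = slice(0, input_dim, 1)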
def input_sensitivity(self): def input_sensitivity(self):
in_sen = np.zeros((self.num_params, self.input_dim)) in_sen = np.zeros((self.num_params, self.input_dim))
for i, p in enumerate(self.parts): for i, p in enumerate(self.parts):
View file
@@ -5,130 +5,135 @@ Created on 11 Mar 2014
''' '''
from ...core.parameterization.parameterized import ParametersChangedMeta from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np import numpy as np
from functools import wraps
def put_clean(dct, name, func):
if name in dct:
dct['_clean_{}'.format(name)] = dct[name]
dct[name] = func(dct[name])
class KernCallsViaSlicerMeta(ParametersChangedMeta): class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __call__(self, *args, **kw): def __new__(cls, name, bases, dct):
instance = super(ParametersChangedMeta, self).__call__(*args, **kw) put_clean(dct, 'K', _slice_K)
instance.K = _slice_wrapper(instance, instance.K) put_clean(dct, 'Kdiag', _slice_Kdiag)
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) put_clean(dct, 'gradients_X', _slice_gradients_X)
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
instance.parameters_changed()
return instance
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False): put_clean(dct, 'psi0', _slice_psi)
""" put_clean(dct, 'psi1', _slice_psi)
This method wraps the functions in the kernel to make sure all kernels always see their respective input dimension. put_clean(dct, 'psi1', _slice_psi)
The different switches are: put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
diag: if X2 exists put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
derivative: if first arg is dL_dK put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
psi_stat: if first 3 args are dL_dpsi0..2 return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
psi_stat_Z: if first 2 args are dL_dpsi1..2
""" class _Slice_wrap(object):
if derivative: def __init__(self, k, X, X2=None):
if diag: self.k = k
def x_slice_wrapper(dL_dKdiag, X): self.shape = X.shape
ret_X_not_sliced = ret_X and kern._sliced_X == 0 if self.k._sliced_X == 0:
if ret_X_not_sliced: assert X.shape[1] > max(np.r_[self.k.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.k.active_dims]))
ret = np.zeros(X.shape) self.X = self.k._slice_X(X)
X = kern._slice_X(X) if not kern._sliced_X else X self.X2 = self.k._slice_X(X2) if X2 is not None else X2
# if the return value is of shape X.shape, we need to make sure to return the right shape self.ret = True
kern._sliced_X += 1
try:
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
else: ret = operation(dL_dKdiag, X)
except:
raise
finally:
kern._sliced_X -= 1
return ret
elif psi_stat:
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1
# if the return value is of shape X.shape, we need to make sure to return the right shape
try:
if ret_X_not_sliced:
ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
r2 = ret[:2]
ret[0] = ret1
ret[1] = ret2
ret[0][:, kern.active_dims] = r2[0]
ret[1][:, kern.active_dims] = r2[1]
del r2
else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except:
raise
finally:
kern._sliced_X -= 1
return ret
elif psi_stat_Z:
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced: ret = np.zeros(Z.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1
try:
if ret_X_not_sliced:
ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except:
raise
finally:
kern._sliced_X -= 1
return ret
else: else:
def x_slice_wrapper(dL_dK, X, X2=None): self.X = X
ret_X_not_sliced = ret_X and kern._sliced_X == 0 self.X2 = X2
if ret_X_not_sliced: self.ret = False
ret = np.zeros(X.shape) def __enter__(self):
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 self.k._sliced_X += 1
kern._sliced_X += 1 return self
try: def __exit__(self, *a):
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2) self.k._sliced_X -= 1
else: ret = operation(dL_dK, X, X2) def handle_return_array(self, return_val):
except: if self.ret:
raise ret = np.zeros(self.shape)
finally: ret[:, self.k.active_dims] = return_val
kern._sliced_X -= 1 return ret
return ret return return_val
else:
if diag: def _slice_K(f):
def x_slice_wrapper(X, *args, **kw): @wraps(f)
X = kern._slice_X(X) if not kern._sliced_X else X def wrap(self, X, X2 = None, *a, **kw):
kern._sliced_X += 1 with _Slice_wrap(self, X, X2) as s:
try: ret = f(self, s.X, s.X2, *a, **kw)
ret = operation(X, *args, **kw) return ret
except: return wrap
raise
finally: def _slice_Kdiag(f):
kern._sliced_X -= 1 @wraps(f)
return ret def wrap(self, X, *a, **kw):
else: with _Slice_wrap(self, X, None) as s:
def x_slice_wrapper(X, X2=None, *args, **kw): ret = f(self, s.X, *a, **kw)
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 return ret
kern._sliced_X += 1 return wrap
try:
ret = operation(X, X2, *args, **kw) def _slice_update_gradients_full(f):
except: raise @wraps(f)
finally: def wrap(self, dL_dK, X, X2=None):
kern._sliced_X -= 1 with _Slice_wrap(self, X, X2) as s:
return ret ret = f(self, dL_dK, s.X, s.X2)
x_slice_wrapper._operation = operation return ret
x_slice_wrapper.__name__ = ("slicer("+str(operation) return wrap
+(","+str(bool(diag)) if diag else'')
+(','+str(bool(derivative)) if derivative else '') def _slice_update_gradients_diag(f):
+')') @wraps(f)
x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "") def wrap(self, dL_dKdiag, X):
return x_slice_wrapper with _Slice_wrap(self, X, None) as s:
ret = f(self, dL_dKdiag, s.X)
return ret
return wrap
def _slice_gradients_X(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
with _Slice_wrap(self, X, X2) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap
def _slice_psi(f):
@wraps(f)
def wrap(self, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = f(self, s.X, s.X2)
return ret
return wrap
def _slice_update_gradients_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
return ret
return wrap
def _slice_gradients_Z_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret
return wrap
def _slice_gradients_qX_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, variational_posterior, Z) as s:
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
r2 = ret[:2]
ret[0] = s.handle_return_array(r2[0])
ret[1] = s.handle_return_array(r2[1])
del r2
return ret
return wrap
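The rewrite replaces per-instance monkey-patching with class-level decorators: each public method is wrapped once at class creation, and _Slice_wrap slices the inputs down to active_dims while using the _sliced_X counter as a re-entrancy guard, so combination kernels calling into children do not slice twice. A stripped-down sketch of that mechanism (toy kernel, not the GPy classes):

import numpy as np
from functools import wraps

def slice_X_args(f):
    @wraps(f)
    def wrap(self, X, X2=None):
        if self._sliced_X == 0:                 # only the outermost call slices
            X = X[:, self.active_dims]
            X2 = X2[:, self.active_dims] if X2 is not None else None
        self._sliced_X += 1
        try:
            return f(self, X, X2)
        finally:
            self._sliced_X -= 1                 # mirrors _Slice_wrap.__exit__
    return wrap

class ToyKern(object):
    def __init__(self, active_dims):
        self.active_dims = np.asarray(active_dims)
        self._sliced_X = 0

    @slice_X_args
    def K(self, X, X2=None):
        X2 = X if X2 is None else X2
        return X.dot(X2.T)                      # linear kernel on its own dims

k = ToyKern(active_dims=[0, 2])
X = np.random.randn(4, 5)
assert np.allclose(k.K(X), X[:, [0, 2]].dot(X[:, [0, 2]].T))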
View file
@@ -313,3 +313,47 @@ class Linear(Kern):
def input_sensitivity(self): def input_sensitivity(self):
return np.ones(self.input_dim) * self.variances return np.ones(self.input_dim) * self.variances
class LinearFull(Kern):
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
super(LinearFull, self).__init__(input_dim, active_dims, name)
if W is None:
W = np.ones((input_dim, rank))
if kappa is None:
kappa = np.ones(input_dim)
assert W.shape == (input_dim, rank)
assert kappa.shape == (input_dim,)
self.W = Param('W', W)
self.kappa = Param('kappa', kappa, Logexp())
self.add_parameters(self.W, self.kappa)
def K(self, X, X2=None):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2)
def update_gradients_full(self, dL_dK, X, X2=None):
self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None else X2)
self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W)
self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W)
def Kdiag(self, X):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return np.einsum('ij,jk,ik->i', X, P, X)
def update_gradients_diag(self, dL_dKdiag, X):
self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag)
self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag)
def gradients_X(self, dL_dK, X, X2=None):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
if X2 is None:
return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P)
else:
return np.einsum('ij,jk,kl->il', dL_dK, X2, P)
def gradients_X_diag(self, dL_dKdiag, X):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)
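LinearFull implements k(x, x') = x^T P x' with P = W W^T + diag(kappa), a low-rank-plus-diagonal generalization of the ARD linear kernel. A quick numpy check that the einsum expressions above agree with the plain matrix products (random shapes, no GPy needed):

import numpy as np

N, D, rank = 5, 3, 2
X = np.random.randn(N, D)
W = np.random.randn(D, rank)
kappa = np.random.rand(D)

P = W.dot(W.T) + np.diag(kappa)
K_einsum = np.einsum('ij,jk,lk->il', X, P, X)       # as in LinearFull.K
assert np.allclose(K_einsum, X.dot(P).dot(X.T))

Kdiag = np.einsum('ij,jk,ik->i', X, P, X)           # as in LinearFull.Kdiag
assert np.allclose(Kdiag, np.diag(K_einsum))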
View file
@@ -23,7 +23,6 @@ class Prod(CombinationKernel):
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None): def K(self, X, X2=None, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)): elif not isinstance(which_parts, (list, tuple)):
@@ -33,7 +32,6 @@ class Prod(CombinationKernel):
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None): def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
@@ -58,8 +56,6 @@ class Prod(CombinationKernel):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape) target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2): for k1,k2 in itertools.combinations(self.parts, 2):
target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X)
target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X)
return target return target
View file
@@ -74,9 +74,6 @@ class RBF(Stationary):
# Spike-and-Slab GPLVM # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU: if self.useGPU:
# dL_dpsi0_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi0))
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else: else:
@@ -107,8 +104,6 @@ class RBF(Stationary):
#contributions from psi0: #contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0) self.variance.gradient = np.sum(dL_dpsi0)
if self._debug:
num_grad = self.lengthscale.gradient.copy()
self.lengthscale.gradient = 0. self.lengthscale.gradient = 0.
#from psi1 #from psi1
@@ -128,8 +123,6 @@ class RBF(Stationary):
else: else:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2) self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
if self._debug:
import ipdb;ipdb.set_trace()
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
else: else:
@@ -139,8 +132,6 @@ class RBF(Stationary):
# Spike-and-Slab GPLVM # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU: if self.useGPU:
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else: else:
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
@@ -177,8 +168,6 @@ class RBF(Stationary):
# Spike-and-Slab GPLVM # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU: if self.useGPU:
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else: else:
ndata = variational_posterior.mean.shape[0] ndata = variational_posterior.mean.shape[0]
View file
@@ -11,9 +11,9 @@ from kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
class Sympykern(Kern): class Symbolic(Kern):
""" """
A kernel object, where all the hard work in done by sympy. A kernel object, where all the hard work is done by sympy.
:param k: the covariance function :param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2... :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
@@ -26,10 +26,8 @@ class Sympykern(Kern):
- to handle multiple inputs, call them x_1, z_1, etc - to handle multiple inputs, call them x_1, z_1, etc
- to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. - to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
""" """
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None): def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):
if name is None:
name='sympykern'
if k is None: if k is None:
raise ValueError, "You must provide an argument for the covariance function." raise ValueError, "You must provide an argument for the covariance function."
super(Sympykern, self).__init__(input_dim, active_dims, name) super(Sympykern, self).__init__(input_dim, active_dims, name)
@@ -60,7 +58,6 @@ class Sympykern(Kern):
# extract parameter names from the covariance # extract parameter names from the covariance
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs. # Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1: if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
@@ -97,8 +94,8 @@ class Sympykern(Kern):
val = 1.0 val = 1.0
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class. # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
if param is not None: if param is not None:
if param.has_key(theta): if param.has_key(theta.name):
val = param[theta] val = param[theta.name]
setattr(self, theta.name, Param(theta.name, val, None)) setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name)) self.add_parameters(getattr(self, theta.name))
@@ -117,6 +114,12 @@ class Sympykern(Kern):
self.arg_list += self._sp_theta_i + self._sp_theta_j self.arg_list += self._sp_theta_i + self._sp_theta_j
self.diag_arg_list += self._sp_theta_i self.diag_arg_list += self._sp_theta_i
# Check if there are additional linear operators on the covariance.
self._sp_operators = operators
# TODO: Deal with linear operators
#if self._sp_operators:
# for operator in self._sp_operators:
# psi_stats aren't yet implemented. # psi_stats aren't yet implemented.
if False: if False:
self.compute_psi_stats() self.compute_psi_stats()
@@ -254,3 +257,176 @@ class Sympykern(Kern):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
if False: # experimental and currently disabled
class Symcombine(CombinationKernel):
"""
Combine a list of given sympy covariances with the provided operations.
"""
def __init__(self, subkerns, operations, name='sympy_combine'):
super(Symcombine, self).__init__(subkerns, name)
for subkern, operation in zip(subkerns, operations):
self._sp_k += self._k_double_operate(subkern._sp_k, operation)
#def _double_operate(self, k, operation):
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Combine covariances with a linear operator.
"""
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
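Why the cross terms are needed: for a sum kernel the psi2 statistic is an expectation of a product of kernel evaluations, so it does not simply add over parts. A sketch of the expansion, assuming the usual notation psi1_{nm} = E_q[k(x_n, z_m)] and psi2_{nmm'} = E_q[k(z_m, x_n) k(x_n, z_{m'})]:

\mathbb{E}_q\big[(k_a+k_b)(z_m,x_n)\,(k_a+k_b)(x_n,z_{m'})\big] = \psi^{a}_{2} + \psi^{b}_{2} + \mathbb{E}_q\big[k_a(z_m,x_n)\,k_b(x_n,z_{m'})\big] + \mathbb{E}_q\big[k_b(z_m,x_n)\,k_a(x_n,z_{m'})\big]

When k_a and k_b act on disjoint active dimensions the cross expectations factorize into products of psi1 statistics, which is exactly the tmp1[:, :, None] * tmp2[:, None, :] term above; a Bias kernel contributes its constant variance in place of its psi1.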
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape)
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu += a
target_S += b
return target_mu, target_S
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return super(Symcombine, self)._getstate()
def _setstate(self, state):
super(Symcombine, self)._setstate(state)
def add(self, other, name='sum'):
if isinstance(other, Symcombine):
other_params = other._parameters_.copy()
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
else: self.add_parameter(other)
return self
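A minimal usage sketch for the renamed Symbolic kernel (a sketch under assumptions: the class is exported as GPy.kern.Symbolic, and the param dict keys match the free symbols of the expression, per the has_key(theta.name) fix above):

import numpy as np
import sympy as sym
import GPy

# an RBF-like covariance written symbolically in x_0 and z_0
x_0, z_0 = sym.symbols('x_0 z_0')
variance, lengthscale = sym.symbols('variance lengthscale', positive=True)
k = variance * sym.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))

kern = GPy.kern.Symbolic(input_dim=1, k=k,
                         param={'variance': 1.0, 'lengthscale': 2.0})

X = np.linspace(0., 1., 5)[:, None]
print kern.K(X)   # 5x5 covariance matrix evaluated from the compiled expression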

Binary file not shown.

View file

@@ -6,3 +6,17 @@ from poisson import Poisson
from student_t import StudentT from student_t import StudentT
from likelihood import Likelihood from likelihood import Likelihood
from mixed_noise import MixedNoise from mixed_noise import MixedNoise
# TODO need to fix this in a config file.
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
# These are likelihoods that rely on symbolic.
from symbolic import Symbolic
#from sstudent_t import SstudentT
from negative_binomial import Negative_binomial
#from skew_normal import Skew_normal
#from skew_exponential import Skew_exponential
#from null_category import Null_category

View file

@@ -15,7 +15,7 @@ class Bernoulli(Likelihood):
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}} p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
.. Note:: .. Note::
Y is expected to take values in {-1, 1} TODO: {0, 1}?? Y takes values in either {-1, 1} or {0, 1}.
link function should have the domain [0, 1], e.g. probit (default) or Heaviside link function should have the domain [0, 1], e.g. probit (default) or Heaviside
.. See also:: .. See also::
@@ -54,10 +54,10 @@ class Bernoulli(Likelihood):
""" """
if Y_i == 1: if Y_i == 1:
sign = 1. sign = 1.
elif Y_i == 0: elif Y_i == 0 or Y_i == -1:
sign = -1 sign = -1
else: else:
raise ValueError("bad value for Bernouilli observation (0, 1)") raise ValueError("bad value for Bernoulli observation (0, 1)")
if isinstance(self.gp_link, link_functions.Probit): if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i) z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z) Z_hat = std_norm_cdf(z)
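For reference, the z above is the standard Gaussian-probit integral, assuming v_i and tau_i are the natural parameters of the EP cavity (v_i = mu_i / sigma_i^2, tau_i = 1 / sigma_i^2):

\int \Phi(s f)\,\mathcal{N}(f \mid \mu_i, \sigma_i^2)\,df = \Phi\!\left(\frac{s\,\mu_i}{\sqrt{1+\sigma_i^2}}\right) = \Phi\!\left(\frac{s\,v_i}{\sqrt{\tau_i^2+\tau_i}}\right), \qquad s \in \{-1,+1\}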
@@ -95,15 +95,15 @@ class Bernoulli(Likelihood):
else: else:
return np.nan return np.nan
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given inverse link of f.
.. math:: .. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}} p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
@@ -113,102 +113,106 @@ class Bernoulli(Likelihood):
.. Note: .. Note:
Each y_i must be in {0, 1} Each y_i must be in {0, 1}
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = (link_f**y) * ((1.-link_f)**(1.-y)) #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
objective = np.where(y, link_f, 1.-link_f) objective = np.where(y, inv_link_f, 1.-inv_link_f)
return np.exp(np.sum(np.log(objective))) return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Log Likelihood function given link(f) Log Likelihood function given inverse link of f.
.. math:: .. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-\\lambda(f_{i})) \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-\\lambda(f_{i}))
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: log likelihood evaluated at points link(f) :returns: log likelihood evaluated at points inverse link of f.
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f) #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
objective = np.where(y==1, np.log(link_f), np.log(1-link_f)) # TODO check y \in {0, 1} or {-1, 1}
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
np.seterr(**state) np.seterr(**state)
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the pdf at y, given link(f) w.r.t link(f) Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
.. math:: .. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))} \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f) :returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#grad = (y/link_f) - (1.-y)/(1-link_f) #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
grad = np.where(y, 1./link_f, -1./(1-link_f)) # TODO check y \in {0, 1} or {-1, 1}
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
np.seterr(**state) np.seterr(**state)
return grad return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
.. math:: .. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}} \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f).
:rtype: Nx1 array :rtype: Nx1 array
.. Note:: .. Note::
Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))) (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f)) # TODO check y \in {0, 1} or {-1, 1}
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
np.seterr(**state) np.seterr(**state)
return d2logpdf_dlink2 return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}} \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables passed through inverse link of f.
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli :param Y_metadata: Y_metadata not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f) :returns: third derivative of log likelihood evaluated at points inverse_link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) #d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
state = np.seterr(divide='ignore') state = np.seterr(divide='ignore')
d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3)) # TODO check y \in {0, 1} or {-1, 1}
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
np.seterr(**state) np.seterr(**state)
return d3logpdf_dlink3 return d3logpdf_dlink3
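One reason for the seterr/np.where pairing in these derivatives: np.where evaluates both branches in full, so log(0) and 1/0 are computed (and would warn) even for entries where the other branch is selected. A minimal sketch of the idiom:

import numpy as np

p = np.array([0.0, 0.5, 1.0])   # inverse-link values
y = np.array([0, 1, 1])         # labels in {0, 1}

state = np.seterr(divide='ignore')   # silence the spurious log(0) warning
# both np.log(p) and np.log(1 - p) are evaluated before selection happens
loglik = np.where(y == 1, np.log(p), np.log(1.0 - p))
np.seterr(**state)
# loglik == [0., -0.693..., 0.]; the -inf from log(0) is never selected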

View file

@@ -16,20 +16,20 @@ class Likelihood(Parameterized):
Likelihood base class, used to define p(y|f). Likelihood base class, used to define p(y|f).
All instances use _inverse_ link functions, which can be swapped out. It is All instances use _inverse_ link functions, which can be swapped out. It is
expected that inherriting classes define a default inverse link function expected that inheriting classes define a default inverse link function
To use this class, inherrit and define missing functionality. To use this class, inherit and define missing functionality.
Inherriting classes *must* implement: Inheriting classes *must* implement:
pdf_link : a bound method which turns the output of the link function into the pdf pdf_link : a bound method which turns the output of the link function into the pdf
logpdf_link : the logarithm of the above logpdf_link : the logarithm of the above
To enable use with EP, inherriting classes *must* define: To enable use with EP, inheriting classes *must* define:
TODO: a suitable derivative function for any parameters of the class TODO: a suitable derivative function for any parameters of the class
It is also desirable to define: It is also desirable to define:
moments_match_ep : a function to compute the EP moments. If this isn't defined, the moments will be computed using 1D quadrature. moments_match_ep : a function to compute the EP moments. If this isn't defined, the moments will be computed using 1D quadrature.
To enable use with Laplace approximation, inherriting classes *must* define: To enable use with Laplace approximation, inheriting classes *must* define:
Some derivative functions *AS TODO* Some derivative functions *AS TODO*
For exact Gaussian inference, define *JH TODO* For exact Gaussian inference, define *JH TODO*
@@ -159,7 +159,7 @@ class Likelihood(Parameterized):
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
""" """
Numerical approximation to the predictive variance: V(Y_star) Approximation to the predictive variance: V(Y_star)
The following variance decomposition is used: The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
@@ -208,28 +208,28 @@ class Likelihood(Parameterized):
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2 # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
return exp_var + var_exp return exp_var + var_exp
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None): def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError raise NotImplementedError
def pdf(self, f, y, Y_metadata=None): def pdf(self, f, y, Y_metadata=None):
@@ -247,8 +247,8 @@ class Likelihood(Parameterized):
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, Y_metadata=Y_metadata) return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def logpdf(self, f, y, Y_metadata=None): def logpdf(self, f, y, Y_metadata=None):
""" """
@@ -265,8 +265,8 @@ class Likelihood(Parameterized):
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, Y_metadata=Y_metadata) return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def dlogpdf_df(self, f, y, Y_metadata=None): def dlogpdf_df(self, f, y, Y_metadata=None):
""" """
@@ -284,8 +284,8 @@ class Likelihood(Parameterized):
:returns: derivative of log likelihood evaluated for this point :returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df) return chain_1(dlogpdf_dlink, dlink_df)
@@ -305,10 +305,10 @@ class Likelihood(Parameterized):
:returns: second derivative of log likelihood evaluated for this point (diagonal only) :returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array :rtype: 1xN array
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
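For clarity, chain_1, chain_2 and chain_3 presumably implement the chain rule for the composition logpdf(lambda(f)); in that reading, writing lambda', lambda'', lambda''' for dtransf_df and its higher derivatives, the call above and the chain_3 call below compute

\frac{d^{2}\log p}{df^{2}} = \frac{d^{2}\log p}{d\lambda^{2}}\,(\lambda')^{2} + \frac{d\log p}{d\lambda}\,\lambda''

\frac{d^{3}\log p}{df^{3}} = \frac{d^{3}\log p}{d\lambda^{3}}\,(\lambda')^{3} + 3\,\frac{d^{2}\log p}{d\lambda^{2}}\,\lambda'\,\lambda'' + \frac{d\log p}{d\lambda}\,\lambda'''

which matches the argument order of the two helpers.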
@@ -328,12 +328,12 @@ class Likelihood(Parameterized):
:returns: third derivative of log likelihood evaluated for this point :returns: third derivative of log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata) d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f) d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
@@ -342,10 +342,10 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata) return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([1, 0]) return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None): def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
@@ -353,12 +353,12 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df) return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
@@ -366,14 +366,14 @@ class Likelihood(Parameterized):
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else: else:
#Is no parameters so return an empty array for its derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, Y_metadata=None): def _laplace_gradients(self, f, y, Y_metadata=None):
@@ -383,12 +383,9 @@ class Likelihood(Parameterized):
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names' #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize # ensure we have gradients for every parameter we want to optimize
try: assert len(dlogpdf_dtheta) == self.size #1 x num_param array
assert len(dlogpdf_dtheta) == self.size #1 x num_param array assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
except Exception as e:
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
@@ -411,7 +408,10 @@ class Likelihood(Parameterized):
#compute the quantiles by sampling!!! #compute the quantiles by sampling!!!
N_samp = 1000 N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata) ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]

View file

@@ -71,6 +71,7 @@ class Probit(GPTransformation):
g(f) = \\Phi^{-1} (mu) g(f) = \\Phi^{-1} (mu)
""" """
def transf(self,f): def transf(self,f):
return std_norm_cdf(f) return std_norm_cdf(f)

View file

@@ -0,0 +1,46 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian
except ImportError:
sympy_available=False
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from symbolic import Symbolic
from scipy import stats
if sympy_available:
class Negative_binomial(Symbolic):
"""
Negative binomial
.. math::
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y_i!\,\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
.. Note::
Y takes non-negative integer values.
link function should have a positive domain, e.g. log (default).
.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None):
if gp_link is None:
gp_link = link_functions.Log()
dispersion = sym.Symbol('dispersion', positive=True, real=True)
y = sym.Symbol('y', nonnegative=True, integer=True)
f = sym.Symbol('f', positive=True, real=True)
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f)
super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial')
# TODO: Check this.
self.log_concave = False
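A usage sketch with hypothetical data (assumes construction compiles the log-pdf and its derivatives via the Symbolic machinery in symbolic.py below):

import numpy as np

lik = Negative_binomial()             # dispersion is initialised to 1.0
f = np.array([[1.0], [2.0], [5.0]])   # latent values after the log link (positive)
y = np.array([[0.0], [3.0], [4.0]])   # observed counts

print lik.logpdf_link(f, y)           # summed log likelihood
print lik.dlogpdf_dlink(f, y)         # per-point gradient w.r.t. f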

View file

@@ -26,8 +26,8 @@ class StudentT(Likelihood):
gp_link = link_functions.Identity() gp_link = link_functions.Identity()
super(StudentT, self).__init__(gp_link, name='Student_T') super(StudentT, self).__init__(gp_link, name='Student_T')
# sigma2 is not a noise parameter, it is a squared scale.
self.sigma2 = Param('t_noise', float(sigma2), Logexp()) self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free)) self.v = Param('deg_free', float(deg_free))
self.add_parameter(self.sigma2) self.add_parameter(self.sigma2)
self.add_parameter(self.v) self.add_parameter(self.v)
@@ -46,23 +46,23 @@ class StudentT(Likelihood):
self.sigma2.gradient = grads[0] self.sigma2.gradient = grads[0]
self.v.gradient = grads[1] self.v.gradient = grads[1]
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
.. math:: .. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
#Careful gamma(big_number) is infinity! #Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
/ (np.sqrt(self.v * np.pi * self.sigma2))) / (np.sqrt(self.v * np.pi * self.sigma2)))
@@ -70,15 +70,15 @@ class StudentT(Likelihood):
) )
return np.prod(objective) return np.prod(objective)
def logpdf_link(self, link_f, y, Y_metadata=None): def logpdf_link(self, inv_link_f, y, Y_metadata=None):
""" """
Log Likelihood Function given link(f) Log Likelihood Function given link(f)
.. math:: .. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param link_f: latent variables (link(f)) :param inv_link_f: latent variables (link(f))
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@@ -86,11 +86,11 @@ class StudentT(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
#FIXME: #FIXME:
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?! #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly #But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) #print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
objective = (+ gammaln((self.v + 1) * 0.5) objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
@@ -99,15 +99,15 @@ class StudentT(Likelihood):
) )
return np.sum(objective) return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the log likelihood function at y, given link(f) w.r.t link(f) Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param link_f: latent variables (f) :param inv_link_f: latent variables (f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@@ -115,12 +115,12 @@ class StudentT(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
""" """
Hessian at y, given link(f), w.r.t link(f) Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@@ -129,8 +129,8 @@ class StudentT(Likelihood):
.. math:: .. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables inv_link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
@@ -141,90 +141,90 @@ class StudentT(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
""" """
Third order derivative log-likelihood function at y given link(f) w.r.t link(f) Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math:: .. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2(v+1)(y_{i} - \lambda(f_{i}))((y_{i} - \lambda(f_{i}))^{2} - 3\\sigma^{2} v)}{((y_{i} - \lambda(f_{i}))^{2} + \\sigma^{2} v)^{3}} \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2(v+1)(y_{i} - \lambda(f_{i}))((y_{i} - \lambda(f_{i}))^{2} - 3\\sigma^{2} v)}{((y_{i} - \lambda(f_{i}))^{2} + \\sigma^{2} v)^{3}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3) ((e**2 + self.sigma2*self.v)**3)
) )
return d3lik_dlink3 return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_scale2) Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_scale2)
.. math:: .. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})} \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar) return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_scale2) Derivative of the dlogpdf_dlink w.r.t variance parameter (t_scale2)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-v(v + 1)(y_{i}-\lambda(f_{i}))}{((y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2} \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-v(v + 1)(y_{i}-\lambda(f_{i}))}{((y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
:param link_f: latent variables link_f :param inv_link_f: latent variables inv_link_f
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None): def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_scale2) Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_scale2)
.. math:: .. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
:param link_f: latent variables link(f) :param inv_link_f: latent variables link(f)
:type link_f: Nx1 array :type inv_link_f: Nx1 array
:param y: data :param y: data
:type y: Nx1 array :type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution :param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - inv_link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3) / ((self.sigma2*self.v + (e**2))**3)
) )
@@ -246,11 +246,12 @@ class StudentT(Likelihood):
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None): def predictive_mean(self, mu, sigma, Y_metadata=None):
return self.gp_link.transf(mu) # only true in link is monotoci, which it is. # The comment here confuses mean and median.
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
if self.deg_free <2.: if self.deg_free<=2.:
return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
else: else:
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata) return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
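The <= 2 guard matches the Student-t moments: with squared scale sigma^2 and v degrees of freedom,

\operatorname{Var}(y \mid f) = \frac{v}{v-2}\,\sigma^{2} \quad \text{for } v > 2,

and the variance is infinite for 1 < v <= 2 (undefined for v <= 1), hence the NaN return.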

GPy/likelihoods/symbolic.py Normal file
View file

@@ -0,0 +1,298 @@
# Copyright (c) 2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
except ImportError:
sympy_available=False
import numpy as np
import link_functions
from scipy import stats, integrate
from scipy.special import gammaln, gamma, erf, polygamma
from GPy.util.functions import cum_gaussian, ln_cum_gaussian
from likelihood import Likelihood
from ..core.parameterization import Param
if sympy_available:
class Symbolic(Likelihood):
"""
Symbolic likelihood.
Likelihood where the form of the likelihood is provided by a sympy expression.
"""
def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, param=None, func_modules=[]):
if gp_link is None:
gp_link = link_functions.Identity()
if log_pdf is None:
raise ValueError, "You must provide an argument for the log pdf."
self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy']
super(Symbolic, self).__init__(gp_link, name=name)
self.missing_data = False
self._sym_log_pdf = log_pdf
if missing_log_pdf:
self.missing_data = True
self._sym_missing_log_pdf = missing_log_pdf
# pull the variable names out of the symbolic pdf
sym_vars = [e for e in self._sym_log_pdf.atoms() if e.is_Symbol]
self._sym_f = [e for e in sym_vars if e.name=='f']
if not self._sym_f:
raise ValueError('No variable f in log pdf.')
self._sym_y = [e for e in sym_vars if e.name=='y']
if not self._sym_y:
raise ValueError('No variable y in log pdf.')
self._sym_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name)
theta_names = [theta.name for theta in self._sym_theta]
if self.missing_data:
# pull the variable names out of missing data
sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol]
sym_f = [e for e in sym_vars if e.name=='f']
if not sym_f:
raise ValueError('No variable f in missing log pdf.')
sym_y = [e for e in sym_vars if e.name=='y']
if sym_y:
raise ValueError('Data is present in missing data portion of likelihood.')
# additional missing data parameters
missing_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='missing' or e.name in theta_names)],key=lambda e:e.name)
self._sym_theta += missing_theta
self._sym_theta = sorted(self._sym_theta, key=lambda e:e.name)
# These are all the arguments needed to compute likelihoods.
self.arg_list = self._sym_y + self._sym_f + self._sym_theta
# these are arguments for computing derivatives.
derivative_arguments = self._sym_f + self._sym_theta
# Do symbolic work to compute derivatives.
self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments}
self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}
if self.missing_data:
# Do symbolic work to compute derivatives.
self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments}
self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}
# Add parameters to the model.
for theta in self._sym_theta:
val = 1.0
# TODO: need to decide how to handle user passing values for these parameter vectors.
if param is not None:
if param.has_key(theta.name):
val = param[theta.name]
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))
# Is there some way to check whether the pdf is log
# concave? For the moment, need user to specify.
self.log_concave = log_concave
# initialise code arguments
self._arguments = {}
# generate the code for the pdf and derivatives
self._gen_code()
def _gen_code(self):
"""Generate the code from the symbolic parts that will be used for likleihod computation."""
# TODO: Check here whether theano is available and set up
# functions accordingly.
self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules)
# compute code for derivatives (for implicit likelihood terms
# we need up to 3rd derivatives)
setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()})
setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()})
setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()})
if self.missing_data:
self._missing_log_pdf_function = lambdify(self.arg_list, self._sym_missing_log_pdf, self.func_modules)
setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()})
setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()})
setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()})
# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
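As a sketch of what _gen_code produces: lambdify compiles each sympy expression into a numpy-vectorized function whose parameter names are the symbol names, which is why the keyword-argument calls via _arguments work (hypothetical Bernoulli-style log-pdf, not one of the shipped likelihoods):

import numpy as np
import sympy as sym
from sympy.utilities.lambdify import lambdify

f, y = sym.symbols('f y')
log_pdf = y * sym.log(f) + (1 - y) * sym.log(1 - f)

logpdf_fn = lambdify([y, f], log_pdf, 'numpy')                              # cf. _log_pdf_function
dlogpdf_df_fn = lambdify([y, f], sym.diff(log_pdf, f).simplify(), 'numpy')  # cf. _first_derivative_code['f']

yv = np.array([1., 0.])
fv = np.array([0.7, 0.2])
print logpdf_fn(yv, fv)        # [log(0.7), log(0.8)]
print dlogpdf_df_fn(yv, fv)    # [1/0.7, -1/0.8]
print logpdf_fn(y=yv, f=fv)    # keyword call, as used via **self._arguments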
def parameters_changed(self):
pass
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
# The way the Laplace approximation is run requires the
# covariance function to compute the true gradient (because it
# is dependent on the mode). This means we actually compute
# the gradient outside this object. This function would
# normally ask the object to update its gradients internally,
# but here it provides them externally, because they are
# computed in the inference code. TODO: Thought: How does this
# effect EP? Shouldn't this be done by a separate
# Laplace-approximation specific call?
for grad, theta in zip(grads, self._sym_theta):
parameter = getattr(self, theta.name)
setattr(parameter, 'gradient', grad)
def _arguments_update(self, f, y):
"""Set up argument lists for the derivatives."""
# If we do make use of Theano, then at this point we would
# need to do a lot of precomputation to ensure that the
# likelihoods and gradients are computed together, then check
# for parameter changes before updating.
for i, fvar in enumerate(self._sym_f):
self._arguments[fvar.name] = f
for i, yvar in enumerate(self._sym_y):
self._arguments[yvar.name] = y
for theta in self._sym_theta:
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
def pdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Likelihood function given inverse link of f.
:param inv_link_f: inverse link of latent variables.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata, not used by the symbolic likelihood
:returns: likelihood evaluated for this point
:rtype: float
"""
return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata))
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Log Likelihood Function given inverse link of latent variables.
:param inv_link_f: latent variables (inverse link of f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: log likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
if self.missing_data:
ll = np.where(np.isnan(y), self._missing_log_pdf_function(**self._arguments), self._log_pdf_function(**self._arguments))
else:
ll = np.where(np.isnan(y), 0., self._log_pdf_function(**self._arguments))
return np.sum(ll)
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of log likelihood with respect to the inverse link function.
:param inv_link_f: latent variables (inverse link of f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata
:returns: gradient of the log likelihood with respect to each point.
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
if self.missing_data:
return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._arguments), self._first_derivative_code['f'](**self._arguments))
else:
return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments))
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
:param inv_link_f: inverse link of the latent variables.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata, not used by the symbolic likelihood
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Returns diagonal of Hessian, since every where else it is
0, as the likelihood factorizes over cases (the
distribution for y_i depends only on link(f_i) not on
link(f_(j!=i))
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
if self.missing_data:
return np.where(np.isnan(y), self._missing_second_derivative_code['f'](**self._missing_arguments), self._second_derivative_code['f'](**self._arguments))
else:
return np.where(np.isnan(y), 0., self._second_derivative_code['f'](**self._arguments))
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
"""Third derivative of the log likelihood with respect to the inverse link, evaluated per point (Nx1 array)."""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
if self.missing_data:
return np.where(np.isnan(y), self._missing_third_derivative_code['f'](**self._missing_arguments), self._third_derivative_code['f'](**self._arguments))
else:
return np.where(np.isnan(y), 0., self._third_derivative_code['f'](**self._arguments))
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
"""Gradient of the log likelihood with respect to each likelihood parameter, summed over the data points."""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
for i, theta in enumerate(self._sym_theta):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments))
else:
g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments))
return g.sum(0)
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
"""Derivative of dlogpdf_dlink with respect to each likelihood parameter, per data point (N x num_params array)."""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
for i, theta in enumerate(self._sym_theta):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y), self._missing_second_derivative_code[theta.name](**self._arguments), self._second_derivative_code[theta.name](**self._arguments))
else:
g[:, i:i+1] = np.where(np.isnan(y), 0., self._second_derivative_code[theta.name](**self._arguments))
return g
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
"""Derivative of d2logpdf_dlink2 with respect to each likelihood parameter, per data point (N x num_params array)."""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
self._arguments_update(inv_link_f, y)
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
for i, theta in enumerate(self._sym_theta):
if self.missing_data:
g[:, i:i+1] = np.where(np.isnan(y), self._missing_third_derivative_code[theta.name](**self._arguments), self._third_derivative_code[theta.name](**self._arguments))
else:
g[:, i:i+1] = np.where(np.isnan(y), 0., self._third_derivative_code[theta.name](**self._arguments))
return g
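# Illustrative sketch, hypothetical helpers only: the three *_dtheta methods
# above each assemble an N x P matrix of per-point derivatives, one column per
# symbolic parameter; only dlogpdf_link_dtheta sums over the data. The assembly
# with toy callables keyed by name, like self._first_derivative_code:
def _dlogpdf_dtheta_sketch(f, y):
code = {'scale2': lambda f, y: 0.5*((y - f)**2 - 1.), 'shift': lambda f, y: y - f}
g = np.zeros((y.shape[0], len(code)))
for i, name in enumerate(sorted(code)):
g[:, i] = np.where(np.isnan(y), 0., code[name](f, y))
return g.sum(0)  # one gradient entry per parameter, summed over data points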
def predictive_mean(self, mu, sigma, Y_metadata=None):
raise NotImplementedError
def predictive_variance(self, mu, variance, predictive_mean=None, Y_metadata=None):
raise NotImplementedError
def conditional_mean(self, gp):
raise NotImplementedError
def conditional_variance(self, gp):
raise NotImplementedError
def samples(self, gp, Y_metadata=None):
raise NotImplementedError
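# Illustrative sketch, assuming lik is an instance of this likelihood: every
# method above reduces to a NumPy function of (inv_link_f, y), so the generated
# first derivative is cheap to spot-check against central finite differences:
def _fd_check_dlogpdf_dlink_sketch(lik, f, y, eps=1e-6):
analytic = np.asarray(lik.dlogpdf_dlink(f, y)).flatten()
numeric = np.empty_like(analytic)
for i in range(f.shape[0]):
fp, fm = f.copy(), f.copy()
fp[i] += eps
fm[i] -= eps
numeric[i] = (lik.logpdf_link(fp, y) - lik.logpdf_link(fm, y))/(2.*eps)
return np.allclose(analytic, numeric, atol=1e-4)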

View file

@ -27,7 +27,10 @@ class BayesianGPLVM(SparseGP):
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
if X == None: if X == None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
X = initialize_latent(init, input_dim, Y) X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
self.init = init self.init = init
if X_variance is None: if X_variance is None:
@ -39,7 +42,7 @@ class BayesianGPLVM(SparseGP):
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
if kernel is None: if kernel is None:
kernel = kern.RBF(input_dim) # + kern.white(input_dim) kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
if likelihood is None: if likelihood is None:
likelihood = Gaussian() likelihood = Gaussian()
@ -48,21 +51,17 @@ class BayesianGPLVM(SparseGP):
self.variational_prior = NormalPrior() self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance) X = NormalPosterior(X, X_variance)
if inference_method is None:
if np.any(np.isnan(Y)):
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
inference_method = VarDTCMissingData()
else:
from ..inference.latent_function_inference.var_dtc import VarDTC
inference_method = VarDTC()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return SparseGP._getstate(self) + [self.init]
def _setstate(self, state):
self._const_jitter = None
self.init = state.pop()
SparseGP._setstate(self, state)
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form.""" """Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient = X_grad X.mean.gradient, X.variance.gradient = X_grad

View file

@ -2,10 +2,10 @@
# Copyright (c) 2013, the GPy Authors (see AUTHORS.txt) # Copyright (c) 2013, the GPy Authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP from ..core import GP
from .. import likelihoods from .. import likelihoods
from .. import kern from .. import kern
from ..inference.latent_function_inference.expectation_propagation import EP
class GPClassification(GP): class GPClassification(GP):
""" """
@ -27,4 +27,4 @@ class GPClassification(GP):
likelihood = likelihoods.Bernoulli() likelihood = likelihoods.Bernoulli()
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, name='gp_classification') GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification')

View file

@ -29,8 +29,3 @@ class GPRegression(GP):
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata) super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
def _getstate(self):
return GP._getstate(self)
def _setstate(self, state):
return GP._setstate(self, state)

View file

@ -5,7 +5,6 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from .. import kern from .. import kern
from ..util.linalg import PCA
from ..core import GP, Param from ..core import GP, Param
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from .. import util from .. import util
@ -29,9 +28,11 @@ class GPLVM(GP):
""" """
if X is None: if X is None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
X = initialize_latent(init, input_dim, Y) X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
if kernel is None: if kernel is None:
kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
likelihood = Gaussian() likelihood = Gaussian()
@ -43,12 +44,6 @@ class GPLVM(GP):
super(GPLVM, self).parameters_changed() super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
def _getstate(self):
return GP._getstate(self)
def _setstate(self, state):
GP._setstate(self, state)
def jacobian(self,X): def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim)) target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim): for i in range(self.output_dim):

View file

@ -6,12 +6,12 @@ import itertools
import pylab import pylab
from ..core import Model from ..core import Model
from ..util.linalg import PCA
from ..kern import Kern from ..kern import Kern
from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization import Param, Parameterized from ..core.parameterization import Param, Parameterized
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from GPy.util.initialization import initialize_latent
class MRD(Model): class MRD(Model):
""" """
@ -51,27 +51,31 @@ class MRD(Model):
inference_method=None, likelihood=None, name='mrd', Ynames=None): inference_method=None, likelihood=None, name='mrd', Ynames=None):
super(MRD, self).__init__(name) super(MRD, self).__init__(name)
# sort out the kernels
if kernel is None:
from ..kern import RBF
self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))]
elif isinstance(kernel, Kern):
self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))]
else:
assert len(kernel) == len(Ylist), "need one kernel per output"
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
self.kern = kernel
self.input_dim = input_dim self.input_dim = input_dim
self.num_inducing = num_inducing self.num_inducing = num_inducing
self.Ylist = Ylist self.Ylist = Ylist
self._in_init_ = True self._in_init_ = True
X = self._init_X(initx, Ylist) X, fracs = self._init_X(initx, Ylist)
self.Z = Param('inducing inputs', self._init_Z(initz, X)) self.Z = Param('inducing inputs', self._init_Z(initz, X))
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
# sort out the kernels
if kernel is None:
from ..kern import RBF
self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))]
elif isinstance(kernel, Kern):
self.kern = []
for i in range(len(Ylist)):
k = kernel.copy()
self.kern.append(k)
else:
assert len(kernel) == len(Ylist), "need one kernel per output"
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
self.kern = kernel
if X_variance is None: if X_variance is None:
X_variance = np.random.uniform(0, .2, X.shape) X_variance = np.random.uniform(0.1, 0.2, X.shape)
self.variational_prior = NormalPrior() self.variational_prior = NormalPrior()
self.X = NormalPosterior(X, X_variance) self.X = NormalPosterior(X, X_variance)
@ -107,9 +111,8 @@ class MRD(Model):
def parameters_changed(self): def parameters_changed(self):
self._log_marginal_likelihood = 0 self._log_marginal_likelihood = 0
self.posteriors = [] self.posteriors = []
self.Z.gradient = 0. self.Z.gradient[:] = 0.
self.X.mean.gradient = 0. self.X.gradient[:] = 0.
self.X.variance.gradient = 0.
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
@ -147,14 +150,22 @@ class MRD(Model):
if Ylist is None: if Ylist is None:
Ylist = self.Ylist Ylist = self.Ylist
if init in "PCA_concat": if init in "PCA_concat":
X = PCA(np.hstack(Ylist), self.input_dim)[0] X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
fracs = [fracs]*self.input_dim
elif init in "PCA_single": elif init in "PCA_single":
X = np.zeros((Ylist[0].shape[0], self.input_dim)) X = np.zeros((Ylist[0].shape[0], self.input_dim))
fracs = []
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
X[:, qs] = PCA(Y, len(qs))[0] x,frcs = initialize_latent('PCA', len(qs), Y)
X[:, qs] = x
fracs.append(frcs)
else: # init == 'random': else: # init == 'random':
X = np.random.randn(Ylist[0].shape[0], self.input_dim) X = np.random.randn(Ylist[0].shape[0], self.input_dim)
return X fracs = X.var(0)
fracs = [fracs]*self.input_dim
X -= X.mean()
X /= X.std()
return X, fracs
def _init_Z(self, init="permute", X=None): def _init_Z(self, init="permute", X=None):
if X is None: if X is None:

View file

@ -46,11 +46,3 @@ class SparseGPClassification(SparseGP):
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
self.ensure_default_constraints() self.ensure_default_constraints()
def _getstate(self):
return SparseGP._getstate(self)
def _setstate(self, state):
return SparseGP._setstate(self, state)
pass

View file

@ -51,14 +51,6 @@ class SparseGPRegression(SparseGP):
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC()) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
def _getstate(self):
return SparseGP._getstate(self)
def _setstate(self, state):
return SparseGP._setstate(self, state)
class SparseGPRegressionUncertainInput(SparseGP): class SparseGPRegressionUncertainInput(SparseGP):
""" """
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance) Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)

View file

@ -28,14 +28,6 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
self.ensure_default_constraints() self.ensure_default_constraints()
def _getstate(self):
return SparseGPRegression._getstate(self)
def _setstate(self, state):
return SparseGPRegression._setstate(self, state)
def _get_param_names(self): def _get_param_names(self):
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
+ SparseGPRegression._get_param_names(self)) + SparseGPRegression._get_param_names(self))

View file

@ -43,10 +43,3 @@ class SVIGPRegression(SVIGP):
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize) SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
self.load_batch() self.load_batch()
def _getstate(self):
return GPBase._getstate(self)
def _setstate(self, state):
return GPBase._setstate(self, state)

View file

@ -30,14 +30,6 @@ class WarpedGP(GP):
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params()) self._set_params(self._get_params())
def _getstate(self):
return GP._getstate(self)
def _setstate(self, state):
return GP._setstate(self, state)
def _scale_data(self, Y): def _scale_data(self, Y):
self._Ymax = Y.max() self._Ymax = Y.max()
self._Ymin = Y.min() self._Ymin = Y.min()

View file

@ -53,8 +53,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_rows = slice(None) which_data_rows = slice(None)
if which_data_ycols == 'all': if which_data_ycols == 'all':
which_data_ycols = np.arange(model.output_dim) which_data_ycols = np.arange(model.output_dim)
if len(which_data_ycols)==0: #if len(which_data_ycols)==0:
raise ValueError('No data selected for plotting') #raise ValueError('No data selected for plotting')
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)

View file

@ -33,6 +33,8 @@ class Test(unittest.TestCase):
self.assertListEqual(self.param_index[one].tolist(), [3]) self.assertListEqual(self.param_index[one].tolist(), [3])
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), []) self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
self.assertListEqual(self.view.remove('not in there', [2,3,4]).tolist(), [])
def test_shift_left(self): def test_shift_left(self):
self.view.shift_left(0, 2) self.view.shift_left(0, 2)
self.assertListEqual(self.param_index[three].tolist(), [2,5]) self.assertListEqual(self.param_index[three].tolist(), [2,5])
@ -82,6 +84,10 @@ class Test(unittest.TestCase):
self.assertEqual(self.param_index.size, 6) self.assertEqual(self.param_index.size, 6)
self.assertEqual(self.view.size, 5) self.assertEqual(self.view.size, 5)
def test_print(self):
print self.param_index
print self.view
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_index_view'] #import sys;sys.argv = ['', 'Test.test_index_view']
unittest.main() unittest.main()

View file

@ -5,9 +5,11 @@ import unittest
import numpy as np import numpy as np
import GPy import GPy
import sys import sys
from GPy.core.parameterization.param import Param
verbose = 0 verbose = 0
np.random.seed(50)
class Kern_check_model(GPy.core.Model): class Kern_check_model(GPy.core.Model):
@ -29,7 +31,7 @@ class Kern_check_model(GPy.core.Model):
dL_dK = np.ones((X.shape[0], X2.shape[0])) dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel = kernel self.kernel = kernel
self.X = GPy.core.parameterization.Param('X',X) self.X = X
self.X2 = X2 self.X2 = X2
self.dL_dK = dL_dK self.dL_dK = dL_dK
@ -76,10 +78,11 @@ class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """ """This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.X = Param('X',X)
self.add_parameter(self.X) self.add_parameter(self.X)
def parameters_changed(self): def parameters_changed(self):
self.X.gradient = self.kernel.gradients_X(self.dL_dK, self.X, self.X2) self.X.gradient[:] = self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dX(Kern_check_dK_dX): class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
@ -90,7 +93,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum() return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
def parameters_changed(self): def parameters_changed(self):
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X) self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
@ -126,6 +129,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if not result: if not result:
print("Positive definite check failed for " + kern.name + " covariance function.") print("Positive definite check failed for " + kern.name + " covariance function.")
pass_checks = False pass_checks = False
assert(result)
return False return False
if verbose: if verbose:
@ -137,6 +141,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False pass_checks = False
assert(result)
return False return False
if verbose: if verbose:
@ -148,6 +153,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False pass_checks = False
assert(result)
return False return False
if verbose: if verbose:
@ -164,6 +170,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False pass_checks = False
assert(result)
return False return False
if verbose: if verbose:
@ -182,6 +189,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if not result: if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
testmodel.checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False pass_checks = False
return False return False
@ -201,6 +210,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if not result: if not result:
print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
testmodel.checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
assert(result)
pass_checks = False pass_checks = False
return False return False
@ -218,6 +228,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False pass_checks = False
assert(result)
return False return False
return pass_checks return pass_checks
@ -226,7 +237,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
class KernelGradientTestsContinuous(unittest.TestCase): class KernelGradientTestsContinuous(unittest.TestCase):
def setUp(self): def setUp(self):
self.N, self.D = 100, 5 self.N, self.D = 10, 5
self.X = np.random.randn(self.N,self.D) self.X = np.random.randn(self.N,self.D)
self.X2 = np.random.randn(self.N+10,self.D) self.X2 = np.random.randn(self.N+10,self.D)
@ -243,6 +254,17 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k.randomize() k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod2(self):
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod3(self):
k = GPy.kern.Matern32(2, active_dims=[2,3]) * (GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D))
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Add(self): def test_Add(self):
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
@ -276,6 +298,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k.randomize() k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_LinearFull(self):
k = GPy.kern.LinearFull(self.D, self.D-1)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#TODO: turn off grad checkingwrt X for indexed kernels like coregionalize #TODO: turn off grad checkingwrt X for indexed kernels like coregionalize
# class KernelGradientTestsContinuous1D(unittest.TestCase): # class KernelGradientTestsContinuous1D(unittest.TestCase):
# def setUp(self): # def setUp(self):
@ -356,25 +383,25 @@ class KernelTestsNonContinuous(unittest.TestCase):
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."
#unittest.main() unittest.main()
np.random.seed(0) # np.random.seed(0)
N0 = 3 # N0 = 3
N1 = 9 # N1 = 9
N2 = 4 # N2 = 4
N = N0+N1+N2 # N = N0+N1+N2
D = 3 # D = 3
X = np.random.randn(N, D+1) # X = np.random.randn(N, D+1)
indices = np.random.random_integers(0, 2, size=N) # indices = np.random.random_integers(0, 2, size=N)
X[indices==0, -1] = 0 # X[indices==0, -1] = 0
X[indices==1, -1] = 1 # X[indices==1, -1] = 1
X[indices==2, -1] = 2 # X[indices==2, -1] = 2
#X = X[X[:, -1].argsort(), :] # #X = X[X[:, -1].argsort(), :]
X2 = np.random.randn((N0+N1)*2, D+1) # X2 = np.random.randn((N0+N1)*2, D+1)
X2[:(N0*2), -1] = 0 # X2[:(N0*2), -1] = 0
X2[(N0*2):, -1] = 1 # X2[(N0*2):, -1] = 1
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] # k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') # kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) # assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
k = GPy.kern.RBF(D) # k = GPy.kern.RBF(D)
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') # kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) # assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))

View file

@ -112,7 +112,7 @@ class TestNoiseModels(object):
self.f = None self.f = None
self.X = None self.X = None
def test_noise_models(self): def test_scale2_models(self):
self.setUp() self.setUp()
#################################################### ####################################################
@ -150,64 +150,64 @@ class TestNoiseModels(object):
noise_models = {"Student_t_default": { noise_models = {"Student_t_default": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_noise"], "names": [".*t_scale2"],
"vals": [self.var], "vals": [self.var],
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
#"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))] #"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
}, },
"laplace": True "laplace": True
}, },
"Student_t_1_var": { "Student_t_1_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_noise"], "names": [".*t_scale2"],
"vals": [1.0], "vals": [1.0],
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_small_deg_free": { "Student_t_small_deg_free": {
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_noise"], "names": [".*t_scale2"],
"vals": [self.var], "vals": [self.var],
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_small_var": { "Student_t_small_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_noise"], "names": [".*t_scale2"],
"vals": [0.001], "vals": [0.001],
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_large_var": { "Student_t_large_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_noise"], "names": [".*t_scale2"],
"vals": [10.0], "vals": [10.0],
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_approx_gauss": { "Student_t_approx_gauss": {
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_noise"], "names": [".*t_scale2"],
"vals": [self.var], "vals": [self.var],
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_log": { "Student_t_log": {
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": [".*t_noise"], "names": [".*t_scale2"],
"vals": [self.var], "vals": [self.var],
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },

View file

@ -130,7 +130,14 @@ class MiscTests(unittest.TestCase):
m2.kern[:] = m.kern[''].values() m2.kern[:] = m.kern[''].values()
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
class GradientTests(unittest.TestCase): def test_model_optimize(self):
X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X,Y)
m.optimize()
print m
class GradientTests(np.testing.TestCase):
def setUp(self): def setUp(self):
###################################### ######################################
# # 1 dimensional example # # 1 dimensional example

View file

@ -93,12 +93,12 @@ class Test(unittest.TestCase):
def test_set_params(self): def test_set_params(self):
self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet') self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
self.par._param_array_[:] = 1 self.par.param_array[:] = 1
self.par._trigger_params_changed() self.par._trigger_params_changed()
self.assertEqual(self.par.params_changed_count, 1, 'now params changed') self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
self.par._param_array_[:] = 2 self.par.param_array[:] = 2
self.par._trigger_params_changed() self.par._trigger_params_changed()
self.assertEqual(self.par.params_changed_count, 2, 'now params changed') self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)

View file

@ -7,7 +7,7 @@ import unittest
import GPy import GPy
import numpy as np import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.array_core import ObsAr from GPy.core.parameterization.observable_array import ObsAr
class ArrayCoreTest(unittest.TestCase): class ArrayCoreTest(unittest.TestCase):
def setUp(self): def setUp(self):
@ -34,6 +34,7 @@ class ParameterizedTest(unittest.TestCase):
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
self.test1 = GPy.core.Parameterized("test model") self.test1 = GPy.core.Parameterized("test model")
self.test1.param = self.param
self.test1.kern = self.rbf+self.white self.test1.kern = self.rbf+self.white
self.test1.add_parameter(self.test1.kern) self.test1.add_parameter(self.test1.kern)
self.test1.add_parameter(self.param, 0) self.test1.add_parameter(self.param, 0)
@ -58,6 +59,9 @@ class ParameterizedTest(unittest.TestCase):
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED]) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
self.test1.kern.rbf.fix() self.test1.kern.rbf.fix()
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)
self.test1.fix()
self.assertTrue(self.test1.is_fixed)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*self.test1.size)
def test_remove_parameter(self): def test_remove_parameter(self):
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
@ -86,9 +90,9 @@ class ParameterizedTest(unittest.TestCase):
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
def test_remove_parameter_param_array_grad_array(self): def test_remove_parameter_param_array_grad_array(self):
val = self.test1.kern._param_array_.copy() val = self.test1.kern.param_array.copy()
self.test1.kern.remove_parameter(self.white) self.test1.kern.remove_parameter(self.white)
self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist()) self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist())
def test_add_parameter_already_in_hirarchy(self): def test_add_parameter_already_in_hirarchy(self):
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0]) self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])

207
GPy/testing/pickle_tests.py Normal file
View file

@ -0,0 +1,207 @@
'''
Created on 13 Mar 2014
@author: maxz
'''
import unittest, itertools
import cPickle as pickle
import numpy as np
from GPy.core.parameterization.index_operations import ParameterIndexOperations,\
ParameterIndexOperationsView
import tempfile
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.observable_array import ObsAr
from GPy.core.parameterization.priors import Gaussian
from GPy.kern._src.rbf import RBF
from GPy.kern._src.linear import Linear
from GPy.kern._src.static import Bias, White
from GPy.examples.dimensionality_reduction import mrd_simulation,\
bgplvm_simulation
from GPy.examples.regression import toy_rbf_1d_50
from GPy.core.parameterization.variational import NormalPosterior
from GPy.models.gp_regression import GPRegression
class ListDictTestCase(unittest.TestCase):
def assertListDictEquals(self, d1, d2, msg=None):
for k,v in d1.iteritems():
self.assertListEqual(list(v), list(d2[k]), msg)
def assertArrayListEquals(self, l1, l2):
for a1, a2 in itertools.izip(l1,l2):
np.testing.assert_array_equal(a1, a2)
class Test(ListDictTestCase):
def test_parameter_index_operations(self):
pio = ParameterIndexOperations(dict(test1=np.array([4,3,1,6,4]), test2=np.r_[2:130]))
piov = ParameterIndexOperationsView(pio, 20, 250)
self.assertListDictEquals(dict(piov.items()), dict(piov.copy().iteritems()))
self.assertListDictEquals(dict(pio.iteritems()), dict(pio.copy().items()))
self.assertArrayListEquals(pio.copy().indices(), pio.indices())
self.assertArrayListEquals(piov.copy().indices(), piov.indices())
with tempfile.TemporaryFile('w+b') as f:
pickle.dump(pio, f)
f.seek(0)
pio2 = pickle.load(f)
self.assertListDictEquals(pio._properties, pio2._properties)
with tempfile.TemporaryFile('w+b') as f:
pickle.dump(piov, f)
f.seek(0)
pio2 = pickle.load(f)
self.assertListDictEquals(dict(piov.items()), dict(pio2.iteritems()))
def test_param(self):
param = Param('test', np.arange(4*2).reshape(4,2))
param[0].constrain_positive()
param[1].fix()
param[2].set_prior(Gaussian(0,1))
pcopy = param.copy()
self.assertListEqual(param.tolist(), pcopy.tolist())
self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n'))
self.assertIsNot(param, pcopy)
with tempfile.TemporaryFile('w+b') as f:
pickle.dump(param, f)
f.seek(0)
pcopy = pickle.load(f)
self.assertListEqual(param.tolist(), pcopy.tolist())
self.assertSequenceEqual(str(param), str(pcopy))
def test_observable_array(self):
obs = ObsAr(np.arange(4*2).reshape(4,2))
pcopy = obs.copy()
self.assertListEqual(obs.tolist(), pcopy.tolist())
with tempfile.TemporaryFile('w+b') as f:
pickle.dump(obs, f)
f.seek(0)
pcopy = pickle.load(f)
self.assertListEqual(obs.tolist(), pcopy.tolist())
self.assertSequenceEqual(str(obs), str(pcopy))
def test_parameterized(self):
par = RBF(1, active_dims=[1]) + Linear(2, active_dims=[0,2]) + Bias(3) + White(3)
par.gradient = 10
par.randomize()
pcopy = par.copy()
self.assertIsInstance(pcopy.constraints, ParameterIndexOperations)
self.assertIsInstance(pcopy.rbf.constraints, ParameterIndexOperationsView)
self.assertIs(pcopy.constraints, pcopy.rbf.constraints._param_index_ops)
self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
with tempfile.TemporaryFile('w+b') as f:
par.pickle(f)
f.seek(0)
pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
pcopy.gradient = 10
np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient)
np.testing.assert_allclose(pcopy.linear.full_gradient, 10)
self.assertSequenceEqual(str(par), str(pcopy))
def test_model(self):
par = toy_rbf_1d_50(optimize=0, plot=0)
pcopy = par.copy()
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0))
with tempfile.TemporaryFile('w+b') as f:
par.pickle(f)
f.seek(0)
pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
self.assertSequenceEqual(str(par), str(pcopy))
self.assert_(pcopy.checkgrad())
def test_modelrecreation(self):
par = toy_rbf_1d_50(optimize=0, plot=0)
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0))
with tempfile.TemporaryFile('w+b') as f:
par.pickle(f)
f.seek(0)
pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
self.assertSequenceEqual(str(par), str(pcopy))
self.assert_(pcopy.checkgrad())
def test_posterior(self):
X = np.random.randn(3,5)
Xv = np.random.rand(*X.shape)
par = NormalPosterior(X,Xv)
par.gradient = 10
pcopy = par.copy()
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
with tempfile.TemporaryFile('w+b') as f:
par.pickle(f)
f.seek(0)
pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
pcopy.gradient = 10
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
np.testing.assert_allclose(pcopy.mean.full_gradient, 10)
self.assertSequenceEqual(str(par), str(pcopy))
def test_model_concat(self):
par = mrd_simulation(optimize=0, plot=0, plot_sim=0)
par.randomize()
pcopy = par.copy()
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0))
with tempfile.TemporaryFile('w+b') as f:
par.pickle(f)
f.seek(0)
pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
self.assertSequenceEqual(str(par), str(pcopy))
self.assert_(pcopy.checkgrad())
def _callback(self, what, which):
what.count += 1
def test_add_observer(self):
par = toy_rbf_1d_50(optimize=0, plot=0)
par.name = "original"
par.count = 0
par.add_observer(self, self._callback, 1)
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
self.assertNotIn(par.observers[0], pcopy.observers)
pcopy = par.copy()
pcopy.name = "copy"
self.assertTrue(par.checkgrad())
self.assertTrue(pcopy.checkgrad())
self.assertTrue(pcopy.kern.checkgrad())
self.assertIn(par.observers[0], pcopy.observers)
self.assertEqual(par.count, 3)
self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
unittest.main()

View file

@ -48,7 +48,7 @@ class Cacher(object):
if k in kw and kw[k] is not None: if k in kw and kw[k] is not None:
return self.operation(*args, **kw) return self.operation(*args, **kw)
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
#return self.operation(*args) # return self.operation(*args, **kw)
#if the result is cached, return the cached computation #if the result is cached, return the cached computation
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs] state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
@ -98,11 +98,20 @@ class Cacher(object):
self.cached_outputs = [] self.cached_outputs = []
self.inputs_changed = [] self.inputs_changed = []
def __deepcopy__(self, memo=None):
return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs)
def __getstate__(self, memo=None):
raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
def __setstate__(self, memo=None):
raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
@property @property
def __name__(self): def __name__(self):
return self.operation.__name__ return self.operation.__name__
from functools import partial from functools import partial, update_wrapper
class Cacher_wrap(object): class Cacher_wrap(object):
def __init__(self, f, limit, ignore_args, force_kwargs): def __init__(self, f, limit, ignore_args, force_kwargs):
@ -110,6 +119,7 @@ class Cacher_wrap(object):
self.ignore_args = ignore_args self.ignore_args = ignore_args
self.force_kwargs = force_kwargs self.force_kwargs = force_kwargs
self.f = f self.f = f
update_wrapper(self, self.f)
def __get__(self, obj, objtype=None): def __get__(self, obj, objtype=None):
return partial(self, obj) return partial(self, obj)
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):

View file

@ -331,7 +331,7 @@ def football_data(season='1314', data_set='football_data'):
# This will be for downloading google trends data. # This will be for downloading google trends data.
def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'): def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
"""Data downloaded from Google trends for given query terms.""" """Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations."""
# Inspired by this notebook: # Inspired by this notebook:
# http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb # http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb

18
GPy/util/functions.py Normal file
View file

@ -0,0 +1,18 @@
import numpy as np
from scipy.special import erf, erfcx
import sys
epsilon = sys.float_info.epsilon
lim_val = -np.log(epsilon)
def cum_gaussian(x):
g=0.5*(1+erf(x/np.sqrt(2)))
return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g))
def ln_cum_gaussian(x):
return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x)))
def clip_exp(x):
if any(x>=lim_val) or any(x<=-lim_val):
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))
else:
return np.exp(x)
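# Usage sketch, illustrative and not part of the committed module: these guards
# keep exp and the Gaussian CDF numerically safe at extreme inputs.
if __name__ == '__main__':
x = np.array([-1000., 0., 1000.])
print(clip_exp(x))         # extremes are clamped to exp(+/-lim_val)
print(cum_gaussian(x))     # clipped into (0, 1), so ...
print(ln_cum_gaussian(x))  # ... its log stays finite deep in the left tail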

View file

@ -5,13 +5,15 @@ Created on 24 Feb 2014
''' '''
import numpy as np import numpy as np
from linalg import PCA from GPy.util.pca import pca
def initialize_latent(init, input_dim, Y): def initialize_latent(init, input_dim, Y):
Xr = np.random.randn(Y.shape[0], input_dim) Xr = np.random.randn(Y.shape[0], input_dim)
if init == 'PCA': if init == 'PCA':
PC = PCA(Y, input_dim)[0] p = pca(Y)
PC = p.project(Y, min(input_dim, Y.shape[1]))
Xr[:PC.shape[0], :PC.shape[1]] = PC Xr[:PC.shape[0], :PC.shape[1]] = PC
else: else:
pass var = Xr.var(0)
return Xr return Xr, var/var.max()
return Xr, p.fracs[:input_dim]
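# Usage sketch, illustrative: the (X, fracs) pair returned above is what lets
# the GPLVM constructors in this commit seed ARD lengthscales from the PCA
# variance fractions:
if __name__ == '__main__':
import GPy
Y = np.random.randn(50, 8)
X, fracs = initialize_latent('PCA', 2, Y)
kernel = GPy.kern.RBF(2, lengthscale=fracs, ARD=True)
print(kernel.lengthscale)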

View file

@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'):
tmp, _ = dtrtrs(L, X, lower=1, trans=0) tmp, _ = dtrtrs(L, X, lower=1, trans=0)
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
def PCA(Y, input_dim):
"""
Principal component analysis: maximum likelihood solution by SVD
:param Y: NxD np.array of data
:param input_dim: int, dimension of projection
:rval X: - Nxinput_dim np.array of dimensionality reduced data
:rval W: - input_dimxD mapping from X to Y
"""
if not np.allclose(Y.mean(axis=0), 0.0):
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
# Y -= Y.mean(axis=0)
Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
[X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
v = X.std(axis=0)
X /= v;
W *= v;
return X, W.T

122
GPy/util/pca.py Normal file
View file

@ -0,0 +1,122 @@
'''
Created on 10 Sep 2012
@author: Max Zwiessele
@copyright: Max Zwiessele 2012
'''
import numpy
import pylab
import matplotlib
from numpy.linalg.linalg import LinAlgError
class pca(object):
"""
pca module with automatic primal/dual determination.
"""
def __init__(self, X):
self.mu = X.mean(0)
self.sigma = X.std(0)
X = self.center(X)
# self.X = input
if X.shape[0] >= X.shape[1]:
# print "N >= D: using primal"
self.eigvals, self.eigvectors = self._primal_eig(X)
else:
# print "N < D: using dual"
self.eigvals, self.eigvectors = self._dual_eig(X)
self.sort = numpy.argsort(self.eigvals)[::-1]
self.eigvals = self.eigvals[self.sort]
self.eigvectors = self.eigvectors[:, self.sort]
self.fracs = self.eigvals / self.eigvals.sum()
self.Q = self.eigvals.shape[0]
def center(self, X):
"""
Center `X` in pca space.
"""
X = X - self.mu
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
return X
def _primal_eig(self, X):
return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X))
def _dual_eig(self, X):
dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X))
relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:]
eigvals = dual_eigvals[relevant_dimensions]
eigvects = dual_eigvects[:, relevant_dimensions]
eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects)
eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects)))
return eigvals, eigvects
def project(self, X, Q=None):
"""
Project X into pca space, defined by the Q highest eigenvalues.
Y = X dot V
"""
if Q is None:
Q = self.Q
if Q > X.shape[1]:
raise IndexError("requested dimension larger then input dimension")
X = self.center(X)
return X.dot(self.eigvectors[:, :Q])
def plot_fracs(self, Q=None, ax=None, fignum=None):
"""
Plot fractions of Eigenvalues sorted in descending order.
"""
if ax is None:
fig = pylab.figure(fignum)
ax = fig.add_subplot(111)
if Q is None:
Q = self.Q
ticks = numpy.arange(Q)
bar = ax.bar(ticks - .4, self.fracs[:Q])
ax.set_xticks(ticks)
ax.set_xticklabels(map(lambda x: r"${}$".format(x), ticks + 1))
ax.set_ylabel("Eigenvalue fraction")
ax.set_xlabel("PC")
ax.set_ylim(0, ax.get_ylim()[1])
ax.set_xlim(ticks.min() - .5, ticks.max() + .5)
try:
pylab.tight_layout()
except:
pass
return bar
def plot_2d(self, X, labels=None, s=20, marker='o',
dimensions=(0, 1), ax=None, colors=None,
fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable
** kwargs):
"""
Plot the given `dimensions` against each other in PC space, coloured by
label. `labels` can be any sequence of length X.shape[0]; the labels can
be drawn in a legend with a subsequent call to legend().
"""
if ax is None:
fig = pylab.figure(fignum)
ax = fig.add_subplot(111)
if labels is None:
labels = numpy.zeros(X.shape[0])
ulabels = []
for lab in labels:
if not lab in ulabels:
ulabels.append(lab)
nlabels = len(ulabels)
if colors is None:
colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
X_ = self.project(X, self.Q)[:,dimensions]
kwargs.update(dict(s=s))
plots = list()
for i, l in enumerate(ulabels):
kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
ax.set_xlabel(r"PC$_1$")
ax.set_ylabel(r"PC$_2$")
try:
pylab.tight_layout()
except:
pass
return plots
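# Usage sketch, illustrative: project toy data onto the leading components.
if __name__ == '__main__':
Y = numpy.random.randn(100, 6)
p = pca(Y)             # primal/dual eigendecomposition chosen from the shape of Y
X2 = p.project(Y, 2)   # N x 2 projection onto the two leading PCs
print(p.fracs[:2])     # fraction of total variance captured per component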

View file

@ -1,5 +1,55 @@
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma,polygamma
class gammaln(Function):
nargs = 1
def fdiff(self, argindex=1):
x=self.args[0]
return polygamma(0, x)
@classmethod
def eval(cls, x):
if x.is_Number:
return log(gamma(x))
class ln_cum_gaussian(Function):
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi)
@classmethod
def eval(cls, x):
if x.is_Number:
return log(cum_gaussian(x))
def _eval_is_real(self):
return self.args[0].is_real
class cum_gaussian(Function):
nargs = 1
def fdiff(self, argindex=1):
x = self.args[0]
return gaussian(x)
@classmethod
def eval(cls, x):
if x.is_Number:
return 0.5*(1+erf(sqrt(2)/2*x))
def _eval_is_real(self):
return self.args[0].is_real
class gaussian(Function):
nargs = 1
@classmethod
def eval(cls, x):
return 1/sqrt(2*pi)*exp(-0.5*x*x)
def _eval_is_real(self):
return True
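# Illustrative check, hypothetical and not part of the patch: sympy routes
# Derivative through fdiff, so the log-CDF derivative comes out in the
# numerically safe form the code generator relies on:
if __name__ == '__main__':
from sympy import Symbol, diff
x = Symbol('x', real=True)
print(diff(ln_cum_gaussian(x), x))  # exp(-0.5*x**2 - ln_cum_gaussian(x))/sqrt(2*pi)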
class ln_diff_erf(Function): class ln_diff_erf(Function):
nargs = 2 nargs = 2