Mirror of https://github.com/SheffieldML/GPy.git
Commit fbec4d0d0b: Merge branch 'params' of github.com:SheffieldML/GPy into params
Conflicts:
	GPy/models/gp_classification.py
	GPy/models/sparse_gp_classification.py
73 changed files with 3587 additions and 1353 deletions
@@ -4,6 +4,7 @@
from model import *
from parameterization.parameterized import adjust_name_for_printing, Parameterizable
from parameterization.param import Param, ParamConcatenation
from parameterization.observable_array import ObsAr

from gp import GP
from sparse_gp import SparseGP

@@ -208,27 +208,9 @@ class GP(Model):
from ..plotting.matplot_dep import models_plots
return models_plots.plot_fit(self,*args,**kwargs)

def _getstate(self):
def input_sensitivity(self):
"""

Get the current state of the class, here we return everything that is
needed to recompute the model.

Returns the sensitivity for each dimension of this model
"""
return self.kern.input_sensitivity()

return Model._getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
]

def _setstate(self, state):
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model._setstate(self, state)

@@ -34,7 +34,7 @@ class Mapping(Parameterized):
raise NotImplementedError

def df_dtheta(self, dL_df, X):
"""The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters.
"""The gradient of the outputs of the mapping with respect to each of the parameters.

:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)

@@ -50,7 +50,7 @@ class Mapping(Parameterized):
"""
Plots the mapping associated with the model.
- In one dimension, the function is plotted.
- In two dimsensions, a contour-plot shows the function
- In two dimensions, a contour-plot shows the function
- In higher dimensions, we've not implemented this yet !TODO!

Can plot only part of the data and part of the posterior functions

@@ -24,37 +24,9 @@ class Model(Parameterized):

def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class"

def _log_likelihood_gradients(self):
return self.gradient

def _getstate(self):
"""
Get the current state of the class.
Inherited from Parameterized, so add those parameters to the state

:return: list of states from the model.

"""
return Parameterized._getstate(self) + \
[self.priors, self.optimization_runs,
self.sampling_runs, self.preferred_optimizer]

def _setstate(self, state):
"""
set state from previous call to _getstate
call Parameterized with the rest of the state

:param state: the state of the model.
:type state: list as returned from _getstate.

"""
self.preferred_optimizer = state.pop()
self.sampling_runs = state.pop()
self.optimization_runs = state.pop()
self.priors = state.pop()
Parameterized._setstate(self, state)

def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
"""
Perform random restarts of the model, and set the model to the best
@@ -142,13 +114,60 @@ class Model(Parameterized):
"""
raise DeprecationWarning, 'parameters now have default constraints'

def input_sensitivity(self):
def objective_function(self):
"""
Returns the sensitivity for each dimension of this kernel.
"""
return self.kern.input_sensitivity()
The objective function for the given algorithm.

def objective_function(self, x):
This function is the true objective, which wants to be minimized.
Note that all parameters are already set and in place, so you just need
to return the objective function here.

For probabilistic models this is the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your objective here!
"""
return -float(self.log_likelihood()) - self.log_prior()

def objective_function_gradients(self):
"""
The gradients for the objective function for the given algorithm.

You can find the gradient for the parameters in self.gradient at all times.
This is the place, where gradients get stored for parameters.

This function is the true objective, which wants to be minimized.
Note that all parameters are already set and in place, so you just need
to return the gradient here.

For probabilistic models this is the gradient of the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your gradient here!
"""
return -(self._log_likelihood_gradients() + self._log_prior_gradients())

def _grads(self, x):
"""
Gets the gradients from the likelihood and the priors.

Failures are handled robustly. The algorithm will try several times to
return the gradients, and will raise the original exception if
the objective cannot be computed.

:param x: the parameters of the model.
:type x: np.array
"""
try:
self._set_params_transformed(x)
obj_grads = self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise
self._fail_count += 1
obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
return obj_grads

def _objective(self, x):
"""
The objective function passed to the optimizer. It combines
the likelihood and the priors.
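
Note: the docstrings above imply that, after this refactor, a model only has to return its objective and the matching gradient; the optimizer plumbing (_objective, _grads, _objective_grads) is inherited. The following is a minimal, hedged sketch of that contract, not code from the diff: the toy quadratic model, its parameter name, and the exact constructor call are hypothetical, and the GPy imports reflect this branch's layout.
import numpy as np
from GPy.core.model import Model                      # assumed import path
from GPy.core.parameterization.param import Param     # assumed import path

class QuadraticToy(Model):
    def __init__(self):
        super(QuadraticToy, self).__init__(name='quadratic_toy')  # constructor details assumed
        self.theta = Param('theta', np.array([3.0]))
        self.add_parameter(self.theta)

    def objective_function(self):
        # true objective to be minimized; parameters are already set in place
        return float((self.theta - 1.0) ** 2)

    def objective_function_gradients(self):
        # gradients are written into the parameters' gradient views
        self.theta.gradient = 2.0 * (self.theta - 1.0)
        return self.gradient

m = QuadraticToy()
m.optimize()   # internally runs opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
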
@@ -162,55 +181,26 @@ class Model(Parameterized):
"""
try:
self._set_params_transformed(x)
obj = self.objective_function()
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e:
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise e
raise
self._fail_count += 1
return np.inf
return -float(self.log_likelihood()) - self.log_prior()
return obj

def objective_function_gradients(self, x):
"""
Gets the gradients from the likelihood and the priors.

Failures are handled robustly. The algorithm will try several times to
return the gradients, and will raise the original exception if
the objective cannot be computed.

:param x: the parameters of the model.
:type x: np.array
"""
def _objective_grads(self, x):
try:
self._set_params_transformed(x)
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e:
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise e
self._fail_count += 1
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
return obj_grads

def objective_and_gradients(self, x):
"""
Compute the objective function of the model and the gradient of the model at the point given by x.

:param x: the point at which gradients are to be computed.
:type x: np.array
"""

try:
self._set_params_transformed(x)
obj_f = -float(self.log_likelihood()) - self.log_prior()
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e:
if self._fail_count >= self._allowed_failures:
raise e
raise
self._fail_count += 1
obj_f = np.inf
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
return obj_f, obj_grads

def optimize(self, optimizer=None, start=None, **kwargs):

@@ -241,7 +231,7 @@ class Model(Parameterized):
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)

opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)

self.optimization_runs.append(opt)
@@ -289,19 +279,22 @@ class Model(Parameterized):

# just check the global ratio
dx = np.zeros(x.shape)
dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)

# evaulate around the point x
f1 = self.objective_function(x + dx)
f2 = self.objective_function(x - dx)
gradient = self.objective_function_gradients(x)
f1 = self._objective(x + dx)
f2 = self._objective(x - dx)
gradient = self._grads(x)

dx = dx[transformed_index]
gradient = gradient[transformed_index]

denominator = (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance
global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
if global_ratio is np.nan:
global_ratio = 0
return np.abs(1. - global_ratio) < tolerance or global_diff
else:
# check the gradient of each parameter individually, and do some pretty printing
try:
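
Note: the global check above compares a central finite difference against the analytic directional derivative; for a correct gradient their ratio should be close to 1. A standalone sketch of that test (not GPy code, names are hypothetical):
import numpy as np

def global_gradcheck(objective, gradient, x, step=1e-6, tolerance=1e-3):
    dx = step * np.sign(np.random.uniform(-1, 1, x.size))
    f1, f2 = objective(x + dx), objective(x - dx)
    # numerical directional derivative (f1 - f2) vs analytic 2 * dx . grad
    denominator = 2 * np.dot(dx, gradient(x))
    ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
    return np.abs(1. - ratio) < tolerance

# for f(x) = sum(x**2) with gradient 2x the ratio is exactly 1, so the check passes
print global_gradcheck(lambda x: np.sum(x**2), lambda x: 2*x, np.random.randn(3))
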
@@ -337,15 +330,15 @@ class Model(Parameterized):
print "No free parameters to check"
return

gradient = self.objective_function_gradients(x).copy()
gradient = self._grads(x).copy()
np.where(gradient == 0, 1e-312, gradient)
ret = True
for nind, xind in itertools.izip(param_index, transformed_index):
xx = x.copy()
xx[xind] += step
f1 = self.objective_function(xx)
f1 = self._objective(xx)
xx[xind] -= 2.*step
f2 = self.objective_function(xx)
f2 = self._objective(xx)
numerical_gradient = (f1 - f2) / (2 * step)
if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
else: ratio = (f1 - f2) / (2 * step * gradient[xind])

@@ -24,12 +24,6 @@ class ParameterIndexOperations(object):
for t, i in constraints.iteritems():
self.add(t, i)

def __getstate__(self):
return self._properties

def __setstate__(self, state):
self._properties = state

def iteritems(self):
return self._properties.iteritems()

@@ -92,13 +86,18 @@ class ParameterIndexOperations(object):
for i, v in parameter_index_view.iteritems():
self.add(i, v+offset)


def copy(self):
return self.__deepcopy__(None)

def __deepcopy__(self, memo):
return ParameterIndexOperations(dict(self.iteritems()))

def __getitem__(self, prop):
return self._properties[prop]

def __delitem__(self, prop):
del self._properties[prop]

def __str__(self, *args, **kwargs):
import pprint
return pprint.pformat(dict(self._properties))

@@ -183,7 +182,7 @@ class ParameterIndexOperationsView(object):

def remove(self, prop, indices):
removed = self._param_index_ops.remove(prop, indices+self._offset)
removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset)
if removed.size > 0:
return removed - self._size + 1
return removed

@@ -193,6 +192,9 @@ class ParameterIndexOperationsView(object):
ind = self._filter_index(self._param_index_ops[prop])
return ind

def __delitem__(self, prop):
self.remove(prop, self[prop])

def __str__(self, *args, **kwargs):
import pprint
return pprint.pformat(dict(self.iteritems()))

@@ -203,6 +205,9 @@ class ParameterIndexOperationsView(object):

def copy(self):
return self.__deepcopy__(None)

def __deepcopy__(self, memo):
return ParameterIndexOperations(dict(self.iteritems()))
pass

@@ -5,6 +5,7 @@ Created on 27 Feb 2014
'''

from collections import defaultdict
import weakref

def intarray_default_factory():
import numpy as np
@@ -28,4 +29,90 @@ class ArrayList(list):
return True
return False

def index(self, item):
index = 0
for el in self:
if el is item:
return index
index += 1
raise ValueError, "{} is not in list".format(item)
pass

class ObservablesList(object):
def __init__(self):
self._poc = []

def __getitem__(self, ind):
p,o,c = self._poc[ind]
return p, o(), c

def remove(self, priority, observable, callble):
"""
"""
self.flush()
for i in range(len(self) - 1, -1, -1):
p,o,c = self[i]
if priority==p and observable==o and callble==c:
del self._poc[i]

def __repr__(self):
return self._poc.__repr__()

def add(self, priority, observable, callble):
ins = 0
for pr, _, _ in self:
if priority > pr:
break
ins += 1
self._poc.insert(ins, (priority, weakref.ref(observable), callble))

def __str__(self):
ret = []
curr_p = None
for p, o, c in self:
curr = ''
if curr_p != p:
pre = "{!s}: ".format(p)
curr_pre = pre
else: curr_pre = " "*len(pre)
curr_p = p
curr += curr_pre
ret.append(curr + ", ".join(map(repr, [o,c])))
return '\n'.join(ret)

def flush(self):
self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]

def __iter__(self):
self.flush()
for p, o, c in self._poc:
if o() is not None:
yield p, o(), c

def __len__(self):
self.flush()
return self._poc.__len__()

def __deepcopy__(self, memo):
self.flush()
s = ObservablesList()
import copy
s._poc = copy.deepcopy(self._poc, memo)
return s

def __getstate__(self):
self.flush()
from ...util.caching import Cacher
obs = []
for p, o, c in self:
if (getattr(o, c.__name__, None) is not None
and not isinstance(o, Cacher)):
obs.append((p,o,c.__name__))
return obs

def __setstate__(self, state):
self._poc = []
for p, o, c in state:
self.add(p,o,getattr(o, c))

pass
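
Note: ObservablesList stores (priority, weakref(observer), callable) triples, keeps them sorted by descending priority, and flush() silently drops entries whose observer has been garbage collected. A standalone sketch of the weakref behaviour it relies on (not GPy code; the callable is deliberately a plain function so it does not keep the observer alive):
import weakref

class Observer(object):
    pass

def on_change(which):
    print 'changed:', which

o = Observer()
entries = [(0, weakref.ref(o), on_change)]
print entries[0][1]() is o                                       # True while the observer lives
del o
entries = [(p, r, c) for p, r, c in entries if r() is not None]  # exactly what flush() does
print entries                                                    # []  -- dead observers are dropped
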
@@ -1,12 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

__updated__ = '2014-03-21'
__updated__ = '2014-04-15'

import numpy as np
from parameter_core import Observable
from parameter_core import Observable, Pickleable

class ObsAr(np.ndarray, Observable):
class ObsAr(np.ndarray, Pickleable, Observable):
"""
An ndarray which reports changes to its observers.
The observers can add themselves with a callable, which

@@ -25,35 +25,32 @@ class ObsAr(np.ndarray, Observable):
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None: return
self._observer_callables_ = getattr(obj, '_observer_callables_', None)
self.observers = getattr(obj, 'observers', None)

def __array_wrap__(self, out_arr, context=None):
return out_arr.view(np.ndarray)

def copy(self):
memo = {}
memo[id(self)] = self
return self.__deepcopy__(memo)

def __deepcopy__(self, memo):
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
memo[id(self)] = s
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
return s

def __reduce__(self):
func, args, state = np.ndarray.__reduce__(self)
return func, args, (state, Observable._getstate(self))
func, args, state = super(ObsAr, self).__reduce__()
return func, args, (state, Pickleable.__getstate__(self))

def __setstate__(self, state):
np.ndarray.__setstate__(self, state[0])
Observable._setstate(self, state[1])

def _s_not_empty(self, s):
# this checks whether there is something picked by this slice.
return True
# TODO: disarmed, for performance increase,
if not isinstance(s, (list,tuple,np.ndarray)):
return True
if isinstance(s, (list,tuple)):
return len(s)!=0
if isinstance(s, np.ndarray):
if s.dtype is bool:
return np.all(s)
else:
return s.size != 0
Pickleable.__setstate__(self, state[1])

def __setitem__(self, s, val):
if self._s_not_empty(s):
super(ObsAr, self).__setitem__(s, val)
self.notify_observers()
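
Note: the point of ObsAr is that every in-place write notifies registered observers. A hedged usage sketch, assuming the import path used in this branch and that ObsAr can be constructed directly from an array; the callback name is hypothetical:
import numpy as np
from GPy.core.parameterization.observable_array import ObsAr  # assumed import path

def on_change(changed, which=None):
    # signature matches callble(self, which=which) in notify_observers
    print 'array changed:', which

X = ObsAr(np.zeros(3))
X.add_observer(on_change, on_change)   # observer object and callable coincide here for brevity
X[1] = 2.0                             # __setitem__ triggers notify_observers()
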
@@ -63,12 +60,6 @@ class ObsAr(np.ndarray, Observable):
def __setslice__(self, start, stop, val):
return self.__setitem__(slice(start, stop), val)

def __copy__(self, *args):
return ObsAr(self.view(np.ndarray).copy())

def copy(self, *args):
return self.__copy__(*args)

def __ilshift__(self, *args, **kwargs):
r = np.ndarray.__ilshift__(self, *args, **kwargs)
self.notify_observers()

@@ -144,76 +135,3 @@ class ObsAr(np.ndarray, Observable):
r = np.ndarray.__imul__(self, *args, **kwargs)
self.notify_observers()
return r

# def __rrshift__(self, *args, **kwargs):
# r = np.ndarray.__rrshift__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __ror__(self, *args, **kwargs):
# r = np.ndarray.__ror__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rxor__(self, *args, **kwargs):
# r = np.ndarray.__rxor__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rdivmod__(self, *args, **kwargs):
# r = np.ndarray.__rdivmod__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __radd__(self, *args, **kwargs):
# r = np.ndarray.__radd__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rdiv__(self, *args, **kwargs):
# r = np.ndarray.__rdiv__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rtruediv__(self, *args, **kwargs):
# r = np.ndarray.__rtruediv__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rshift__(self, *args, **kwargs):
# r = np.ndarray.__rshift__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rmul__(self, *args, **kwargs):
# r = np.ndarray.__rmul__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rpow__(self, *args, **kwargs):
# r = np.ndarray.__rpow__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rsub__(self, *args, **kwargs):
# r = np.ndarray.__rsub__(self, *args, **kwargs)
# self.notify_observers()
# return r

# def __rfloordiv__(self, *args, **kwargs):
# r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
# self.notify_observers()
# return r

@@ -3,8 +3,9 @@
import itertools
import numpy
np = numpy
from parameter_core import OptimizationHandlable, adjust_name_for_printing
from array_core import ObsAr
from observable_array import ObsAr

###### printing
__constraints_name__ = "Constraint"

@@ -43,14 +44,13 @@ class Param(OptimizationHandlable, ObsAr):
_fixes_ = None
_parameters_ = []
def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array, name=name, default_constraint=default_constraint))
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
cls.__name__ = "Param"
obj._current_slice_ = (slice(obj.shape[0]),)
obj._realshape_ = obj.shape
obj._realsize_ = obj.size
obj._realndim_ = obj.ndim
obj._original_ = True
obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64)
return obj

def __init__(self, name, input_array, default_constraint=None, *a, **kw):

@@ -60,7 +60,7 @@ class Param(OptimizationHandlable, ObsAr):
import pydot
node = pydot.Node(id(self), shape='record', label=self.name)
G.add_node(node)
for o in self._observer_callables_.keys():
for o in self.observers.keys():
label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node)
@@ -87,74 +87,24 @@ class Param(OptimizationHandlable, ObsAr):
self.priors = getattr(obj, 'priors', None)

@property
def _param_array_(self):
def param_array(self):
return self

@property
def gradient(self):
"""
Return a view on the gradient, which is in the same shape as this parameter is.
Note: this is not the real gradient array, it is just a view on it.

To work on the real gradient array use: self.full_gradient
"""
if getattr(self, '_gradient_array_', None) is None:
self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64)
return self._gradient_array_[self._current_slice_]

@gradient.setter
def gradient(self, val):
self.gradient[:] = val

#===========================================================================
# Pickling operations
#===========================================================================
def __reduce__(self):
func, args, state = super(Param, self).__reduce__()
return func, args, (state,
(self._name,
self._parent_,
self._parent_index_,
self._default_constraint_,
self._current_slice_,
self._realshape_,
self._realsize_,
self._realndim_,
self.constraints,
self.priors
)
)

def __setstate__(self, state):
super(Param, self).__setstate__(state[0])
state = list(state[1])
self.priors = state.pop()
self.constraints = state.pop()
self._realndim_ = state.pop()
self._realsize_ = state.pop()
self._realshape_ = state.pop()
self._current_slice_ = state.pop()
self._default_constraint_ = state.pop()
self._parent_index_ = state.pop()
self._parent_ = state.pop()
self._name = state.pop()

def copy(self, *args):
constr = self.constraints.copy()
priors = self.priors.copy()
p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_)
p.constraints = constr
p.priors = priors
return p
#===========================================================================
# get/set parameters
#===========================================================================
# def _set_params(self, param, trigger_parent=True):
# self.flat = param
# if trigger_parent: min_priority = None
# else: min_priority = -numpy.inf
# self.notify_observers(None, min_priority)
#
# def _get_params(self):
# return self.flat
#
# def _collect_gradient(self, target):
# target += self.gradient.flat
#
# def _set_gradient(self, g):
# self.gradient = g.reshape(self._realshape_)
self._gradient_array_[self._current_slice_] = val

#===========================================================================
# Array operations -> done
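
Note: Param.gradient hands out a slice of the underlying gradient buffer and its setter writes through that slice. A plain-numpy sketch of the view semantics (not GPy code):
import numpy as np

full_gradient = np.zeros(4)
view = full_gradient[1:3]   # like self._gradient_array_[self._current_slice_]
view[:] = 7.0               # like the gradient setter
print full_gradient         # [ 0.  7.  7.  0.]  -- the full array sees the write
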
@@ -169,28 +119,6 @@ class Param(OptimizationHandlable, ObsAr):
except AttributeError: pass # returning 0d array or float, double etc
return new_arr

def __setitem__(self, s, val):
super(Param, self).__setitem__(s, val)

#===========================================================================
# Index Operations:
#===========================================================================
#def _internal_offset(self):
# internal_offset = 0
# extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
# for i, si in enumerate(self._current_slice_[:self._realndim_]):
# if numpy.all(si == Ellipsis):
# continue
# if isinstance(si, slice):
# a = si.indices(self._realshape_[i])[0]
# elif isinstance(si, (list,numpy.ndarray,tuple)):
# a = si[0]
# else: a = si
# if a < 0:
# a = self._realshape_[i] + a
# internal_offset += a * extended_realshape[i]
# return internal_offset

def _raveled_index(self, slice_index=None):
# return an index array on the raveled array, which is formed by the current_slice
# of this object

@@ -235,13 +163,21 @@ class Param(OptimizationHandlable, ObsAr):
def is_fixed(self):
from transformations import __fixed__
return self.constraints[__fixed__].size == self.size
#def round(self, decimals=0, out=None):
# view = super(Param, self).round(decimals, out).view(Param)
# view.__array_finalize__(self)
# return view
#round.__doc__ = numpy.round.__doc__

def _get_original(self, param):
return self

#===========================================================================
# Pickling and copying
#===========================================================================
def __deepcopy__(self, memo):
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
memo[id(self)] = s
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
return s

#===========================================================================
# Printing -> done
#===========================================================================

@@ -250,7 +186,8 @@ class Param(OptimizationHandlable, ObsAr):
if self.size <= 1:
return [str(self.view(numpy.ndarray)[0])]
else: return [str(self.shape)]
def parameter_names(self, add_self=False, adjust_for_printing=False):
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
# this is just overwrighting the parameterized calls to parameter names, in order to maintain OOP
if adjust_for_printing:
return [adjust_name_for_printing(self.name)]
return [self.name]

@@ -261,6 +198,9 @@ class Param(OptimizationHandlable, ObsAr):
def parameter_shapes(self):
return [self.shape]
@property
def num_params(self):
return 0
@property
def _constraints_str(self):
return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
@property

@@ -282,8 +222,8 @@ class Param(OptimizationHandlable, ObsAr):
if isinstance(slice_index, (tuple, list)):
clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
for i in range(self._realndim_-len(clean_curr_slice)):
i+=len(clean_curr_slice)
clean_curr_slice += range(self._realshape_[i])
i+=1
clean_curr_slice += [range(self._realshape_[i])]
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
and len(set(map(len, clean_curr_slice))) <= 1):
return numpy.fromiter(itertools.izip(*clean_curr_slice),
@@ -368,20 +308,20 @@ class ParamConcatenation(object):
#===========================================================================
def __getitem__(self, s):
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
params = [p._param_array_[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._param_array_[ind[ps]])]
params = [p.param_array.flat[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p.param_array.flat[ind[ps]])]
if len(params)==1: return params[0]
return ParamConcatenation(params)
def __setitem__(self, s, val, update=True):
if isinstance(val, ParamConcatenation):
val = val.values()
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
vals = self.values(); vals[s] = val; del val
[numpy.place(p, ind[ps], vals[ps])
vals = self.values(); vals[s] = val
[numpy.copyto(p, vals[ps], where=ind[ps])
for p, ps in zip(self.params, self._param_slices_)]
if update:
self.update_all_params()
def values(self):
return numpy.hstack([p._param_array_ for p in self.params])
return numpy.hstack([p.param_array.flat for p in self.params])
#===========================================================================
# parameter operations:
#===========================================================================
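
Note: the hunk above swaps numpy.place for numpy.copyto(..., where=...). The difference matters because place() consumes the source values in order, while copyto(where=...) keeps source and destination positions aligned. A small standalone sketch:
import numpy as np

dst1 = np.zeros(4); dst2 = np.zeros(4)
mask = np.array([False, True, False, True])
vals = np.array([10., 20., 30., 40.])

np.place(dst1, mask, vals)           # fills masked slots with vals[0], vals[1] -> [0, 10, 0, 20]
np.copyto(dst2, vals, where=mask)    # copies the aligned elements vals[1], vals[3] -> [0, 20, 0, 40]
print dst1, dst2
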
@@ -399,8 +339,8 @@ class ParamConcatenation(object):
self.update_all_params()
constrain_positive.__doc__ = Param.constrain_positive.__doc__

def constrain_fixed(self, warning=True):
[param.constrain_fixed(warning) for param in self.params]
def constrain_fixed(self, value=None, warning=True, trigger_parent=True):
[param.constrain_fixed(value, warning, trigger_parent) for param in self.params]
constrain_fixed.__doc__ = Param.constrain_fixed.__doc__
fix = constrain_fixed

@@ -468,3 +408,42 @@ class ParamConcatenation(object):
return "\n".join(strings)
def __repr__(self):
return "\n".join(map(repr,self.params))

def __ilshift__(self, *args, **kwargs):
self[:] = np.ndarray.__ilshift__(self.values(), *args, **kwargs)

def __irshift__(self, *args, **kwargs):
self[:] = np.ndarray.__irshift__(self.values(), *args, **kwargs)

def __ixor__(self, *args, **kwargs):
self[:] = np.ndarray.__ixor__(self.values(), *args, **kwargs)

def __ipow__(self, *args, **kwargs):
self[:] = np.ndarray.__ipow__(self.values(), *args, **kwargs)

def __ifloordiv__(self, *args, **kwargs):
self[:] = np.ndarray.__ifloordiv__(self.values(), *args, **kwargs)

def __isub__(self, *args, **kwargs):
self[:] = np.ndarray.__isub__(self.values(), *args, **kwargs)

def __ior__(self, *args, **kwargs):
self[:] = np.ndarray.__ior__(self.values(), *args, **kwargs)

def __itruediv__(self, *args, **kwargs):
self[:] = np.ndarray.__itruediv__(self.values(), *args, **kwargs)

def __idiv__(self, *args, **kwargs):
self[:] = np.ndarray.__idiv__(self.values(), *args, **kwargs)

def __iand__(self, *args, **kwargs):
self[:] = np.ndarray.__iand__(self.values(), *args, **kwargs)

def __imod__(self, *args, **kwargs):
self[:] = np.ndarray.__imod__(self.values(), *args, **kwargs)

def __iadd__(self, *args, **kwargs):
self[:] = np.ndarray.__iadd__(self.values(), *args, **kwargs)

def __imul__(self, *args, **kwargs):
self[:] = np.ndarray.__imul__(self.values(), *args, **kwargs)

@@ -13,10 +13,11 @@ Observable Pattern for patameterization

"""

from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np
import re

__updated__ = '2014-03-24'
__updated__ = '2014-04-16'

class HierarchyError(Exception):
"""
@@ -28,74 +29,19 @@ def adjust_name_for_printing(name):
Make sure a name can be printed, alongside used as a variable name.
"""
if name is not None:
return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@", '_at_')
name2 = name
name = name.replace(" ", "_").replace(".", "_").replace("-", "_m_")
name = name.replace("+", "_p_").replace("!", "_I_")
name = name.replace("**", "_xx_").replace("*", "_x_")
name = name.replace("/", "_l_").replace("@", '_at_')
name = name.replace("(", "_of_").replace(")", "")
if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None:
raise NameError, "name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name)
return name
return ''

class InterfacePickleFunctions(object):
def __init__(self, *a, **kw):
super(InterfacePickleFunctions, self).__init__()

def _getstate(self):
"""
Returns the state of this class in a memento pattern.
The state must be a list-like structure of all the fields
this class needs to run.

See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
"""
raise NotImplementedError, "To be able to use pickling you need to implement this method"
def _setstate(self, state):
"""
Set the state (memento pattern) of this class to the given state.
Usually this is just the counterpart to _getstate, such that
an object is a copy of another when calling

copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())

See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
"""
raise NotImplementedError, "To be able to use pickling you need to implement this method"

class Pickleable(InterfacePickleFunctions):
"""
Make an object pickleable (See python doc 'pickling').

This class allows for pickling support by Memento pattern.
_getstate returns a memento of the class, which gets pickled.
_setstate(<memento>) (re-)sets the state of the class to the memento
"""
def __init__(self, *a, **kw):
super(Pickleable, self).__init__()
#===========================================================================
# Pickling operations
#===========================================================================
def pickle(self, f, protocol=-1):
"""
:param f: either filename or open file object to write to.
if it is an open buffer, you have to make sure to close
it properly.
:param protocol: pickling protocol to use, python-pickle for details.
"""
import cPickle
if isinstance(f, str):
with open(f, 'w') as f:
cPickle.dump(self, f, protocol)
else:
cPickle.dump(self, f, protocol)
def __getstate__(self):
if self._has_get_set_state():
return self._getstate()
return self.__dict__
def __setstate__(self, state):
if self._has_get_set_state():
self._setstate(state)
# TODO: maybe parameters_changed() here?
return
self.__dict__ = state
def _has_get_set_state(self):
return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)

class Observable(Pickleable):
class Observable(object):
"""
Observable pattern for parameterization.
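
Note: the rewritten adjust_name_for_printing converts each operator to a named token and now raises NameError when the result is still not a valid identifier. A hedged sketch of the behaviour implied by the replacement table above (import path assumed from this branch's layout):
from GPy.core.parameterization.parameter_core import adjust_name_for_printing  # assumed path

print adjust_name_for_printing("rbf lengthscale")   # rbf_lengthscale
print adjust_name_for_printing("sum.rbf*white")     # sum_rbf_x_white
print adjust_name_for_printing("noise-variance")    # noise_m_variance
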
@@ -105,23 +51,25 @@ class Observable(Pickleable):
"""
_updated = True
def __init__(self, *args, **kwargs):
super(Observable, self).__init__(*args, **kwargs)
self._observer_callables_ = []
super(Observable, self).__init__()
from lists_and_dicts import ObservablesList
self.observers = ObservablesList()

def add_observer(self, observer, callble, priority=0):
self._insert_sorted(priority, observer, callble)
self.observers.add(priority, observer, callble)

def remove_observer(self, observer, callble=None):
to_remove = []
for p, obs, clble in self._observer_callables_:
for poc in self.observers:
_, obs, clble = poc
if callble is not None:
if (obs == observer) and (callble == clble):
to_remove.append((p, obs, clble))
to_remove.append(poc)
else:
if obs is observer:
to_remove.append((p, obs, clble))
to_remove.append(poc)
for r in to_remove:
self._observer_callables_.remove(r)
self.observers.remove(*r)

def notify_observers(self, which=None, min_priority=None):
"""

@@ -136,32 +84,18 @@ class Observable(Pickleable):
if which is None:
which = self
if min_priority is None:
[callble(self, which=which) for _, _, callble in self._observer_callables_]
[callble(self, which=which) for _, _, callble in self.observers]
else:
for p, _, callble in self._observer_callables_:
for p, _, callble in self.observers:
if p <= min_priority:
break
callble(self, which=which)

def _insert_sorted(self, p, o, c):
ins = 0
for pr, _, _ in self._observer_callables_:
if p > pr:
break
ins += 1
self._observer_callables_.insert(ins, (p, o, c))

def _getstate(self):
return [self._observer_callables_]

def _setstate(self, state):
self._observer_callables_ = state.pop()

#===============================================================================
# Foundation framework for parameterized and param objects:
#===============================================================================

class Parentable(Observable):
class Parentable(object):
"""
Enable an Object to have a parent.
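
Note: observers are held sorted by descending priority, and notify_observers(min_priority=...) stops at the first entry whose priority is not above the threshold. A hedged usage sketch (the Observable subclass and callback names are hypothetical; the import path is assumed):
from GPy.core.parameterization.parameter_core import Observable  # assumed path

class Thing(Observable):
    pass

def high(changed, which=None): print 'high-priority observer ran'
def low(changed, which=None): print 'low-priority observer ran'

t = Thing()
t.add_observer(high, high, priority=10)
t.add_observer(low, low, priority=-10)
t.notify_observers()                  # both callbacks run, highest priority first
t.notify_observers(min_priority=0)    # only the priority-10 observer runs
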
@@ -171,7 +105,7 @@ class Parentable(Observable):
_parent_ = None
_parent_index_ = None
def __init__(self, *args, **kwargs):
super(Parentable, self).__init__(*args, **kwargs)
super(Parentable, self).__init__()

def has_parent(self):
"""

@@ -207,7 +141,84 @@ class Parentable(Observable):
"""
pass

class Gradcheckable(Parentable):
class Pickleable(object):
"""
Make an object pickleable (See python doc 'pickling').

This class allows for pickling support by Memento pattern.
_getstate returns a memento of the class, which gets pickled.
_setstate(<memento>) (re-)sets the state of the class to the memento
"""
def __init__(self, *a, **kw):
super(Pickleable, self).__init__()
#===========================================================================
# Pickling operations
#===========================================================================
def pickle(self, f, protocol=-1):
"""
:param f: either filename or open file object to write to.
if it is an open buffer, you have to make sure to close
it properly.
:param protocol: pickling protocol to use, python-pickle for details.
"""
import cPickle as pickle
import pickle #TODO: cPickle
if isinstance(f, str):
with open(f, 'w') as f:
pickle.dump(self, f, protocol)
else:
pickle.dump(self, f, protocol)

#===========================================================================
# copy and pickling
#===========================================================================
def copy(self):
"""Returns a (deep) copy of the current model"""
#raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
import copy
memo = {}
memo[id(self._parent_)] = None
memo[id(self.gradient)] = None
memo[id(self.param_array)] = None
memo[id(self._fixes_)] = None
c = copy.deepcopy(self, memo)
c._parent_index_ = None
return c

def __deepcopy__(self, memo):
s = self.__new__(self.__class__)
memo[id(self)] = s
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
return s

def __getstate__(self):
ignore_list = ([#'_parent_', '_parent_index_',
#'observers',
'_param_array_', '_gradient_array_', '_fixes_',
'_Cacher_wrap__cachers']
#+ self.parameter_names(recursive=False)
)
dc = dict()
for k,v in self.__dict__.iteritems():
if k not in ignore_list:
#if hasattr(v, "__getstate__"):
#dc[k] = v.__getstate__()
#else:
dc[k] = v
return dc

def __setstate__(self, state):
self.__dict__.update(state)
return self

#def __getstate__(self, memo):
# raise NotImplementedError, "get state must be implemented to be able to pickle objects"

#def __setstate__(self, memo):
# raise NotImplementedError, "set state must be implemented to be able to pickle objects"

class Gradcheckable(Pickleable, Parentable):
"""
Adds the functionality for an object to be gradcheckable.
It is just a thin wrapper of a call to the highest parent for now.
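
Note: with the new Pickleable mixin in the hierarchy, any parameterized object gains a generic pickle() and copy(). A hedged usage sketch on a model (the data and model choice are placeholders; pickle() and copy() are the methods defined above):
import numpy as np, GPy

X = np.random.randn(10, 1)
Y = np.sin(X) + 0.1 * np.random.randn(10, 1)
m = GPy.models.GPRegression(X, Y)

m.pickle('model.pkl')   # writes via pickle.dump (switching back to cPickle is still a TODO here)
m2 = m.copy()           # deepcopy, with parent/gradient buffers excluded and re-connected afterwards
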
@@ -312,7 +323,7 @@ class Indexable(object):
raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"

class Constrainable(Nameable, Indexable):
class Constrainable(Nameable, Indexable, Observable):
"""
Make an object constrainable with Priors and Transformations.
TODO: Mappings!!

@@ -394,6 +405,7 @@ class Constrainable(Nameable, Indexable):
self._fixes_[fixed_indices] = FIXED
else:
self._fixes_ = None
del self.constraints[__fixed__]

def _has_fixes(self):
return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size

@@ -429,14 +441,14 @@ class Constrainable(Nameable, Indexable):
def log_prior(self):
"""evaluate the prior"""
if self.priors.size > 0:
x = self._param_array_
x = self.param_array
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
return 0.

def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
if self.priors.size > 0:
x = self._param_array_
x = self.param_array
ret = np.zeros(x.size)
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
return ret

@@ -455,7 +467,7 @@ class Constrainable(Nameable, Indexable):
Constrain the parameter to the given
:py:class:`GPy.core.transformations.Transformation`.
"""
self._param_array_[:] = transform.initialize(self._param_array_)
self.param_array[...] = transform.initialize(self.param_array)
reconstrained = self.unconstrain()
self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
self.notify_observers(self, None if trigger_parent else -np.inf)

@@ -565,14 +577,14 @@ class OptimizationHandlable(Constrainable):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)

def transform(self):
[np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
[np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]

def untransform(self):
[np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]

def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self._param_array_.copy()
p = self.param_array.copy()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)

@@ -583,14 +595,14 @@ class OptimizationHandlable(Constrainable):
return p

def _set_params_transformed(self, p):
if p is self._param_array_:
if p is self.param_array:
p = p.copy()
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
self._param_array_.flat[fixes] = p
elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p
else: self._param_array_.flat = p
self.param_array.flat[fixes] = p
elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
else: self.param_array.flat = p
self.untransform()
self._trigger_params_changed()
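
Note: _get_params_transformed maps each constrained value into the optimizer's unconstrained space via the constraint's finv, and _set_params_transformed maps back with f. A standalone sketch of that round trip, with log/exp standing in for a positivity transform (the Logexp form f(x) = log(1 + exp(x)), finv(y) = log(exp(y) - 1) is an assumption here, not quoted from the diff):
import numpy as np

f = lambda x: np.log1p(np.exp(x))      # unconstrained -> positive, like Transformation.f
finv = lambda y: np.log(np.expm1(y))   # positive -> unconstrained, like Transformation.finv

theta = np.array([0.5, 2.0])           # constrained (positive) parameter values
x = finv(theta)                         # what the optimizer actually sees
print np.allclose(f(x), theta)          # True: untransform(transform(theta)) recovers theta
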
@@ -600,36 +612,29 @@ class OptimizationHandlable(Constrainable):

def _size_transformed(self):
return self.size - self.constraints[__fixed__].size
#
# def _untransform_params(self, p):
# # inverse apply transformations for parameters
# #p = p.copy()
# if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
# [np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
# return p
#
# def _get_params(self):
# """
# get all parameters
# """
# return self._param_array_
# p = np.empty(self.size, dtype=np.float64)
# if self.size == 0:
# return p
# [np.put(p, ind, par._get_params()) for ind, par in itertools.izip(self._param)]
# return p

# def _set_params(self, params, trigger_parent=True):
# self._param_array_.flat = params
# if trigger_parent: min_priority = None
# else: min_priority = -np.inf
# self.notify_observers(None, min_priority)
# don't overwrite this anymore!
# raise NotImplementedError, "Abstract superclass: This needs to be implemented in Param and Parameterizable"
@property
def num_params(self):
"""
Return the number of parameters of this parameter_handle.
Param objects will allways return 0.
"""
raise NotImplemented, "Abstract, please implement in respective classes"

#===========================================================================
# Optimization handles:
#===========================================================================
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
"""
Get the names of all parameters of this model.

:param bool add_self: whether to add the own name in front of names
:param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
:param bool recursive: whether to traverse through hierarchy and append leaf node names
"""
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
else: names = [adjust(x.name) for x in self._parameters_]
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
return names
def _get_param_names(self):
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n

@@ -663,16 +668,30 @@ class OptimizationHandlable(Constrainable):
# For shared memory arrays. This does nothing in Param, but sets the memory
# for all parameterized objects
#===========================================================================
@property
def full_gradient(self):
"""
Note to users:
This does not return the gradient in the right shape! Use self.gradient
for the right gradient array.

To work on the gradient array, use this as the gradient handle.
This method exists for in memory use of parameters.
When trying to access the true gradient array, use this.
"""
self.gradient # <<< ensure _gradient_array_
return self._gradient_array_

def _propagate_param_grad(self, parray, garray):
pi_old_size = 0
for pi in self._parameters_:
pislice = slice(pi_old_size, pi_old_size + pi.size)

self._param_array_[pislice] = pi._param_array_.flat # , requirements=['C', 'W']).flat
self._gradient_array_[pislice] = pi._gradient_array_.flat # , requirements=['C', 'W']).flat
self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat
self.full_gradient[pislice] = pi.full_gradient.flat # , requirements=['C', 'W']).flat

pi._param_array_.data = parray[pislice].data
pi._gradient_array_.data = garray[pislice].data
pi.param_array.data = parray[pislice].data
pi.full_gradient.data = garray[pislice].data

pi._propagate_param_grad(parray[pislice], garray[pislice])
pi_old_size += pi.size
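
Note: _propagate_param_grad stitches every child's parameter and gradient storage into one flat parent buffer by rebinding the child arrays' .data to slices of the parent, so both sides read and write the same memory. A plain-numpy sketch of that trick (assigning to ndarray.data works on the numpy versions this branch targets; newer numpy releases may reject it):
import numpy as np

parent = np.zeros(5)
child = np.ones(2)
parent[1:3] = child             # copy the child's current values in, as the loop above does
child.data = parent[1:3].data   # then make the child a window onto the parent's buffer
child[0] = 42.0
print parent                    # [  0.  42.   1.   0.   0.]  -- the parent sees the child's write
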
|
|
@ -681,26 +700,32 @@ class Parameterizable(OptimizationHandlable):
|
|||
def __init__(self, *args, **kwargs):
|
||||
super(Parameterizable, self).__init__(*args, **kwargs)
|
||||
from GPy.core.parameterization.lists_and_dicts import ArrayList
|
||||
_parameters_ = ArrayList()
|
||||
self._parameters_ = ArrayList()
|
||||
self.size = 0
|
||||
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
||||
self._added_names_ = set()
|
||||
|
||||
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
|
||||
"""
|
||||
Get the names of all parameters of this model.
|
||||
@property
|
||||
def param_array(self):
|
||||
if not hasattr(self, '_param_array_'):
|
||||
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||
return self._param_array_
|
||||
|
||||
:param bool add_self: whether to add the own name in front of names
|
||||
:param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
|
||||
:param bool recursive: whether to traverse through hierarchy and append leaf node names
|
||||
"""
|
||||
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
|
||||
else: adjust = lambda x: x
|
||||
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
|
||||
else: names = [adjust(x.name) for x in self._parameters_]
|
||||
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
|
||||
return names
|
||||
@param_array.setter
|
||||
def param_array(self, arr):
|
||||
self._param_array_ = arr
|
||||
|
||||
#=========================================================================
|
||||
# Gradient handling
|
||||
#=========================================================================
|
||||
@property
|
||||
def gradient(self):
|
||||
if not hasattr(self, '_gradient_array_'):
|
||||
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
||||
return self._gradient_array_
|
||||
|
||||
@gradient.setter
|
||||
def gradient(self, val):
|
||||
self._gradient_array_[:] = val
|
||||
|
||||
@property
|
||||
def num_params(self):
|
||||
|
|
@ -737,34 +762,6 @@ class Parameterizable(OptimizationHandlable):
|
|||
self._remove_parameter_name(None, old_name)
|
||||
self._add_parameter_name(param)
|
||||
|
||||
#=========================================================================
|
||||
# Gradient handling
|
||||
#=========================================================================
|
||||
@property
|
||||
def gradient(self):
|
||||
return self._gradient_array_
|
||||
|
||||
@gradient.setter
|
||||
def gradient(self, val):
|
||||
self._gradient_array_[:] = val
|
||||
#===========================================================================
|
||||
# def _collect_gradient(self, target):
|
||||
# [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
|
||||
#===========================================================================
|
||||
|
||||
#===========================================================================
|
||||
# def _set_params(self, params, trigger_parent=True):
|
||||
# [p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
|
||||
# if trigger_parent: min_priority = None
|
||||
# else: min_priority = -np.inf
|
||||
# self.notify_observers(None, min_priority)
|
||||
#===========================================================================
|
||||
|
||||
#===========================================================================
|
||||
# def _set_gradient(self, g):
|
||||
# [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
|
||||
#===========================================================================
|
||||
|
||||
def add_parameter(self, param, index=None, _ignore_added_names=False):
|
||||
"""
|
||||
:param parameters: the parameters to add
|
||||
|
|
@ -864,7 +861,7 @@ class Parameterizable(OptimizationHandlable):
|
|||
# no parameters for this class
|
||||
return
|
||||
old_size = 0
|
||||
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||
self.param_array = np.empty(self.size, dtype=np.float64)
|
||||
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
||||
|
||||
self._param_slices_ = []
|
||||
|
|
@ -874,15 +871,15 @@ class Parameterizable(OptimizationHandlable):
|
|||
|
||||
pslice = slice(old_size, old_size + p.size)
|
||||
# first connect all children
|
||||
p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
|
||||
p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice])
|
||||
# then connect children to self
|
||||
self._param_array_[pslice] = p._param_array_.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||
self._gradient_array_[pslice] = p._gradient_array_.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||
self.full_gradient[pslice] = p.full_gradient.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||
|
||||
if not p._param_array_.flags['C_CONTIGUOUS']:
|
||||
import ipdb;ipdb.set_trace()
|
||||
p._param_array_.data = self._param_array_[pslice].data
|
||||
p._gradient_array_.data = self._gradient_array_[pslice].data
|
||||
if not p.param_array.flags['C_CONTIGUOUS']:
|
||||
raise ValueError, "This should not happen! Please email the developers with code that reproduces this error. All parameter arrays must be C_CONTIGUOUS"
|
||||
p.param_array.data = self.param_array[pslice].data
|
||||
p.full_gradient.data = self.full_gradient[pslice].data
|
||||
|
||||
self._param_slices_.append(pslice)
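The buffer re-pointing above aims to make each child's param_array share memory with a slice of the parent's flat array; the net effect resembles an ordinary numpy view, as in this minimal illustration (not the GPy mechanism itself):
import numpy as np
flat = np.zeros(5)           # stand-in for the parent's flat parameter array
child = flat[0:3]            # a basic slice is a view sharing the same buffer
flat[0] = 42.0
assert child[0] == 42.0      # the child sees the parent's write immediately
assert child.base is flat    # confirms child is a view, not a copy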
|
||||
|
||||
|
|
@ -898,41 +895,22 @@ class Parameterizable(OptimizationHandlable):
|
|||
self.notify_observers(which=which)
|
||||
|
||||
#===========================================================================
|
||||
# TODO: not working yet
|
||||
# Pickling
|
||||
#===========================================================================
|
||||
def __setstate__(self, state):
|
||||
super(Parameterizable, self).__setstate__(state)
|
||||
self._connect_parameters()
|
||||
self._connect_fixes()
|
||||
self._notify_parent_change()
|
||||
|
||||
self.parameters_changed()
|
||||
|
||||
def copy(self):
|
||||
"""Returns a (deep) copy of the current model"""
|
||||
raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
|
||||
import copy
|
||||
from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
|
||||
from .lists_and_dicts import ArrayList
|
||||
|
||||
dc = dict()
|
||||
for k, v in self.__dict__.iteritems():
|
||||
if k not in ['_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names(recursive=False):
|
||||
if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
|
||||
dc[k] = v.copy()
|
||||
else:
|
||||
dc[k] = copy.deepcopy(v)
|
||||
if k == '_parameters_':
|
||||
params = [p.copy() for p in v]
|
||||
|
||||
dc['_parent_'] = None
|
||||
dc['_parent_index_'] = None
|
||||
dc['_observer_callables_'] = []
|
||||
dc['_parameters_'] = ArrayList()
|
||||
dc['constraints'].clear()
|
||||
dc['priors'].clear()
|
||||
dc['size'] = 0
|
||||
|
||||
s = self.__new__(self.__class__)
|
||||
s.__dict__ = dc
|
||||
|
||||
for p in params:
|
||||
s.add_parameter(p, _ignore_added_names=True)
|
||||
|
||||
return s
|
||||
|
||||
c = super(Parameterizable, self).copy()
|
||||
c._connect_parameters()
|
||||
c._connect_fixes()
|
||||
c._notify_parent_change()
|
||||
return c
|
||||
#===========================================================================
|
||||
# From being parentable, we have to define the parent_change notification
|
||||
#===========================================================================
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ class ParametersChangedMeta(type):
|
|||
instance.parameters_changed()
|
||||
return instance
|
||||
|
||||
class Parameterized(Parameterizable, Pickleable):
|
||||
class Parameterized(Parameterizable):
|
||||
"""
|
||||
Parameterized class
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ class Parameterized(Parameterizable, Pickleable):
|
|||
child_node = child.build_pydot(G)
|
||||
G.add_edge(pydot.Edge(node, child_node))
|
||||
|
||||
for o in self._observer_callables_.keys():
|
||||
for o in self.observers.keys():
|
||||
label = o.name if hasattr(o, 'name') else str(o)
|
||||
observed_node = pydot.Node(id(o), label=label)
|
||||
G.add_node(observed_node)
|
||||
|
|
@ -101,48 +101,13 @@ class Parameterized(Parameterizable, Pickleable):
|
|||
return G
|
||||
return node
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices, rest can get recomputed
|
||||
For inheriting from Parameterized:
|
||||
|
||||
Always append the state of the inherited object
|
||||
and call down to the inherited object in _setstate!!
|
||||
"""
|
||||
return [
|
||||
self._fixes_,
|
||||
self.priors,
|
||||
self.constraints,
|
||||
self._parameters_,
|
||||
self._name,
|
||||
self._added_names_,
|
||||
]
|
||||
|
||||
def _setstate(self, state):
|
||||
self._added_names_ = state.pop()
|
||||
self._name = state.pop()
|
||||
self._parameters_ = state.pop()
|
||||
self.constraints = state.pop()
|
||||
self.priors = state.pop()
|
||||
self._fixes_ = state.pop()
|
||||
self._connect_parameters()
|
||||
self.parameters_changed()
|
||||
#===========================================================================
|
||||
# Override copy to handle programmatically added observers
|
||||
#===========================================================================
|
||||
def copy(self):
|
||||
c = super(Pickleable, self).copy()
|
||||
c.add_observer(c, c._parameters_changed_notification, -100)
|
||||
return c
|
||||
|
||||
#===========================================================================
|
||||
# Gradient control
|
||||
#===========================================================================
|
||||
def _transform_gradients(self, g):
|
||||
if self.has_parent():
|
||||
return g
|
||||
[numpy.put(g, i, g[i] * c.gradfactor(self._param_array_[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
||||
[numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
||||
if self._has_fixes(): return g[self._fixes_]
|
||||
return g
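The gradfactor multiplication above is the chain rule for constrained parameters; a toy sketch with an exp-style positivity transform (illustrative only, not GPy's actual Logexp transform):
import numpy as np
# A parameter p is kept positive via p = exp(t); by the chain rule
# dL/dt = dL/dp * dp/dt = dL/dp * p, so the gradient factor is p itself.
p = np.array([0.5, 2.0])
dL_dp = np.array([1.0, -3.0])
dL_dt = dL_dp * p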
|
||||
|
||||
|
|
@ -206,7 +171,7 @@ class Parameterized(Parameterizable, Pickleable):
|
|||
|
||||
def __getitem__(self, name, paramlist=None):
|
||||
if isinstance(name, (int, slice, tuple, np.ndarray)):
|
||||
return self._param_array_[name]
|
||||
return self.param_array[name]
|
||||
else:
|
||||
if paramlist is None:
|
||||
paramlist = self.grep_param_names(name)
|
||||
|
|
@ -220,9 +185,11 @@ class Parameterized(Parameterizable, Pickleable):
|
|||
return ParamConcatenation(paramlist)
|
||||
|
||||
def __setitem__(self, name, value, paramlist=None):
|
||||
if value is None:
|
||||
return # nothing to do here
|
||||
if isinstance(name, (slice, tuple, np.ndarray)):
|
||||
try:
|
||||
self._param_array_[name] = value
|
||||
self.param_array[name] = value
|
||||
except:
|
||||
raise ValueError, "Setting by slice or index only allowed with array-like"
|
||||
self._trigger_params_changed()
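A hedged note on what this regex-based item access enables; the model `m` and the patterns are illustrative (compare the '.*noise' assignment in the MRD example further down):
# m['.*lengthscale']   collects every parameter whose name matches the regex
#                      (a ParamConcatenation when several parameters match)
# m['.*noise'] = 0.1   broadcasts 0.1 into all matching noise parameters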
|
||||
|
|
@ -233,7 +200,7 @@ class Parameterized(Parameterizable, Pickleable):
|
|||
|
||||
def __setattr__(self, name, val):
|
||||
# override the default behaviour when setting a param, so broadcasting can be used
|
||||
if hasattr(self, '_parameters_'):
|
||||
if hasattr(self, "_parameters_"):
|
||||
pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
|
||||
if name in pnames: self._parameters_[pnames.index(name)][:] = val; return
|
||||
object.__setattr__(self, name, val);
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10))
|
||||
self.variance = Param('variance',variance)
|
||||
self.add_parameters(self.pi)
|
||||
self.group_spike_prob = False
|
||||
|
||||
def KL_divergence(self, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
|
|
@ -55,13 +56,17 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
||||
if self.group_spike_prob:
|
||||
gamma_grad = np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
gamma.gradient -= gamma_grad.mean(axis=0)
|
||||
else:
|
||||
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
mu.gradient -= gamma*mu
|
||||
S.gradient -= (1. - (1. / (S))) * gamma /2.
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
|
||||
|
||||
class VariationalPosterior(Parameterized):
|
||||
def __init__(self, means=None, variances=None, name=None, *a, **kw):
|
||||
def __init__(self, means=None, variances=None, name='latent space', *a, **kw):
|
||||
super(VariationalPosterior, self).__init__(name=name, *a, **kw)
|
||||
self.mean = Param("mean", means)
|
||||
self.variance = Param("variance", variances, Logexp())
|
||||
|
|
@ -119,6 +124,7 @@ class NormalPosterior(VariationalPosterior):
|
|||
import sys
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ...plotting.matplot_dep import variational_plots
|
||||
import matplotlib
|
||||
return variational_plots.plot(self,*args)
|
||||
|
||||
class SpikeAndSlabPosterior(VariationalPosterior):
|
||||
|
|
|
|||
|
|
@ -106,15 +106,3 @@ class SparseGP(GP):
|
|||
return mu, var
|
||||
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
"""
|
||||
return GP._getstate(self) + [
|
||||
self.Z,
|
||||
self.num_inducing]
|
||||
|
||||
def _setstate(self, state):
|
||||
self.num_inducing = state.pop()
|
||||
self.Z = state.pop()
|
||||
GP._setstate(self, state)
|
||||
|
|
|
|||
|
|
@ -89,57 +89,57 @@ class SVIGP(GP):
|
|||
self._param_steplength_trace = []
|
||||
self._vb_steplength_trace = []
|
||||
|
||||
def _getstate(self):
|
||||
steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
|
||||
return GP._getstate(self) + \
|
||||
[self.get_vb_param(),
|
||||
self.Z,
|
||||
self.num_inducing,
|
||||
self.has_uncertain_inputs,
|
||||
self.X_variance,
|
||||
self.X_batch,
|
||||
self.X_variance_batch,
|
||||
steplength_params,
|
||||
self.batchcounter,
|
||||
self.batchsize,
|
||||
self.epochs,
|
||||
self.momentum,
|
||||
self.data_prop,
|
||||
self._param_trace,
|
||||
self._param_steplength_trace,
|
||||
self._vb_steplength_trace,
|
||||
self._ll_trace,
|
||||
self._grad_trace,
|
||||
self.Y,
|
||||
self._permutation,
|
||||
self.iterations
|
||||
]
|
||||
|
||||
def _setstate(self, state):
|
||||
self.iterations = state.pop()
|
||||
self._permutation = state.pop()
|
||||
self.Y = state.pop()
|
||||
self._grad_trace = state.pop()
|
||||
self._ll_trace = state.pop()
|
||||
self._vb_steplength_trace = state.pop()
|
||||
self._param_steplength_trace = state.pop()
|
||||
self._param_trace = state.pop()
|
||||
self.data_prop = state.pop()
|
||||
self.momentum = state.pop()
|
||||
self.epochs = state.pop()
|
||||
self.batchsize = state.pop()
|
||||
self.batchcounter = state.pop()
|
||||
steplength_params = state.pop()
|
||||
(self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
|
||||
self.X_variance_batch = state.pop()
|
||||
self.X_batch = state.pop()
|
||||
self.X_variance = state.pop()
|
||||
self.has_uncertain_inputs = state.pop()
|
||||
self.num_inducing = state.pop()
|
||||
self.Z = state.pop()
|
||||
vb_param = state.pop()
|
||||
GP._setstate(self, state)
|
||||
self.set_vb_param(vb_param)
|
||||
# def _getstate(self):
|
||||
# steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
|
||||
# return GP._getstate(self) + \
|
||||
# [self.get_vb_param(),
|
||||
# self.Z,
|
||||
# self.num_inducing,
|
||||
# self.has_uncertain_inputs,
|
||||
# self.X_variance,
|
||||
# self.X_batch,
|
||||
# self.X_variance_batch,
|
||||
# steplength_params,
|
||||
# self.batchcounter,
|
||||
# self.batchsize,
|
||||
# self.epochs,
|
||||
# self.momentum,
|
||||
# self.data_prop,
|
||||
# self._param_trace,
|
||||
# self._param_steplength_trace,
|
||||
# self._vb_steplength_trace,
|
||||
# self._ll_trace,
|
||||
# self._grad_trace,
|
||||
# self.Y,
|
||||
# self._permutation,
|
||||
# self.iterations
|
||||
# ]
|
||||
#
|
||||
# def _setstate(self, state):
|
||||
# self.iterations = state.pop()
|
||||
# self._permutation = state.pop()
|
||||
# self.Y = state.pop()
|
||||
# self._grad_trace = state.pop()
|
||||
# self._ll_trace = state.pop()
|
||||
# self._vb_steplength_trace = state.pop()
|
||||
# self._param_steplength_trace = state.pop()
|
||||
# self._param_trace = state.pop()
|
||||
# self.data_prop = state.pop()
|
||||
# self.momentum = state.pop()
|
||||
# self.epochs = state.pop()
|
||||
# self.batchsize = state.pop()
|
||||
# self.batchcounter = state.pop()
|
||||
# steplength_params = state.pop()
|
||||
# (self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
|
||||
# self.X_variance_batch = state.pop()
|
||||
# self.X_batch = state.pop()
|
||||
# self.X_variance = state.pop()
|
||||
# self.has_uncertain_inputs = state.pop()
|
||||
# self.num_inducing = state.pop()
|
||||
# self.Z = state.pop()
|
||||
# vb_param = state.pop()
|
||||
# GP._setstate(self, state)
|
||||
# self.set_vb_param(vb_param)
|
||||
|
||||
def _compute_kernel_matrices(self):
|
||||
# kernel computations, using BGPLVM notation
|
||||
|
|
|
|||
|
|
@ -277,6 +277,8 @@ def bgplvm_simulation(optimize=True, verbose=1,
|
|||
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
|
||||
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
|
||||
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
|
||||
m.likelihood.variance = .1
|
||||
|
||||
if optimize:
|
||||
print "Optimizing model:"
|
||||
|
|
@ -304,7 +306,8 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
|||
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
|
||||
m.inference_method = VarDTCMissingData()
|
||||
m.Y[inan] = _np.nan
|
||||
m.X.variance *= .1
|
||||
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
|
||||
m.likelihood.variance = .01
|
||||
m.parameters_changed()
|
||||
m.Yreal = Y
|
||||
|
||||
|
|
@ -321,18 +324,15 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
|||
def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||
from GPy import kern
|
||||
from GPy.models import MRD
|
||||
from GPy.likelihoods import Gaussian
|
||||
|
||||
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
|
||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||
|
||||
#Ylist = [Ylist[0]]
|
||||
k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))]
|
||||
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw)
|
||||
k = kern.Linear(Q, ARD=True)
|
||||
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw)
|
||||
|
||||
m['.*noise'] = [Y.var()/500. for Y in Ylist]
|
||||
#for i, Y in enumerate(Ylist):
|
||||
# m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500.
|
||||
m['.*noise'] = [Y.var()/40. for Y in Ylist]
|
||||
|
||||
if optimize:
|
||||
print "Optimizing Model:"
|
||||
|
|
@ -409,12 +409,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
|
|||
# optimize
|
||||
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
|
||||
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
||||
if plot and GPy.plotting.matplot_dep.visualize.visual_available:
|
||||
if plot:
|
||||
plt.clf
|
||||
ax = m.plot_latent()
|
||||
y = m.likelihood.Y[0, :]
|
||||
y = m.Y[0, :]
|
||||
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||
vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, latent_axes=ax)
|
||||
raw_input('Press enter to finish')
|
||||
|
||||
return m
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ from expectation_propagation_dtc import EPDTC
|
|||
from dtc import DTC
|
||||
from fitc import FITC
|
||||
from var_dtc_parallel import VarDTC_minibatch
|
||||
from var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
# class FullLatentFunctionData(object):
|
||||
#
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ class ExactGaussianInference(object):
|
|||
return Y
|
||||
else:
|
||||
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
||||
raise NotImplementedError, 'TODO' #TODO
|
||||
#print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
|
||||
return Y
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
#
|
||||
#Parts of this file were influenced by the Matlab GPML framework written by
|
||||
|
|
@ -91,7 +91,11 @@ class Laplace(object):
|
|||
iteration = 0
|
||||
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
|
||||
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
|
||||
if np.any(np.isnan(W)):
|
||||
raise ValueError('One or more element(s) of W is NaN')
|
||||
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
|
||||
if np.any(np.isnan(grad)):
|
||||
raise ValueError('One or more element(s) of grad is NaN')
|
||||
|
||||
W_f = W*f
|
||||
|
||||
|
|
@ -141,19 +145,24 @@ class Laplace(object):
|
|||
"""
|
||||
#At this point get the Hessian matrix (or vector, as W is diagonal)
|
||||
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
|
||||
if np.any(np.isnan(W)):
|
||||
raise ValueError('One or more element(s) of W is NaN')
|
||||
|
||||
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
#compute vital matrices
|
||||
C = np.dot(LiW12, K)
|
||||
Ki_W_i = K - C.T.dot(C) #Could this be wrong?
|
||||
Ki_W_i = K - C.T.dot(C)
|
||||
|
||||
#compute the log marginal
|
||||
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
|
||||
|
||||
#Compute vival matrices for derivatives
|
||||
# Compute matrices for derivatives
|
||||
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
|
||||
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
|
||||
if np.any(np.isnan(dW_df)):
|
||||
raise ValueError('One or more element(s) of dW_df is NaN')
|
||||
|
||||
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9.
|
||||
#BiK, _ = dpotrs(L, K, lower=1)
|
||||
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
|
||||
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
|
||||
|
|
@ -202,12 +211,12 @@ class Laplace(object):
|
|||
def _compute_B_statistics(self, K, W, log_concave):
|
||||
"""
|
||||
Rasmussen suggests the use of a numerically stable positive definite matrix B
|
||||
Which has a positive diagonal element and can be easyily inverted
|
||||
which has positive diagonal elements and can be easily inverted
|
||||
|
||||
:param K: Prior Covariance matrix evaluated at locations X
|
||||
:type K: NxN matrix
|
||||
:param W: Negative hessian at a point (diagonal matrix)
|
||||
:type W: Vector of diagonal values of hessian (1xN)
|
||||
:type W: Vector of diagonal values of Hessian (1xN)
|
||||
:returns: (W12BiW12, L_B, Li_W12)
|
||||
"""
|
||||
if not log_concave:
|
||||
|
|
@ -218,7 +227,8 @@ class Laplace(object):
|
|||
# If the likelihood is non-log-concave, we want to say that there is a negative variance,
|
||||
# to cause the posterior to become less certain than the prior and likelihood.
|
||||
# This is a property only held by non-log-concave likelihoods
|
||||
|
||||
if np.any(np.isnan(W)):
|
||||
raise ValueError('One or more element(s) of W is NaN')
|
||||
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
|
||||
W_12 = np.sqrt(W)
|
||||
B = np.eye(K.shape[0]) + W_12*K*W_12.T
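A small numeric illustration of why B is numerically friendly: with K positive semi-definite and W the (positive) diagonal of the negative Hessian, the eigenvalues of B are at least 1, so the Cholesky below is well behaved. The numbers are made up:
import numpy as np
K = np.array([[1.0, 0.5],
              [0.5, 1.0]])
W = np.array([[2.0], [3.0]])               # diagonal of the negative Hessian
W_12 = np.sqrt(W)
B = np.eye(K.shape[0]) + W_12 * K * W_12.T
L_B = np.linalg.cholesky(B)                # succeeds; B is safely positive definite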
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ class VarDTC(object):
|
|||
def __init__(self, limit=1):
|
||||
#self._YYTfactor_cache = caching.cache()
|
||||
from ...util.caching import Cacher
|
||||
self.limit = limit
|
||||
self.get_trYYT = Cacher(self._get_trYYT, limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
|
||||
|
||||
|
|
@ -33,6 +34,17 @@ class VarDTC(object):
|
|||
def _get_trYYT(self, Y):
|
||||
return param_to_array(np.sum(np.square(Y)))
|
||||
|
||||
def __getstate__(self):
|
||||
# has to be overridden, as Cacher objects cannot be pickled.
|
||||
return self.limit
|
||||
|
||||
def __setstate__(self, state):
|
||||
# has to be overridden, as Cacher objects cannot be pickled.
|
||||
self.limit = state
|
||||
from ...util.caching import Cacher
|
||||
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
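The same pattern, dropping the unpicklable caches in __getstate__ and rebuilding them in __setstate__, sketched in isolation; Wrapper and UnpicklableCache below are made-up stand-ins, not GPy classes:
import pickle

class UnpicklableCache(object):
    """Stand-in for a Cacher-like helper that should not be pickled."""
    def __init__(self, fn):
        self.fn = fn
        self.store = {}

class Wrapper(object):
    def __init__(self, limit=1):
        self.limit = limit
        self.cache = UnpicklableCache(sum)
    def __getstate__(self):
        return self.limit                     # keep only the picklable state
    def __setstate__(self, state):
        self.limit = state
        self.cache = UnpicklableCache(sum)    # rebuild the cache on load

restored = pickle.loads(pickle.dumps(Wrapper(limit=3)))
assert restored.limit == 3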
|
||||
|
||||
def _get_YYTfactor(self, Y):
|
||||
"""
|
||||
find a matrix L which satisfies LLT = YYT.
|
||||
|
|
@ -179,6 +191,7 @@ class VarDTC(object):
|
|||
return post, log_marginal, grad_dict
|
||||
|
||||
class VarDTCMissingData(object):
|
||||
const_jitter = 1e-6
|
||||
def __init__(self, limit=1):
|
||||
from ...util.caching import Cacher
|
||||
self._Y = Cacher(self._subarray_computations, limit)
|
||||
|
|
@ -250,7 +263,7 @@ class VarDTCMissingData(object):
|
|||
|
||||
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
|
||||
if het_noise: beta = beta_all[ind]
|
||||
else: beta = beta_all[0]
|
||||
else: beta = beta_all
|
||||
|
||||
VVT_factor = (beta*y)
|
||||
VVT_factor_all[v, ind].flat = VVT_factor.flat
|
||||
|
|
@ -311,7 +324,7 @@ class VarDTCMissingData(object):
|
|||
het_noise, uncertain_inputs, LB,
|
||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||
psi0, psi1, beta,
|
||||
data_fit, num_data, output_dim, trYYT)
|
||||
data_fit, num_data, output_dim, trYYT, Y)
|
||||
|
||||
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
|
||||
else:
|
||||
|
|
|
|||
545
GPy/inference/latent_function_inference/var_dtc_gpu.py
Normal file
|
|
@ -0,0 +1,545 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from posterior import Posterior
|
||||
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs
|
||||
from ...util import diag
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
import numpy as np
|
||||
from ...util.misc import param_to_array
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
from ...util import gpu_init
|
||||
|
||||
try:
|
||||
import scikits.cuda.linalg as culinalg
|
||||
import pycuda.gpuarray as gpuarray
|
||||
from scikits.cuda import cublas
|
||||
from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod
|
||||
except:
|
||||
pass
|
||||
|
||||
class VarDTC_GPU(object):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
The function self.inference returns a Posterior object, which summarizes
|
||||
the posterior.
|
||||
|
||||
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
|
||||
|
||||
"""
|
||||
const_jitter = np.float64(1e-6)
|
||||
def __init__(self, batchsize=None, gpu_memory=4., limit=1):
|
||||
|
||||
self.batchsize = batchsize
|
||||
self.gpu_memory = gpu_memory
|
||||
|
||||
self.midRes = {}
|
||||
self.batch_pos = 0 # the starting position of the current mini-batch
|
||||
|
||||
self.cublas_handle = gpu_init.cublas_handle
|
||||
|
||||
# Initialize GPU caches
|
||||
self.gpuCache = None
|
||||
|
||||
def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y):
|
||||
ndata = Y.shape[0]
|
||||
if self.batchsize==None:
|
||||
self.batchsize = self._estimateBatchSize(kern, ndata, num_inducing, input_dim, output_dim)
|
||||
if self.gpuCache == None:
|
||||
self.gpuCache = {# inference_likelihood
|
||||
'Kmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'Lm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'ones_gpu' :gpuarray.empty(num_inducing, np.float64,order='F'),
|
||||
'LL_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'b_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
|
||||
'v_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
|
||||
'vvt_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'KmmInvPsi2LLInvT_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'KmmInvPsi2P_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'dL_dpsi2R_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'dL_dKmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'psi1Y_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
|
||||
'psi2_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'beta_gpu' :gpuarray.empty((ndata,),np.float64,order='F'),
|
||||
'YT_gpu' :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN
|
||||
'betaYT_gpu' :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN
|
||||
'psi2_t_gpu' :gpuarray.empty((num_inducing*num_inducing*self.batchsize),np.float64,order='F'),
|
||||
# inference_minibatch
|
||||
'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
|
||||
'dL_dpsi2_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'),
|
||||
'dL_dthetaL_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'betapsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
|
||||
'thetaL_t_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'betaYT2_gpu' :gpuarray.empty((output_dim,self.batchsize),np.float64,order='F'),
|
||||
'psi0p_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'psi1p_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
|
||||
'psi2p_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'),
|
||||
}
|
||||
self.gpuCache['ones_gpu'].fill(1.0)
|
||||
|
||||
YT_gpu = self.gpuCache['YT_gpu']
|
||||
self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
|
||||
|
||||
def _estimateMemoryOccupation(self, N, M, D):
|
||||
"""
|
||||
Estimate the GPU memory occupation.
|
||||
N - the number of total datapoints
|
||||
M - the number of inducing points
|
||||
D - the number of observed (output) dimensions
|
||||
return: the constant memory size, the memory occupation of batchsize=1
|
||||
unit: GB
|
||||
"""
|
||||
return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024.
|
||||
|
||||
def _estimateBatchSize(self, kern, N, M, Q, D):
|
||||
"""
|
||||
Estimate the best batch size.
|
||||
N - the number of total datapoints
|
||||
M - the number of inducing points
|
||||
D - the number of observed (output) dimensions
|
||||
return: the estimated best batch size (the number of data points per mini-batch)
|
||||
|
||||
"""
|
||||
if kern.useGPU:
|
||||
x0,x1 = kern.psicomp.estimateMemoryOccupation(N,M,Q)
|
||||
else:
|
||||
x0, x1 = 0.,0.
|
||||
y0, y1 = self._estimateMemoryOccupation(N, M, D)
|
||||
|
||||
opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N)
|
||||
|
||||
return opt_batchsize
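A rough worked instance of the batch-size heuristic above, assuming a CPU kernel so that the kernel's own GPU cost (x0, x1) is zero; all numbers are made up:
# Illustrative sizes: M inducing points, D outputs, N data points, 4 GB free.
M, D, N, gpu_memory = 100, 50, 100000, 4.
const_gb = (M + 9.*M*M + 3*M*D + N + 2.*N*D) * 8. / 1024.**3     # fixed cost
per_point_gb = (4. + 3.*M + D + 3.*M*M) * 8. / 1024.**3          # cost per datum
batchsize = min(int((gpu_memory - const_gb) / per_point_gb), N)  # ~17000 here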
|
||||
|
||||
def _get_YYTfactor(self, Y):
|
||||
"""
|
||||
find a matrix L which satisfies LLT = YYT.
|
||||
|
||||
Note that L may have fewer columns than Y.
|
||||
"""
|
||||
N, D = Y.shape
|
||||
if (N>=D):
|
||||
return param_to_array(Y)
|
||||
else:
|
||||
return jitchol(tdot(Y))
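A quick numpy check of the identity this factor relies on, using plain numpy in place of jitchol/tdot:
import numpy as np
# With N < D, a Cholesky factor of Y Y^T (N x N) can stand in for Y (N x D):
Y = np.random.randn(3, 10)
L = np.linalg.cholesky(Y.dot(Y.T) + 1e-10 * np.eye(3))
assert np.allclose(L.dot(L.T), Y.dot(Y.T), atol=1e-6)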
|
||||
|
||||
def inference_likelihood(self, kern, X, Z, likelihood, Y):
|
||||
"""
|
||||
The first phase of inference:
|
||||
Compute: log-likelihood, dL_dKmm
|
||||
|
||||
Cached intermediate results: Kmm, KmmInv,
|
||||
"""
|
||||
|
||||
num_inducing, input_dim = Z.shape[0], Z.shape[1]
|
||||
num_data, output_dim = Y.shape
|
||||
|
||||
self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y)
|
||||
|
||||
if isinstance(X, VariationalPosterior):
|
||||
uncertain_inputs = True
|
||||
else:
|
||||
uncertain_inputs = False
|
||||
|
||||
#see whether we've got a different noise variance for each datum
|
||||
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||
het_noise = beta.size > 1
|
||||
trYYT = self._trYYT
|
||||
|
||||
psi1Y_gpu = self.gpuCache['psi1Y_gpu']
|
||||
psi2_gpu = self.gpuCache['psi2_gpu']
|
||||
beta_gpu = self.gpuCache['beta_gpu']
|
||||
YT_gpu = self.gpuCache['YT_gpu']
|
||||
betaYT_gpu = self.gpuCache['betaYT_gpu']
|
||||
psi2_t_gpu = self.gpuCache['psi2_t_gpu']
|
||||
|
||||
if het_noise:
|
||||
beta_gpu.set(np.asfortranarray(beta))
|
||||
mul_bcast(betaYT_gpu,beta_gpu,YT_gpu,beta_gpu.size)
|
||||
YRY_full = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, betaYT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
|
||||
else:
|
||||
beta_gpu.fill(beta)
|
||||
betaYT_gpu.fill(0.)
|
||||
cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
|
||||
YRY_full = trYYT*beta
|
||||
|
||||
if kern.useGPU:
|
||||
psi1Y_gpu.fill(0.)
|
||||
psi2_gpu.fill(0.)
|
||||
psi0_full = 0
|
||||
|
||||
for n_start in xrange(0,num_data,self.batchsize):
|
||||
n_end = min(self.batchsize+n_start, num_data)
|
||||
ndata = n_end - n_start
|
||||
X_slice = X[n_start:n_end]
|
||||
beta_gpu_slice = beta_gpu[n_start:n_end]
|
||||
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
|
||||
if ndata==self.batchsize:
|
||||
psi2_t_gpu_slice = psi2_t_gpu
|
||||
else:
|
||||
psi2_t_gpu_slice = psi2_t_gpu[:num_inducing*num_inducing*ndata]
|
||||
if uncertain_inputs:
|
||||
psi0p_gpu = kern.psi0(Z, X_slice)
|
||||
psi1p_gpu = kern.psi1(Z, X_slice)
|
||||
psi2p_gpu = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0p_gpu = kern.Kdiag(X_slice)
|
||||
psi1p_gpu = kern.K(X_slice, Z)
|
||||
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)
|
||||
|
||||
if het_noise:
|
||||
psi0_full += cublas.cublasDdot(self.cublas_handle, psi0p_gpu.size, beta_gpu_slice.gpudata, 1, psi0p_gpu.gpudata, 1)
|
||||
else:
|
||||
psi0_full += gpuarray.sum(psi0p_gpu).get()
|
||||
|
||||
if uncertain_inputs:
|
||||
if het_noise:
|
||||
mul_bcast(psi2_t_gpu_slice,beta_gpu_slice,psi2p_gpu,beta_gpu_slice.size)
|
||||
sum_axis(psi2_gpu,psi2_t_gpu_slice,1,ndata)
|
||||
else:
|
||||
sum_axis(psi2_gpu,psi2p_gpu,1,ndata)
|
||||
else:
|
||||
if het_noise:
|
||||
psi1_t_gpu = psi2_t_gpu_slice[:,num_inducing*ndata]
|
||||
mul_bcast(psi1_t_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, 1.0, psi1p_gpu.gpudata, ndata, psi1_t_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
|
||||
else:
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
|
||||
|
||||
if not het_noise:
|
||||
psi0_full *= beta
|
||||
if uncertain_inputs:
|
||||
cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
|
||||
|
||||
else:
|
||||
psi2_full = np.zeros((num_inducing,num_inducing),order='F')
|
||||
psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD
|
||||
psi0_full = 0
|
||||
# YRY_full = 0
|
||||
|
||||
for n_start in xrange(0,num_data,self.batchsize):
|
||||
n_end = min(self.batchsize+n_start, num_data)
|
||||
Y_slice = Y[n_start:n_end]
|
||||
X_slice = X[n_start:n_end]
|
||||
if uncertain_inputs:
|
||||
psi0 = kern.psi0(Z, X_slice)
|
||||
psi1 = kern.psi1(Z, X_slice)
|
||||
psi2 = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0 = kern.Kdiag(X_slice)
|
||||
psi1 = kern.K(X_slice, Z)
|
||||
|
||||
if het_noise:
|
||||
beta_slice = beta[n_start:n_end]
|
||||
psi0_full += (beta_slice*psi0).sum()
|
||||
psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
|
||||
# YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
|
||||
else:
|
||||
psi0_full += psi0.sum()
|
||||
psi1Y_full += np.dot(psi1.T,Y_slice) # MxD
|
||||
|
||||
if uncertain_inputs:
|
||||
if het_noise:
|
||||
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
|
||||
else:
|
||||
psi2_full += psi2.sum(axis=0)
|
||||
else:
|
||||
if het_noise:
|
||||
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
|
||||
else:
|
||||
psi2_full += tdot(psi1.T)
|
||||
|
||||
if not het_noise:
|
||||
psi0_full *= beta
|
||||
psi1Y_full *= beta
|
||||
psi2_full *= beta
|
||||
# YRY_full = trYYT*beta
|
||||
|
||||
psi1Y_gpu.set(psi1Y_full)
|
||||
psi2_gpu.set(psi2_full)
|
||||
|
||||
#======================================================================
|
||||
# Compute Common Components
|
||||
#======================================================================
|
||||
|
||||
Kmm = kern.K(Z).copy()
|
||||
Kmm_gpu = self.gpuCache['Kmm_gpu']
|
||||
Kmm_gpu.set(np.asfortranarray(Kmm))
|
||||
diag.add(Kmm, self.const_jitter)
|
||||
ones_gpu = self.gpuCache['ones_gpu']
|
||||
cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1)
|
||||
# assert np.allclose(Kmm, Kmm_gpu.get())
|
||||
|
||||
# Lm = jitchol(Kmm)
|
||||
#
|
||||
Lm_gpu = self.gpuCache['Lm_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1)
|
||||
culinalg.cho_factor(Lm_gpu,'L')
|
||||
# print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max()
|
||||
|
||||
# Lambda = Kmm+psi2_full
|
||||
# LL = jitchol(Lambda)
|
||||
#
|
||||
Lambda_gpu = self.gpuCache['LL_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lambda_gpu.gpudata, 1)
|
||||
cublas.cublasDaxpy(self.cublas_handle, psi2_gpu.size, np.float64(1.0), psi2_gpu.gpudata, 1, Lambda_gpu.gpudata, 1)
|
||||
LL_gpu = Lambda_gpu
|
||||
culinalg.cho_factor(LL_gpu,'L')
|
||||
# print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max()
|
||||
|
||||
# b,_ = dtrtrs(LL, psi1Y_full)
|
||||
# bbt_cpu = np.square(b).sum()
|
||||
#
|
||||
b_gpu = self.gpuCache['b_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, b_gpu.size, psi1Y_gpu.gpudata, 1, b_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing)
|
||||
bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1)
|
||||
# print np.abs(bbt-bbt_cpu)
|
||||
|
||||
# v,_ = dtrtrs(LL.T,b,lower=False)
|
||||
# vvt = np.einsum('md,od->mo',v,v)
|
||||
# LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
|
||||
#
|
||||
v_gpu = self.gpuCache['v_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, v_gpu.size, b_gpu.gpudata, 1, v_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing)
|
||||
vvt_gpu = self.gpuCache['vvt_gpu']
|
||||
cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, output_dim, np.float64(1.0), v_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing, np.float64(0.), vvt_gpu.gpudata, num_inducing)
|
||||
LmInvPsi2LmInvT_gpu = self.gpuCache['KmmInvPsi2LLInvT_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, LmInvPsi2LmInvT_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
|
||||
#tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1)
|
||||
tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get())
|
||||
# print np.abs(vvt-vvt_gpu.get()).max()
|
||||
# print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT)
|
||||
|
||||
# Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
|
||||
# LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
|
||||
# KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
|
||||
# KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T
|
||||
#
|
||||
KmmInvPsi2LLInvT_gpu = LmInvPsi2LmInvT_gpu # Reuse GPU memory (size:MxM)
|
||||
cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, KmmInvPsi2LLInvT_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
|
||||
KmmInvPsi2P_gpu = self.gpuCache['KmmInvPsi2P_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing)
|
||||
# print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max()
|
||||
|
||||
# dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
|
||||
#
|
||||
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, vvt_gpu.size, vvt_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1)
|
||||
cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1)
|
||||
cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1)
|
||||
# print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max()
|
||||
|
||||
#======================================================================
|
||||
# Compute log-likelihood
|
||||
#======================================================================
|
||||
if het_noise:
|
||||
logL_R = -np.log(beta).sum()
|
||||
else:
|
||||
logL_R = -num_data*np.log(beta)
|
||||
# logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
|
||||
|
||||
logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get())
|
||||
logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get())
|
||||
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda)
|
||||
# print np.abs(logL_old - logL)
|
||||
|
||||
#======================================================================
|
||||
# Compute dL_dKmm
|
||||
#======================================================================
|
||||
|
||||
# dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
|
||||
#
|
||||
dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu']
|
||||
cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, num_inducing, np.float64(1.0), KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, np.float64(0.), dL_dKmm_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dKmm_gpu.size, np.float64(1./output_dim), vvt_gpu.gpudata, 1, dL_dKmm_gpu.gpudata, 1)
|
||||
cublas.cublasDscal(self.cublas_handle, dL_dKmm_gpu.size, np.float64(-output_dim/2.), dL_dKmm_gpu.gpudata, 1)
|
||||
# print np.abs(dL_dKmm - dL_dKmm_gpu.get()).max()
|
||||
|
||||
#======================================================================
|
||||
# Compute the Posterior distribution of inducing points p(u|Y)
|
||||
#======================================================================
|
||||
|
||||
post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get())
|
||||
|
||||
return logL, dL_dKmm_gpu.get(), post
|
||||
|
||||
def inference_minibatch(self, kern, X, Z, likelihood, Y):
|
||||
"""
|
||||
The second phase of inference: Computing the derivatives over a minibatch of Y
|
||||
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
|
||||
return a flag showing whether it reached the end of Y (isEnd)
|
||||
"""
|
||||
|
||||
num_data, output_dim = Y.shape
|
||||
num_inducing = Z.shape[0]
|
||||
|
||||
if isinstance(X, VariationalPosterior):
|
||||
uncertain_inputs = True
|
||||
else:
|
||||
uncertain_inputs = False
|
||||
|
||||
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||
het_noise = beta.size > 1
|
||||
|
||||
n_start = self.batch_pos
|
||||
n_end = min(self.batchsize+n_start, num_data)
|
||||
if n_end==num_data:
|
||||
isEnd = True
|
||||
self.batch_pos = 0
|
||||
else:
|
||||
isEnd = False
|
||||
self.batch_pos = n_end
|
||||
|
||||
nSlice = n_end-n_start
|
||||
X_slice = X[n_start:n_end]
|
||||
|
||||
if kern.useGPU:
|
||||
if uncertain_inputs:
|
||||
psi0p_gpu = kern.psi0(Z, X_slice)
|
||||
psi1p_gpu = kern.psi1(Z, X_slice)
|
||||
psi2p_gpu = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0p_gpu = kern.Kdiag(X_slice)
|
||||
psi1p_gpu = kern.K(X_slice, Z)
|
||||
psi2p_gpu = self.gpuCache['psi2p_gpu']
|
||||
if psi2p_gpu.shape[0] > nSlice:
|
||||
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
else:
|
||||
if uncertain_inputs:
|
||||
psi0 = kern.psi0(Z, X_slice)
|
||||
psi1 = kern.psi1(Z, X_slice)
|
||||
psi2 = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0 = kern.Kdiag(X_slice)
|
||||
psi1 = kern.K(X_slice, Z)
|
||||
|
||||
psi0p_gpu = self.gpuCache['psi0p_gpu']
|
||||
psi1p_gpu = self.gpuCache['psi1p_gpu']
|
||||
psi2p_gpu = self.gpuCache['psi2p_gpu']
|
||||
if psi0p_gpu.shape[0] > nSlice:
|
||||
psi0p_gpu = psi0p_gpu[:nSlice]
|
||||
psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
|
||||
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
psi0p_gpu.set(np.asfortranarray(psi0))
|
||||
psi1p_gpu.set(np.asfortranarray(psi1))
|
||||
if uncertain_inputs:
|
||||
psi2p_gpu.set(np.asfortranarray(psi2))
|
||||
|
||||
#======================================================================
|
||||
# Prepare gpu memory
|
||||
#======================================================================
|
||||
|
||||
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
|
||||
v_gpu = self.gpuCache['v_gpu']
|
||||
betaYT_gpu = self.gpuCache['betaYT_gpu']
|
||||
beta_gpu = self.gpuCache['beta_gpu']
|
||||
dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu']
|
||||
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
|
||||
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
|
||||
dL_dthetaL_gpu = self.gpuCache['dL_dthetaL_gpu']
|
||||
psi2R_gpu = self.gpuCache['psi2_t_gpu'][:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
betapsi1_gpu = self.gpuCache['betapsi1_gpu']
|
||||
thetaL_t_gpu = self.gpuCache['thetaL_t_gpu']
|
||||
betaYT2_gpu = self.gpuCache['betaYT2_gpu']
|
||||
|
||||
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
|
||||
beta_gpu_slice = beta_gpu[n_start:n_end]
|
||||
|
||||
# Adjust to the batch size
|
||||
if dL_dpsi0_gpu.shape[0] > nSlice:
|
||||
betaYT2_gpu = betaYT2_gpu[:,:nSlice]
|
||||
dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice]
|
||||
dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
|
||||
dL_dpsi2_gpu = dL_dpsi2_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
dL_dthetaL_gpu = dL_dthetaL_gpu.ravel()[:nSlice]
|
||||
psi2R_gpu = psi2R_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
thetaL_t_gpu = thetaL_t_gpu.ravel()[:nSlice]
|
||||
betapsi1_gpu = betapsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
|
||||
|
||||
mul_bcast(betapsi1_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
|
||||
|
||||
#======================================================================
|
||||
# Compute dL_dpsi
|
||||
#======================================================================
|
||||
|
||||
dL_dpsi0_gpu.fill(0.)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dpsi0_gpu.size, output_dim/(-2.), beta_gpu_slice.gpudata, 1, dL_dpsi0_gpu.gpudata, 1)
|
||||
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice)
|
||||
|
||||
if uncertain_inputs:
|
||||
outer_prod(dL_dpsi2_gpu,beta_gpu_slice,dL_dpsi2R_gpu,beta_gpu_slice.size)
|
||||
else:
|
||||
cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, 1.0, betapsi1_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice)
|
||||
|
||||
#======================================================================
|
||||
# Compute dL_dthetaL
|
||||
#======================================================================
|
||||
|
||||
if not uncertain_inputs:
|
||||
join_prod(psi2p_gpu,psi1p_gpu,psi1p_gpu,nSlice,num_inducing)
|
||||
|
||||
mul_bcast_first(psi2R_gpu,dL_dpsi2R_gpu,psi2p_gpu,nSlice)
|
||||
|
||||
|
||||
dL_dthetaL_gpu.fill(0.)
|
||||
|
||||
cublas.cublasDcopy(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata, 1, betaYT2_gpu.gpudata, 1)
|
||||
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT2_gpu,betaYT2_gpu.size)
|
||||
cublas.cublasDscal(self.cublas_handle, betaYT2_gpu.size, 0.5, betaYT2_gpu.gpudata, 1)
|
||||
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
|
||||
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/(-2.0), beta_gpu_slice.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
|
||||
cublas.cublasDcopy(self.cublas_handle, beta_gpu_slice.size, beta_gpu_slice.gpudata, 1, thetaL_t_gpu.gpudata, 1)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu.size)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,psi0p_gpu,thetaL_t_gpu.size)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/2.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
|
||||
|
||||
thetaL_t_gpu.fill(0.)
|
||||
sum_axis(thetaL_t_gpu, psi2R_gpu, nSlice, num_inducing*num_inducing)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, -1.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
|
||||
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', output_dim, nSlice, num_inducing, -1.0, v_gpu.gpudata, num_inducing, betapsi1_gpu.gpudata, nSlice, 0.0, betaYT2_gpu.gpudata, output_dim)
|
||||
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT_gpu_slice,betaYT2_gpu.size)
|
||||
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
|
||||
|
||||
if kern.useGPU:
|
||||
dL_dpsi0 = dL_dpsi0_gpu
|
||||
dL_dpsi1 = dL_dpsi1_gpu
|
||||
else:
|
||||
dL_dpsi0 = dL_dpsi0_gpu.get()
|
||||
dL_dpsi1 = dL_dpsi1_gpu.get()
|
||||
if uncertain_inputs:
|
||||
if kern.useGPU:
|
||||
dL_dpsi2 = dL_dpsi2_gpu
|
||||
else:
|
||||
dL_dpsi2 = dL_dpsi2_gpu.get()
|
||||
if het_noise:
|
||||
dL_dthetaL = dL_dthetaL_gpu.get()
|
||||
else:
|
||||
dL_dthetaL = gpuarray.sum(dL_dthetaL_gpu).get()
|
||||
if uncertain_inputs:
|
||||
grad_dict = {'dL_dpsi0':dL_dpsi0,
|
||||
'dL_dpsi1':dL_dpsi1,
|
||||
'dL_dpsi2':dL_dpsi2,
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
else:
|
||||
grad_dict = {'dL_dKdiag':dL_dpsi0,
|
||||
'dL_dKnm':dL_dpsi1,
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
return isEnd, (n_start,n_end), grad_dict
|
||||
|
||||
|
|
@ -280,3 +280,54 @@ class VarDTC_minibatch(object):
|
|||
|
||||
return isEnd, (n_start,n_end), grad_dict
|
||||
|
||||
|
||||
def update_gradients(model):
|
||||
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
|
||||
het_noise = model.likelihood.variance.size > 1
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL = np.empty((model.Y.shape[0],))
|
||||
else:
|
||||
dL_dthetaL = 0
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
|
||||
kern_grad = model.kern.gradient.copy()
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
isEnd = False
|
||||
while not isEnd:
|
||||
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
X_slice = model.X[n_range[0]:n_range[1]]
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
kern_grad += model.kern.gradient
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
|
||||
|
||||
#gradients w.r.t. posterior parameters of X
|
||||
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
model.set_X_gradients(X_slice, X_grad)
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
|
||||
else:
|
||||
dL_dthetaL += grad_dict['dL_dthetaL']
|
||||
|
||||
# Set the gradients w.r.t. kernel
|
||||
model.kern.gradient = kern_grad
|
||||
|
||||
# Update Log-likelihood
|
||||
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
|
||||
# update for the KL divergence
|
||||
model.variational_prior.update_gradients_KL(model.X)
|
||||
|
||||
# dL_dthetaL
|
||||
model.likelihood.update_gradients(dL_dthetaL)
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
from _src.kern import Kern
|
||||
from _src.rbf import RBF
|
||||
from _src.linear import Linear
|
||||
from _src.linear import Linear, LinearFull
|
||||
from _src.static import Bias, White
|
||||
from _src.brownian import Brownian
|
||||
from _src.sympykern import Sympykern
|
||||
from _src.symbolic import Symbolic
|
||||
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||
from _src.mlp import MLP
|
||||
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ class Add(CombinationKernel):
|
|||
If a list of parts (of this kernel!) `which_parts` is given, only
|
||||
those parts are used to compute the covariance.
|
||||
"""
|
||||
assert X.shape[1] > max(np.r_[self.active_dims])
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
|
|
@ -33,7 +32,6 @@ class Add(CombinationKernel):
|
|||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def Kdiag(self, X, which_parts=None):
|
||||
assert X.shape[1] > max(np.r_[self.active_dims])
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
|
|
@ -160,21 +158,13 @@ class Add(CombinationKernel):
|
|||
target_S += b
|
||||
return target_mu, target_S
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices, rest can get recomputed
|
||||
"""
|
||||
return super(Add, self)._getstate()
|
||||
|
||||
def _setstate(self, state):
|
||||
super(Add, self)._setstate(state)
|
||||
|
||||
def add(self, other, name='sum'):
|
||||
if isinstance(other, Add):
|
||||
other_params = other._parameters_[:]
|
||||
for p in other_params:
|
||||
other.remove_parameter(p)
|
||||
self.add_parameters(*other_params)
|
||||
else: self.add_parameter(other)
|
||||
else:
|
||||
self.add_parameter(other)
|
||||
self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
|
||||
return self
|
||||
|
|
@ -54,85 +54,93 @@ class IndependentOutputs(CombinationKernel):
|
|||
self.kern = kernels
|
||||
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
|
||||
self.index_dim = index_dim
|
||||
self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
|
||||
|
||||
def K(self,X ,X2=None):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if X2 is None:
|
||||
target = np.zeros((X.shape[0], X.shape[0]))
|
||||
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
|
||||
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
|
||||
else:
|
||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
|
||||
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
|
||||
return target
|
||||
|
||||
def Kdiag(self,X):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
target = np.zeros(X.shape[0])
|
||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||
return target
|
||||
|
||||
def update_gradients_full(self,dL_dK,X,X2=None):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
if self.single_kern: target = np.zeros(self.kern.size)
|
||||
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
|
||||
if self.single_kern:
|
||||
target = np.zeros(self.kern.size)
|
||||
kerns = itertools.repeat(self.kern)
|
||||
else:
|
||||
kerns = self.kern
|
||||
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
|
||||
def collate_grads(kern, i, dL, X, X2):
|
||||
kern.update_gradients_full(dL,X,X2)
|
||||
if self.single_kern: target[:] += kern.gradient
|
||||
else: target[i][:] += kern.gradient
|
||||
if X2 is None:
|
||||
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
|
||||
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))]
|
||||
else:
|
||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
|
||||
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
|
||||
if self.single_kern: kern.gradient = target
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
|
||||
def gradients_X(self,dL_dK, X, X2=None):
|
||||
target = np.zeros(X.shape)
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if X2 is None:
|
||||
# TODO: make use of index_to_slices
|
||||
values = np.unique(X[:,self.index_dim])
|
||||
slices = [X[:,self.index_dim]==i for i in values]
|
||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
||||
for kern, s in zip(self.kerns, slices)]
|
||||
for kern, s in zip(kerns, slices)]
|
||||
#slices = index_to_slices(X[:,self.index_dim])
|
||||
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
|
||||
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
||||
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||
#import ipdb;ipdb.set_trace()
|
||||
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
|
||||
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
|
||||
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)]
|
||||
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)]
|
||||
else:
|
||||
values = np.unique(X[:,self.index_dim])
|
||||
slices = [X[:,self.index_dim]==i for i in values]
|
||||
slices2 = [X2[:,self.index_dim]==i for i in values]
|
||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
|
||||
for kern, s, s2 in zip(self.kerns, slices, slices2)]
|
||||
for kern, s, s2 in zip(kerns, slices, slices2)]
|
||||
# TODO: make work with index_to_slices
|
||||
#slices = index_to_slices(X[:,self.index_dim])
|
||||
#slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
|
||||
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
|
||||
return target
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
target = np.zeros(X.shape)
|
||||
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
||||
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||
return target
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if self.single_kern: target = np.zeros(self.kern.size)
|
||||
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
|
||||
else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
|
||||
def collate_grads(kern, i, dL, X):
|
||||
kern.update_gradients_diag(dL,X)
|
||||
if self.single_kern: target[:] += kern.gradient
|
||||
else: target[i][:] += kern.gradient
|
||||
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
|
||||
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
|
||||
if self.single_kern: kern.gradient = target
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
|
||||
class Hierarchical(CombinationKernel):
|
||||
"""
|
||||
|
|
@ -148,30 +156,30 @@ class Hierarchical(CombinationKernel):
|
|||
def __init__(self, kern, name='hierarchy'):
|
||||
assert all([k.input_dim==kerns[0].input_dim for k in kerns])
|
||||
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
|
||||
self.kerns = kerns
|
||||
self.add_parameters(self.kerns)
|
||||
kerns = kerns
|
||||
self.add_parameters(kerns)
|
||||
|
||||
def K(self,X ,X2=None):
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
|
||||
K = self.kerns[0].K(X, X2)
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
|
||||
K = kerns[0].K(X, X2)
|
||||
if X2 is None:
|
||||
[[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
|
||||
[[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
|
||||
else:
|
||||
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
[[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)]
|
||||
[[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(kerns[1:], slices, slices2)]
|
||||
return target
|
||||
|
||||
def Kdiag(self,X):
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
|
||||
K = self.kerns[0].K(X, X2)
|
||||
[[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
|
||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
|
||||
K = kerns[0].K(X, X2)
|
||||
[[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
|
||||
return target
|
||||
|
||||
def update_gradients_full(self,dL_dK,X,X2=None):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
self.kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(self.kerns[1:], slices):
|
||||
kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(kerns[1:], slices):
|
||||
target = np.zeros(k.size)
|
||||
def collate_grads(dL, X, X2):
|
||||
k.update_gradients_full(dL,X,X2)
|
||||
|
|
@ -180,8 +188,8 @@ class Hierarchical(CombinationKernel):
|
|||
k._set_gradient(target)
|
||||
else:
|
||||
X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
|
||||
self.kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(self.kerns[1:], slices):
|
||||
kerns[0].update_gradients_full(dL_dK, X, None)
|
||||
for k, slices_k in zip(kerns[1:], slices):
|
||||
target = np.zeros(k.size)
|
||||
def collate_grads(dL, X, X2):
|
||||
k.update_gradients_full(dL,X,X2)
|
||||
|
|
|
|||
|
|
@ -15,31 +15,57 @@ class Kern(Parameterized):
|
|||
# found in kernel_slice_operations
|
||||
__metaclass__ = KernCallsViaSlicerMeta
|
||||
#===========================================================================
|
||||
_debug=False
|
||||
def __init__(self, input_dim, active_dims, name, *a, **kw):
|
||||
_support_GPU=False
|
||||
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
|
||||
"""
|
||||
The base class for a kernel: a positive definite function
|
||||
which forms the covariance function (kernel).
|
||||
|
||||
input_dim:
|
||||
|
||||
is the number of dimensions to work on. Make sure to give the
|
||||
tight dimensionality of inputs.
|
||||
You most likely want this to be the integer telling the number of
|
||||
input dimensions of the kernel.
|
||||
If this is not an integer (!) we will work on the whole input matrix X,
|
||||
and will not check whether the dimensions match (!).
|
||||
|
||||
active_dims:
|
||||
|
||||
are the active dimensions of the input X this kernel works on.
|
||||
All kernels receive a sliced X as input if active_dims is not None;
|
||||
if active_dims is None, slicing is switched off and X is passed through as given.
|
||||
|
||||
:param int input_dim: the number of input dimensions to the function
|
||||
:param array-like|slice active_dims: list of indices on which dimensions this kernel works on
|
||||
:param array-like|slice|None active_dims: indices of the input dimensions this kernel works on, or None to switch off slicing
|
||||
|
||||
Do not instantiate.
|
||||
"""
|
||||
super(Kern, self).__init__(name=name, *a, **kw)
|
||||
self.active_dims = active_dims if active_dims is not None else slice(0, input_dim)
|
||||
try:
|
||||
self.input_dim = int(input_dim)
|
||||
self.active_dims = active_dims# if active_dims is not None else slice(0, input_dim, 1)
|
||||
except TypeError:
|
||||
# input_dim is something other than an integer
|
||||
self.input_dim = input_dim
|
||||
if active_dims is not None:
|
||||
print "WARNING: given input_dim={} is not an integer and active_dims={} is given, switching off slicing"
|
||||
self.active_dims = None
|
||||
|
||||
if self.active_dims is not None and self.input_dim is not None:
|
||||
assert isinstance(self.active_dims, (slice, list, tuple, np.ndarray)), 'active_dims needs to be an array-like or slice object over dimensions, {} given'.format(self.active_dims.__class__)
|
||||
if isinstance(self.active_dims, slice):
|
||||
self.active_dims = slice(self.active_dims.start or 0, self.active_dims.stop or self.input_dim, self.active_dims.step or 1)
|
||||
active_dim_size = int(np.round((self.active_dims.stop-self.active_dims.start)/self.active_dims.step))
|
||||
elif isinstance(self.active_dims, np.ndarray):
|
||||
assert self.active_dims.ndim == 1, 'only flat indices allowed, given active_dims.shape={}, provide only indexes to the dimensions of the input'.format(self.active_dims.shape)
|
||||
#assert np.all(self.active_dims >= 0), 'active dimensions need to be positive. negative indexing is not allowed'
|
||||
assert self.active_dims.ndim == 1, 'only flat indices allowed, given active_dims.shape={}, provide only indexes to the dimensions (columns) of the input'.format(self.active_dims.shape)
|
||||
active_dim_size = self.active_dims.size
|
||||
else:
|
||||
active_dim_size = len(self.active_dims)
|
||||
assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
|
||||
self._sliced_X = 0
|
||||
self.useGPU = self._support_GPU and useGPU
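# Usage sketch (assuming the usual GPy.kern constructors): a kernel that only
# sees columns 0 and 2 of a three-column X would be built as
#     k = GPy.kern.RBF(input_dim=2, active_dims=[0, 2])
# and k.K(X) then slices X to X[:, [0, 2]] internally before evaluation.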
|
||||
|
||||
@Cache_this(limit=10)
|
||||
def _slice_X(self, X):
|
||||
|
|
@ -175,22 +201,6 @@ class Kern(Parameterized):
|
|||
#else: kernels.append(other)
|
||||
return Prod([self, other], name)
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices; the rest can be recomputed
|
||||
"""
|
||||
return super(Kern, self)._getstate() + [
|
||||
self.active_dims,
|
||||
self.input_dim,
|
||||
self._sliced_X]
|
||||
|
||||
def _setstate(self, state):
|
||||
self._sliced_X = state.pop()
|
||||
self.input_dim = state.pop()
|
||||
self.active_dims = state.pop()
|
||||
super(Kern, self)._setstate(state)
|
||||
|
||||
class CombinationKernel(Kern):
|
||||
"""
|
||||
Abstract super class for combination kernels.
|
||||
|
|
@ -208,9 +218,7 @@ class CombinationKernel(Kern):
|
|||
:param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
|
||||
"""
|
||||
assert all([isinstance(k, Kern) for k in kernels])
|
||||
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
|
||||
input_dim = active_dims.max()+1 + len(extra_dims)
|
||||
active_dims = slice(active_dims.max()+1+len(extra_dims))
|
||||
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
|
||||
# initialize the kernel with the full input_dim
|
||||
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
|
||||
self.extra_dims = extra_dims
|
||||
|
|
@ -220,6 +228,15 @@ class CombinationKernel(Kern):
|
|||
def parts(self):
|
||||
return self._parameters_
|
||||
|
||||
def get_input_dim_active_dims(self, kernels, extra_dims = None):
|
||||
#active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
|
||||
#active_dims = np.array(np.concatenate((active_dims, extra_dims if extra_dims is not None else [])), dtype=int)
|
||||
input_dim = np.array([k.input_dim for k in kernels])
|
||||
if np.all(input_dim[0]==input_dim):
|
||||
input_dim = input_dim[0]
|
||||
active_dims = None
|
||||
return input_dim, active_dims
|
||||
|
||||
def input_sensitivity(self):
|
||||
in_sen = np.zeros((self.num_params, self.input_dim))
|
||||
for i, p in enumerate(self.parts):
|
||||
|
|
|
|||
|
|
@ -5,130 +5,138 @@ Created on 11 Mar 2014
|
|||
'''
|
||||
from ...core.parameterization.parameterized import ParametersChangedMeta
|
||||
import numpy as np
|
||||
from functools import wraps
|
||||
|
||||
def put_clean(dct, name, func):
|
||||
if name in dct:
|
||||
dct['_clean_{}'.format(name)] = dct[name]
|
||||
dct[name] = func(dct[name])
|
||||
|
||||
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||
def __call__(self, *args, **kw):
|
||||
instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
|
||||
instance.K = _slice_wrapper(instance, instance.K)
|
||||
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
|
||||
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
|
||||
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
|
||||
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
|
||||
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
|
||||
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
|
||||
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
|
||||
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
|
||||
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
|
||||
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
|
||||
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
|
||||
instance.parameters_changed()
|
||||
return instance
|
||||
def __new__(cls, name, bases, dct):
|
||||
put_clean(dct, 'K', _slice_K)
|
||||
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
||||
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
||||
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
||||
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||
|
||||
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
|
||||
"""
|
||||
This method wraps the functions in the kernel to make sure all kernels always see their respective input dimensions.
|
||||
The different switches are:
|
||||
diag: the operation works on the diagonal and takes no X2
|
||||
derivative: if first arg is dL_dK
|
||||
psi_stat: if first 3 args are dL_dpsi0..2
|
||||
psi_stat_Z: if first 2 args are dL_dpsi1..2
|
||||
"""
|
||||
if derivative:
|
||||
if diag:
|
||||
def x_slice_wrapper(dL_dKdiag, X):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced:
|
||||
ret = np.zeros(X.shape)
|
||||
X = kern._slice_X(X) if not kern._sliced_X else X
|
||||
# if the return value is of shape X.shape, we need to make sure to return the right shape
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
|
||||
else: ret = operation(dL_dKdiag, X)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
put_clean(dct, 'psi0', _slice_psi)
|
||||
put_clean(dct, 'psi1', _slice_psi)
|
||||
put_clean(dct, 'psi2', _slice_psi)
|
||||
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
|
||||
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
|
||||
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
|
||||
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
|
||||
|
||||
class _Slice_wrap(object):
|
||||
def __init__(self, k, X, X2=None):
|
||||
self.k = k
|
||||
self.shape = X.shape
|
||||
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
|
||||
if X2 is not None:
|
||||
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
|
||||
if (self.k.active_dims is not None) and (self.k._sliced_X == 0):
|
||||
assert X.shape[1] >= len(np.r_[self.k.active_dims]), "At least {} dimensional X needed, X.shape={!s}".format(len(np.r_[self.k.active_dims]), X.shape)
|
||||
self.X = self.k._slice_X(X)
|
||||
self.X2 = self.k._slice_X(X2) if X2 is not None else X2
|
||||
self.ret = True
|
||||
else:
|
||||
self.X = X
|
||||
self.X2 = X2
|
||||
self.ret = False
|
||||
def __enter__(self):
|
||||
self.k._sliced_X += 1
|
||||
return self
|
||||
def __exit__(self, *a):
|
||||
self.k._sliced_X -= 1
|
||||
def handle_return_array(self, return_val):
|
||||
if self.ret:
|
||||
ret = np.zeros(self.shape)
|
||||
ret[:, self.k.active_dims] = return_val
|
||||
return ret
|
||||
elif psi_stat:
|
||||
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced:
|
||||
ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
|
||||
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
|
||||
kern._sliced_X += 1
|
||||
# if the return value is of shape X.shape, we need to make sure to return the right shape
|
||||
try:
|
||||
if ret_X_not_sliced:
|
||||
ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
|
||||
return return_val
|
||||
|
||||
def _slice_K(f):
|
||||
@wraps(f)
|
||||
def wrap(self, X, X2 = None, *a, **kw):
|
||||
with _Slice_wrap(self, X, X2) as s:
|
||||
ret = f(self, s.X, s.X2, *a, **kw)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_Kdiag(f):
|
||||
@wraps(f)
|
||||
def wrap(self, X, *a, **kw):
|
||||
with _Slice_wrap(self, X, None) as s:
|
||||
ret = f(self, s.X, *a, **kw)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_update_gradients_full(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dK, X, X2=None):
|
||||
with _Slice_wrap(self, X, X2) as s:
|
||||
ret = f(self, dL_dK, s.X, s.X2)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_update_gradients_diag(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dKdiag, X):
|
||||
with _Slice_wrap(self, X, None) as s:
|
||||
ret = f(self, dL_dKdiag, s.X)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_X(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dK, X, X2=None):
|
||||
with _Slice_wrap(self, X, X2) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_X_diag(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dKdiag, X):
|
||||
with _Slice_wrap(self, X, None) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_psi(f):
|
||||
@wraps(f)
|
||||
def wrap(self, Z, variational_posterior):
|
||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||
ret = f(self, s.X, s.X2)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_update_gradients_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_Z_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2))
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_qX_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
with _Slice_wrap(self, variational_posterior, Z) as s:
|
||||
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
|
||||
r2 = ret[:2]
|
||||
ret[0] = ret1
|
||||
ret[1] = ret2
|
||||
ret[0][:, kern.active_dims] = r2[0]
|
||||
ret[1][:, kern.active_dims] = r2[1]
|
||||
ret[0] = s.handle_return_array(r2[0])
|
||||
ret[1] = s.handle_return_array(r2[1])
|
||||
del r2
|
||||
else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
elif psi_stat_Z:
|
||||
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced: ret = np.zeros(Z.shape)
|
||||
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
if ret_X_not_sliced:
|
||||
ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
else:
|
||||
def x_slice_wrapper(dL_dK, X, X2=None):
|
||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
||||
if ret_X_not_sliced:
|
||||
ret = np.zeros(X.shape)
|
||||
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
|
||||
else: ret = operation(dL_dK, X, X2)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
else:
|
||||
if diag:
|
||||
def x_slice_wrapper(X, *args, **kw):
|
||||
X = kern._slice_X(X) if not kern._sliced_X else X
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
ret = operation(X, *args, **kw)
|
||||
except:
|
||||
raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
else:
|
||||
def x_slice_wrapper(X, X2=None, *args, **kw):
|
||||
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
|
||||
kern._sliced_X += 1
|
||||
try:
|
||||
ret = operation(X, X2, *args, **kw)
|
||||
except: raise
|
||||
finally:
|
||||
kern._sliced_X -= 1
|
||||
return ret
|
||||
x_slice_wrapper._operation = operation
|
||||
x_slice_wrapper.__name__ = ("slicer("+str(operation)
|
||||
+(","+str(bool(diag)) if diag else'')
|
||||
+(','+str(bool(derivative)) if derivative else '')
|
||||
+')')
|
||||
x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
|
||||
return x_slice_wrapper
|
||||
return wrap
|
||||
|
|
|
|||
|
|
@ -313,3 +313,47 @@ class Linear(Kern):
|
|||
|
||||
def input_sensitivity(self):
|
||||
return np.ones(self.input_dim) * self.variances
|
||||
|
||||
class LinearFull(Kern):
|
||||
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
|
||||
super(LinearFull, self).__init__(input_dim, active_dims, name)
|
||||
if W is None:
|
||||
W = np.ones((input_dim, rank))
|
||||
if kappa is None:
|
||||
kappa = np.ones(input_dim)
|
||||
assert W.shape == (input_dim, rank)
|
||||
assert kappa.shape == (input_dim,)
|
||||
|
||||
self.W = Param('W', W)
|
||||
self.kappa = Param('kappa', kappa, Logexp())
|
||||
self.add_parameters(self.W, self.kappa)
|
||||
|
||||
def K(self, X, X2=None):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2)
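# equivalently X.dot(P).dot((X if X2 is None else X2).T), i.e. K = X P X2^T
# with P = W W^T + diag(kappa)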
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None else X2)
|
||||
self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W)
|
||||
self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W)
|
||||
|
||||
def Kdiag(self, X):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
return np.einsum('ij,jk,ik->i', X, P, X)
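# i.e. the diagonal of X P X^T, computed row-wise without forming the full matrix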
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag)
|
||||
self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag)
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
if X2 is None:
|
||||
return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P)
|
||||
else:
|
||||
return np.einsum('ij,jk,kl->il', dL_dK, X2, P)
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ class Prod(CombinationKernel):
|
|||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def K(self, X, X2=None, which_parts=None):
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
|
|
@ -33,7 +32,6 @@ class Prod(CombinationKernel):
|
|||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def Kdiag(self, X, which_parts=None):
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
|
||||
|
|
@ -58,8 +56,6 @@ class Prod(CombinationKernel):
|
|||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape)
|
||||
for k1,k2 in itertools.combinations(self.parts, 2):
|
||||
target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
|
||||
target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
|
||||
target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X)
|
||||
target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X)
|
||||
return target
|
||||
|
||||
|
||||
|
|
|
|||
535
GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py
Normal file
|
|
@ -0,0 +1,535 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
"""
|
||||
The module for computing the psi statistics on the GPU
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from GPy.util.caching import Cache_this
|
||||
|
||||
from ....util import gpu_init
|
||||
|
||||
try:
|
||||
import pycuda.gpuarray as gpuarray
|
||||
from scikits.cuda import cublas
|
||||
from pycuda.reduction import ReductionKernel
|
||||
from pycuda.elementwise import ElementwiseKernel
|
||||
from ....util import linalg_gpu
|
||||
|
||||
|
||||
# The kernel for computing psi1 (het_noise)
|
||||
comp_psi1 = ElementwiseKernel(
|
||||
"double *psi1, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
|
||||
"psi1[i] = comp_psi1_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
|
||||
"comp_psi1",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
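// LOGEXPSUM(a,b) is a numerically stable log(exp(a)+exp(b)) (log-sum-exp trick)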
|
||||
|
||||
__device__ double comp_psi1_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
int n = idx%N;
|
||||
int m = idx/N;
|
||||
double psi1_exp=0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)] + muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
|
||||
psi1_exp += LOGEXPSUM(exp1,exp2);
|
||||
}
|
||||
return var*exp(psi1_exp);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel for computing psi2 (het_noise)
|
||||
comp_psi2 = ElementwiseKernel(
|
||||
"double *psi2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
|
||||
"psi2[i] = comp_psi2_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
|
||||
"comp_psi2",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
|
||||
|
||||
__device__ double comp_psi2_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
// psi2 (n,m1,m2)
|
||||
int m2 = idx/(M*N);
|
||||
int m1 = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double psi2_exp=0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
|
||||
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
|
||||
psi2_exp += LOGEXPSUM(exp1,exp2);
|
||||
}
|
||||
return var*var*exp(psi2_exp);
|
||||
}
|
||||
""")
|
||||
|
||||
# compute psidenom
|
||||
comp_logpsidenom = ElementwiseKernel(
|
||||
"double *out, double *S, double *l, double scale, int N",
|
||||
"out[i] = comp_logpsidenom_element(S, l, scale, N, i)",
|
||||
"comp_logpsidenom",
|
||||
preamble="""
|
||||
__device__ double comp_logpsidenom_element(double *S, double *l, double scale, int N, int idx)
|
||||
{
|
||||
int q = idx/N;
|
||||
|
||||
return log(scale*S[idx]/l[q]+1.0);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel for computing dpsi1/dvar (het_noise)
|
||||
comp_dpsi1_dvar = ElementwiseKernel(
|
||||
"double *dpsi1_dvar, double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
|
||||
"dpsi1_dvar[i] = comp_dpsi1_dvar_element(psi1_neq, psi1exp1, psi1exp2, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
|
||||
"comp_dpsi1_dvar",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
|
||||
|
||||
__device__ double comp_dpsi1_dvar_element(double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
int n = idx%N;
|
||||
int m = idx/N;
|
||||
|
||||
double psi1_sum = 0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
|
||||
double exp1_e = -(muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)])/2.0 + exp1_e;
|
||||
double exp2_e = - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
|
||||
double psi1_q = LOGEXPSUM(exp1,exp2);
|
||||
psi1_neq[IDX_NMQ(n,m,q)] = -psi1_q;
|
||||
psi1exp1[IDX_NMQ(n,m,q)] = exp(exp1_e);
|
||||
psi1exp2[IDX_MQ(m,q)] = exp(exp2_e);
|
||||
psi1_sum += psi1_q;
|
||||
}
|
||||
for(int q=0;q<Q;q++) {
|
||||
psi1_neq[IDX_NMQ(n,m,q)] = exp(psi1_neq[IDX_NMQ(n,m,q)]+psi1_sum);
|
||||
}
|
||||
return exp(psi1_sum);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel for computing the remaining psi1 derivatives (het_noise)
|
||||
comp_psi1_der = ElementwiseKernel(
|
||||
"double *dpsi1_dl, double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
|
||||
"dpsi1_dl[i] = comp_psi1_der_element(dpsi1_dmu, dpsi1_dS, dpsi1_dgamma, dpsi1_dZ, psi1_neq, psi1exp1, psi1exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
|
||||
"comp_psi1_der",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
|
||||
__device__ double comp_psi1_der_element(double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
|
||||
{
|
||||
int q = idx/(M*N);
|
||||
int m = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double neq = psi1_neq[IDX_NMQ(n,m,q)];
|
||||
double gamma_c = gamma[IDX_NQ(n,q)];
|
||||
double Z_c = Z[IDX_MQ(m,q)];
|
||||
double S_c = S[IDX_NQ(n,q)];
|
||||
double l_c = l[q];
|
||||
double l_sqrt_c = sqrt(l[q]);
|
||||
double psi1exp1_c = psi1exp1[IDX_NMQ(n,m,q)];
|
||||
double psi1exp2_c = psi1exp2[IDX_MQ(m,q)];
|
||||
|
||||
double denom = S_c/l_c+1.0;
|
||||
double denom_sqrt = sqrt(denom);
|
||||
double Zmu = Z_c-mu[IDX_NQ(n,q)];
|
||||
double psi1_common = gamma_c/(denom_sqrt*denom*l_c);
|
||||
double gamma1 = 1-gamma_c;
|
||||
|
||||
dpsi1_dgamma[IDX_NMQ(n,m,q)] = var*neq*(psi1exp1_c/denom_sqrt - psi1exp2_c);
|
||||
dpsi1_dmu[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*Zmu*psi1exp1_c);
|
||||
dpsi1_dS[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*(Zmu*Zmu/(S_c+l_c)-1.0)*psi1exp1_c)/2.0;
|
||||
dpsi1_dZ[IDX_NMQ(n,m,q)] = var*neq*(-psi1_common*Zmu*psi1exp1_c-gamma1*Z_c/l_c*psi1exp2_c);
|
||||
return var*neq*(psi1_common*(S_c/l_c+Zmu*Zmu/(S_c+l_c))*psi1exp1_c+gamma1*Z_c*Z_c/l_c*psi1exp2_c)*l_sqrt_c;
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel for computing dpsi2/dvar (het_noise)
|
||||
comp_dpsi2_dvar = ElementwiseKernel(
|
||||
"double *dpsi2_dvar, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
|
||||
"dpsi2_dvar[i] = comp_dpsi2_dvar_element(psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
|
||||
"comp_dpsi2_dvar",
|
||||
preamble="""
|
||||
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
|
||||
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
|
||||
|
||||
__device__ double comp_dpsi2_dvar_element(double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
// psi2 (n,m1,m2)
|
||||
int m2 = idx/(M*N);
|
||||
int m1 = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double psi2_sum=0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
|
||||
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
|
||||
double exp1_e = - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 +exp1_e;
|
||||
double exp2_e = - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
|
||||
double psi2_q = LOGEXPSUM(exp1,exp2);
|
||||
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = -psi2_q;
|
||||
psi2exp1[IDX_NMMQ(n,m1,m2,q)] = exp(exp1_e);
|
||||
psi2exp2[IDX_MMQ(m1,m2,q)] = exp(exp2_e);
|
||||
psi2_sum += psi2_q;
|
||||
}
|
||||
for(int q=0;q<Q;q++) {
|
||||
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = exp(psi2_neq[IDX_NMMQ(n,m1,m2,q)]+psi2_sum);
|
||||
}
|
||||
return 2*var*exp(psi2_sum);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel for computing the remaining psi2 derivatives (het_noise)
|
||||
comp_psi2_der = ElementwiseKernel(
|
||||
"double *dpsi2_dl, double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
|
||||
"dpsi2_dl[i] = comp_psi2_der_element(dpsi2_dmu, dpsi2_dS, dpsi2_dgamma, dpsi2_dZ, psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
|
||||
"comp_psi2_der",
|
||||
preamble="""
|
||||
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
|
||||
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
|
||||
__device__ double comp_psi2_der_element(double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
|
||||
{
|
||||
// dpsi2 (n,m1,m2,q)
|
||||
int q = idx/(M*M*N);
|
||||
int m2 = (idx%(M*M*N))/(M*N);
|
||||
int m1 = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double neq = psi2_neq[IDX_NMMQ(n,m1,m2,q)];
|
||||
double gamma_c = gamma[IDX_NQ(n,q)];
|
||||
double Z1_c = Z[IDX_MQ(m1,q)];
|
||||
double Z2_c = Z[IDX_MQ(m2,q)];
|
||||
double S_c = S[IDX_NQ(n,q)];
|
||||
double l_c = l[q];
|
||||
double l_sqrt_c = sqrt(l[q]);
|
||||
double psi2exp1_c = psi2exp1[IDX_NMMQ(n,m1,m2,q)];
|
||||
double psi2exp2_c = psi2exp2[IDX_MMQ(m1,m2,q)];
|
||||
|
||||
double dZ = Z2_c - Z1_c;
|
||||
double muZ = mu[IDX_NQ(n,q)] - (Z1_c+Z2_c)/2.0;
|
||||
double Z2 = Z1_c*Z1_c+Z2_c*Z2_c;
|
||||
double denom = 2.0*S_c/l_c+1.0;
|
||||
double denom_sqrt = sqrt(denom);
|
||||
double psi2_common = gamma_c/(denom_sqrt*denom*l_c);
|
||||
double gamma1 = 1-gamma_c;
|
||||
double var2 = var*var;
|
||||
|
||||
dpsi2_dgamma[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2exp1_c/denom_sqrt - psi2exp2_c);
|
||||
dpsi2_dmu[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(-2.0*psi2_common*muZ*psi2exp1_c);
|
||||
dpsi2_dS[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(2.0*muZ*muZ/(2.0*S_c+l_c)-1.0)*psi2exp1_c);
|
||||
dpsi2_dZ[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(dZ*denom/-2.0+muZ)*psi2exp1_c-gamma1*Z2_c/l_c*psi2exp2_c)*2.0;
|
||||
return var2*neq*(psi2_common*(S_c/l_c+dZ*dZ*denom/(4.0*l_c)+muZ*muZ/(2.0*S_c+l_c))*psi2exp1_c+gamma1*Z2/(2.0*l_c)*psi2exp2_c)*l_sqrt_c*2.0;
|
||||
}
|
||||
""")
|
||||
|
||||
except:
|
||||
pass
|
||||
|
||||
class PSICOMP_SSRBF(object):
|
||||
def __init__(self):
|
||||
assert gpu_init.initSuccess, "GPU initialization failed!"
|
||||
self.cublas_handle = gpu_init.cublas_handle
|
||||
self.gpuCache = None
|
||||
self.gpuCacheAll = None
|
||||
|
||||
def _initGPUCache(self, N, M, Q):
|
||||
if self.gpuCache!=None and self.gpuCache['mu_gpu'].shape[0] == N:
|
||||
return
|
||||
|
||||
if self.gpuCacheAll!=None and self.gpuCacheAll['mu_gpu'].shape[0]<N: # Too small cache -> reallocate
|
||||
self._releaseMemory()
|
||||
|
||||
if self.gpuCacheAll == None:
|
||||
self.gpuCacheAll = {
|
||||
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
||||
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
|
||||
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'logGamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'log1Gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'logpsi1denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'logpsi2denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'),
|
||||
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
|
||||
'psi2_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
|
||||
# derivatives psi1
|
||||
'psi1_neq_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'psi1exp1_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'psi1exp2_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dvar_gpu' :gpuarray.empty((N,M),np.float64, order='F'),
|
||||
'dpsi1_dl_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dZ_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dgamma_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dmu_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dS_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
# derivatives psi2
|
||||
'psi2_neq_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'psi2exp1_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'psi2exp2_gpu' :gpuarray.empty((M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dvar_gpu' :gpuarray.empty((N,M,M),np.float64, order='F'),
|
||||
'dpsi2_dl_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dZ_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dgamma_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dmu_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dS_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
# gradients
|
||||
'grad_l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
||||
'grad_Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
|
||||
'grad_mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'grad_S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'grad_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
}
|
||||
self.gpuCache = self.gpuCacheAll
|
||||
elif self.gpuCacheAll['mu_gpu'].shape[0]==N:
|
||||
self.gpuCache = self.gpuCacheAll
|
||||
else:
|
||||
# remap to a smaller cache
|
||||
self.gpuCache = self.gpuCacheAll.copy()
|
||||
Nlist=['mu_gpu','S_gpu','gamma_gpu','logGamma_gpu','log1Gamma_gpu','logpsi1denom_gpu','logpsi2denom_gpu','psi0_gpu','psi1_gpu','psi2_gpu',
|
||||
'psi1_neq_gpu','psi1exp1_gpu','psi1exp2_gpu','dpsi1_dvar_gpu','dpsi1_dl_gpu','dpsi1_dZ_gpu','dpsi1_dgamma_gpu','dpsi1_dmu_gpu',
|
||||
'dpsi1_dS_gpu','psi2_neq_gpu','psi2exp1_gpu','dpsi2_dvar_gpu','dpsi2_dl_gpu','dpsi2_dZ_gpu','dpsi2_dgamma_gpu','dpsi2_dmu_gpu','dpsi2_dS_gpu','grad_mu_gpu','grad_S_gpu','grad_gamma_gpu',]
|
||||
oldN = self.gpuCacheAll['mu_gpu'].shape[0]
|
||||
for v in Nlist:
|
||||
u = self.gpuCacheAll[v]
|
||||
self.gpuCache[v] = u.ravel()[:u.size/oldN*N].reshape(*((N,)+u.shape[1:]))
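# reuse the larger allocation: view only the first N datapoints' worth of each
# cached array, keeping the trailing shape unchanged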
|
||||
|
||||
def _releaseMemory(self):
|
||||
if self.gpuCacheAll!=None:
|
||||
[v.gpudata.free() for v in self.gpuCacheAll.values()]
|
||||
self.gpuCacheAll = None
|
||||
self.gpuCache = None
|
||||
|
||||
def estimateMemoryOccupation(self, N, M, Q):
|
||||
"""
|
||||
Estimate the memory requirements, from which the best batch size can be chosen.
|
||||
N - the number of total datapoints
|
||||
M - the number of inducing points
|
||||
Q - the number of hidden (input) dimensions
|
||||
return: the constant memory size, the memory occupation of batchsize=1
|
||||
unit: GB
|
||||
"""
|
||||
return (2.*Q+2.*M*Q+M*M*Q)*8./1024./1024./1024., (1.+2.*M+10.*Q+2.*M*M+8.*M*Q+7.*M*M*Q)*8./1024./1024./1024.
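# Usage sketch (the names here are illustrative, not part of the original code):
#     const_gb, per_point_gb = psicomp.estimateMemoryOccupation(N, M, Q)
#     batch_size = int((free_gpu_gb - const_gb) / per_point_gb)
# where psicomp is a PSICOMP_SSRBF instance and free_gpu_gb is the free GPU memory in GB.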
|
||||
|
||||
@Cache_this(limit=1,ignore_args=(0,))
|
||||
def psicomputations(self, variance, lengthscale, Z, mu, S, gamma):
|
||||
"""Compute Psi statitsitcs"""
|
||||
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1:
|
||||
ARD = True
|
||||
else:
|
||||
ARD = False
|
||||
|
||||
N = mu.shape[0]
|
||||
M = Z.shape[0]
|
||||
Q = mu.shape[1]
|
||||
|
||||
self._initGPUCache(N,M,Q)
|
||||
l_gpu = self.gpuCache['l_gpu']
|
||||
Z_gpu = self.gpuCache['Z_gpu']
|
||||
mu_gpu = self.gpuCache['mu_gpu']
|
||||
S_gpu = self.gpuCache['S_gpu']
|
||||
gamma_gpu = self.gpuCache['gamma_gpu']
|
||||
logGamma_gpu = self.gpuCache['logGamma_gpu']
|
||||
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu']
|
||||
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu']
|
||||
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu']
|
||||
psi0_gpu = self.gpuCache['psi0_gpu']
|
||||
psi1_gpu = self.gpuCache['psi1_gpu']
|
||||
psi2_gpu = self.gpuCache['psi2_gpu']
|
||||
|
||||
if ARD:
|
||||
l_gpu.set(np.asfortranarray(lengthscale**2))
|
||||
else:
|
||||
l_gpu.fill(lengthscale*lengthscale)
|
||||
Z_gpu.set(np.asfortranarray(Z))
|
||||
mu_gpu.set(np.asfortranarray(mu))
|
||||
S_gpu.set(np.asfortranarray(S))
|
||||
gamma_gpu.set(np.asfortranarray(gamma))
|
||||
linalg_gpu.log(gamma_gpu,logGamma_gpu)
|
||||
linalg_gpu.logOne(gamma_gpu,log1Gamma_gpu)
|
||||
comp_logpsidenom(logpsi1denom_gpu, S_gpu,l_gpu,1.0,N)
|
||||
comp_logpsidenom(logpsi2denom_gpu, S_gpu,l_gpu,2.0,N)
|
||||
|
||||
psi0_gpu.fill(variance)
|
||||
comp_psi1(psi1_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
|
||||
comp_psi2(psi2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
|
||||
|
||||
return psi0_gpu, psi1_gpu, psi2_gpu
|
||||
|
||||
@Cache_this(limit=1,ignore_args=(0,))
|
||||
def _psiDercomputations(self, variance, lengthscale, Z, mu, S, gamma):
|
||||
"""Compute the derivatives w.r.t. Psi statistics"""
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
self._initGPUCache(N,M,Q)
|
||||
l_gpu = self.gpuCache['l_gpu']
|
||||
Z_gpu = self.gpuCache['Z_gpu']
|
||||
mu_gpu = self.gpuCache['mu_gpu']
|
||||
S_gpu = self.gpuCache['S_gpu']
|
||||
gamma_gpu = self.gpuCache['gamma_gpu']
|
||||
logGamma_gpu = self.gpuCache['logGamma_gpu']
|
||||
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu']
|
||||
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu']
|
||||
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu']
|
||||
|
||||
psi1_neq_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi1exp1_gpu = self.gpuCache['psi1exp1_gpu']
|
||||
psi1exp2_gpu = self.gpuCache['psi1exp2_gpu']
|
||||
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
|
||||
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
|
||||
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
|
||||
dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
|
||||
dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
|
||||
dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
|
||||
|
||||
psi2_neq_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
psi2exp1_gpu = self.gpuCache['psi2exp1_gpu']
|
||||
psi2exp2_gpu = self.gpuCache['psi2exp2_gpu']
|
||||
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
|
||||
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
|
||||
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
|
||||
dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
|
||||
dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
|
||||
dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
|
||||
|
||||
#==========================================================================================================
|
||||
# Assuming the l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, logGamma_gpu, log1Gamma_gpu,
|
||||
# logpsi1denom_gpu and logpsi2denom_gpu have been synchronized.
|
||||
#==========================================================================================================
|
||||
|
||||
# psi1 derivatives
|
||||
comp_dpsi1_dvar(dpsi1_dvar_gpu, psi1_neq_gpu, psi1exp1_gpu,psi1exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
|
||||
comp_psi1_der(dpsi1_dl_gpu,dpsi1_dmu_gpu,dpsi1_dS_gpu,dpsi1_dgamma_gpu, dpsi1_dZ_gpu, psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
|
||||
|
||||
# psi2 derivatives
|
||||
comp_dpsi2_dvar(dpsi2_dvar_gpu, psi2_neq_gpu, psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
|
||||
comp_psi2_der(dpsi2_dl_gpu,dpsi2_dmu_gpu,dpsi2_dS_gpu,dpsi2_dgamma_gpu, dpsi2_dZ_gpu, psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1:
|
||||
ARD = True
|
||||
else:
|
||||
ARD = False
|
||||
|
||||
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
|
||||
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
|
||||
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
|
||||
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
|
||||
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
grad_l_gpu = self.gpuCache['grad_l_gpu']
|
||||
|
||||
# variance
|
||||
variance.gradient = gpuarray.sum(dL_dpsi0).get() \
|
||||
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi1.size, dL_dpsi1.gpudata, 1, dpsi1_dvar_gpu.gpudata, 1) \
|
||||
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi2.size, dL_dpsi2.gpudata, 1, dpsi2_dvar_gpu.gpudata, 1)
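# i.e. variance.gradient = sum(dL_dpsi0) + sum(dL_dpsi1 * dpsi1/dvar) + sum(dL_dpsi2 * dpsi2/dvar),
# with the last two terms evaluated as dot products over the flattened GPU arrays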
|
||||
|
||||
# lengthscale
|
||||
if ARD:
|
||||
grad_l_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_l_gpu, psi1_comb_gpu, 1, N*M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_l_gpu, psi2_comb_gpu, 1, N*M*M)
|
||||
lengthscale.gradient = grad_l_gpu.get()
|
||||
else:
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
|
||||
lengthscale.gradient = gpuarray.sum(psi1_comb_gpu).get() + gpuarray.sum(psi2_comb_gpu).get()
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
|
||||
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
|
||||
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
grad_Z_gpu = self.gpuCache['grad_Z_gpu']
|
||||
|
||||
grad_Z_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dZ_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_Z_gpu, psi1_comb_gpu, 1, N)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dZ_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_Z_gpu, psi2_comb_gpu, 1, N*M)
|
||||
return grad_Z_gpu.get()
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
|
||||
dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
|
||||
dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
|
||||
dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
|
||||
dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
|
||||
dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
|
||||
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
grad_mu_gpu = self.gpuCache['grad_mu_gpu']
|
||||
grad_S_gpu = self.gpuCache['grad_S_gpu']
|
||||
grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
|
||||
|
||||
# mu gradients
|
||||
grad_mu_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dmu_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_mu_gpu, psi1_comb_gpu, N, M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dmu_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_mu_gpu, psi2_comb_gpu, N, M*M)
|
||||
|
||||
# S gradients
|
||||
grad_S_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dS_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_S_gpu, psi1_comb_gpu, N, M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dS_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_S_gpu, psi2_comb_gpu, N, M*M)
|
||||
|
||||
# gamma gradients
|
||||
grad_gamma_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dgamma_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_gamma_gpu, psi1_comb_gpu, N, M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dgamma_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_gamma_gpu, psi2_comb_gpu, N, M*M)
|
||||
|
||||
return grad_mu_gpu.get(), grad_S_gpu.get(), grad_gamma_gpu.get()
|
||||
|
|
@ -9,6 +9,7 @@ from stationary import Stationary
|
|||
from GPy.util.caching import Cache_this
|
||||
from ...core.parameterization import variational
|
||||
from psi_comp import ssrbf_psi_comp
|
||||
from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF
|
||||
|
||||
class RBF(Stationary):
|
||||
"""
|
||||
|
|
@ -19,9 +20,15 @@ class RBF(Stationary):
|
|||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
|
||||
|
||||
"""
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'):
|
||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
_support_GPU = True
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
|
||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
||||
self.weave_options = {}
|
||||
self.group_spike_prob = False
|
||||
|
||||
if self.useGPU:
|
||||
self.psicomp = PSICOMP_SSRBF()
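# with useGPU=True the spike-and-slab psi statistics below are computed on the
# GPU by PSICOMP_SSRBF; otherwise the CPU routines in ssrbf_psi_comp are used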
|
||||
|
||||
|
||||
def K_of_r(self, r):
|
||||
return self.variance * np.exp(-0.5 * r**2)
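# e.g. with variance=1: K_of_r(0.) == 1. and K_of_r(1.) == exp(-0.5), about 0.607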
|
||||
|
|
@@ -34,10 +41,17 @@ class RBF(Stationary):
#---------------------------------------#

def psi0(self, Z, variational_posterior):
if self.useGPU:
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else:
return self.Kdiag(variational_posterior.mean)

def psi1(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else:
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
else:
_, _, _, psi1 = self._psi1computations(Z, variational_posterior)

@@ -45,6 +59,9 @@ class RBF(Stationary):

def psi2(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else:
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
else:
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
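As a reminder of what these quantities are: psi1[n, m] is the expectation of k(x_n, z_m) under the variational posterior q(x_n). The following Monte-Carlo sketch (not part of GPy, names are hypothetical) can be used to sanity-check the analytic computation for a plain Gaussian posterior and an RBF kernel.

    import numpy as np

    def psi1_mc(variance, lengthscale, Z, mu, S, n_samples=5000):
        """Monte-Carlo estimate of psi1[n, m] = E_{q(x_n)}[k_rbf(x_n, z_m)].
        mu, S: (N, Q) variational means/variances; Z: (M, Q) inducing inputs."""
        np.random.seed(0)
        N, Q = mu.shape
        x = mu[:, None, :] + np.sqrt(S)[:, None, :] * np.random.randn(N, n_samples, Q)
        r2 = (((x[:, :, None, :] - Z[None, None, :, :]) / lengthscale) ** 2).sum(-1)
        return (variance * np.exp(-0.5 * r2)).mean(axis=1)  # (N, M)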
@@ -53,6 +70,10 @@ class RBF(Stationary):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)

@@ -80,8 +101,6 @@ class RBF(Stationary):

#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
if self._debug:
num_grad = self.lengthscale.gradient.copy()
self.lengthscale.gradient = 0.

#from psi1

@@ -101,8 +120,6 @@ class RBF(Stationary):
else:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)

if self._debug:
import ipdb;ipdb.set_trace()
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance

else:
@@ -111,6 +128,9 @@ class RBF(Stationary):
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)

@@ -144,6 +164,9 @@ class RBF(Stationary):
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
ndata = variational_posterior.mean.shape[0]

_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)

@@ -159,6 +182,9 @@ class RBF(Stationary):
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)

if self.group_spike_prob:
grad_gamma[:] = grad_gamma.mean(axis=0)

return grad_mu, grad_S, grad_gamma

elif isinstance(variational_posterior, variational.NormalPosterior):
@@ -41,8 +41,8 @@ class Stationary(Kern):

"""

def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name):
super(Stationary, self).__init__(input_dim, active_dims, name)
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):
super(Stationary, self).__init__(input_dim, active_dims, name,useGPU=useGPU)
self.ARD = ARD
if not ARD:
if lengthscale is None:

@@ -139,7 +139,7 @@ class Stationary(Kern):
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(self._slice_X(X)[:,q:q+1] - self._slice_X(X2)[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
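The einsum in the new line computes, per input dimension q, -sum_ij tmp_ij (X_iq - X2_jq)^2 / l_q^3 with tmp = dL_dr / r. A vectorised restatement of the same quantity, as a sketch (assuming X and X2 have already been sliced to the active dimensions):

    import numpy as np

    def ard_lengthscale_grad(tmp, X, X2, lengthscale):
        # tmp: (N, N2); X: (N, Q); X2: (N2, Q); lengthscale: (Q,)
        diff_sq = np.square(X[:, None, :] - X2[None, :, :])          # (N, N2, Q)
        return -(tmp[:, :, None] * diff_sq).sum(axis=(0, 1)) / lengthscale ** 3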
@@ -11,9 +11,9 @@ from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp

class Sympykern(Kern):
class Symbolic(Kern):
"""
A kernel object, where all the hard work in done by sympy.
A kernel object, where all the hard work is done by sympy.

:param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...

@@ -26,10 +26,8 @@ class Sympykern(Kern):
- to handle multiple inputs, call them x_1, z_1, etc
- to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
"""
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None):
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):

if name is None:
name='sympykern'
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
super(Sympykern, self).__init__(input_dim, active_dims, name)

@@ -60,7 +58,6 @@ class Sympykern(Kern):
# extract parameter names from the covariance
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)

# Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)

@@ -97,8 +94,8 @@ class Sympykern(Kern):
val = 1.0
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
if param is not None:
if param.has_key(theta):
val = param[theta]
if param.has_key(theta.name):
val = param[theta.name]
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))

@@ -117,6 +114,12 @@ class Sympykern(Kern):
self.arg_list += self._sp_theta_i + self._sp_theta_j
self.diag_arg_list += self._sp_theta_i

# Check if there are additional linear operators on the covariance.
self._sp_operators = operators
# TODO: Deal with linear operators
#if self._sp_operators:
#    for operator in self._sp_operators:

# psi_stats aren't yet implemented.
if False:
self.compute_psi_stats()
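A usage sketch of the renamed kernel: the covariance is supplied as a sympy expression in x_0, z_0, ... plus any named hyperparameters, which become Param objects on the kernel. The import path shown here is an assumption; only the constructor signature comes from the diff above.

    import sympy as sym
    from GPy.kern import Symbolic  # hypothetical import path

    x_0, z_0 = sym.symbols('x_0 z_0')
    variance, lengthscale = sym.symbols('variance lengthscale', positive=True)
    k_expr = variance * sym.exp(-(x_0 - z_0) ** 2 / (2 * lengthscale ** 2))
    k = Symbolic(input_dim=1, k=k_expr)   # an RBF written symbolically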
@@ -254,3 +257,176 @@ class Sympykern(Kern):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T

if False:
class Symcombine(CombinationKernel):
"""
Combine list of given sympy covariances together with the provided operations.
"""
def __init__(self, subkerns, operations, name='sympy_combine'):
super(Symcombine, self).__init__(subkerns, name)
for subkern, operation in zip(subkerns, operations):
self._sp_k += self._k_double_operate(subkern._sp_k, operation)

#def _double_operate(self, k, operation):

@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Combine covariances with a linear operator.
"""
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))

@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))

def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]

def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]

def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.

:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""

target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target

def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target

def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))

def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))

def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#from fixed import Fixed

for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
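The outer-product update in the cross-term loop relies on the fact that, for two kernels acting on disjoint input dimensions, the expectation of the product factorises into a product of psi1 statistics. A small self-contained sketch of that identity (function name is illustrative, not GPy API):

    import numpy as np

    def psi2_cross_terms(psi1_a, psi1_b):
        """Cross contribution to psi2 for k = k_a + k_b on disjoint dimensions.
        psi1_a, psi1_b: (N, M) arrays of E_q[k_a(x_n, z_m)] and E_q[k_b(x_n, z_m)]."""
        return (psi1_a[:, :, None] * psi1_b[:, None, :]
                + psi1_b[:, :, None] * psi1_a[:, None, :])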
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self.parts:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
target = np.zeros(Z.shape)
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self.parts:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
return target
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
target_mu = np.zeros(variational_posterior.shape)
|
||||
target_S = np.zeros(variational_posterior.shape)
|
||||
for p1 in self._parameters_:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self._parameters_:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
target_mu += a
|
||||
target_S += b
|
||||
return target_mu, target_S
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices, rest can get recomputed
|
||||
"""
|
||||
return super(Add, self)._getstate()
|
||||
|
||||
def _setstate(self, state):
|
||||
super(Add, self)._setstate(state)
|
||||
|
||||
def add(self, other, name='sum'):
|
||||
if isinstance(other, Add):
|
||||
other_params = other._parameters_.copy()
|
||||
for p in other_params:
|
||||
other.remove_parameter(p)
|
||||
self.add_parameters(*other_params)
|
||||
else: self.add_parameter(other)
|
||||
return self
|
||||
BIN GPy/likelihoods/.DS_Store (vendored) — binary file not shown.
@@ -6,3 +6,17 @@ from poisson import Poisson
from student_t import StudentT
from likelihood import Likelihood
from mixed_noise import MixedNoise
# TODO need to fix this in a config file.
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
if sympy_available:
# These are likelihoods that rely on symbolic.
from symbolic import Symbolic
#from sstudent_t import SstudentT
from negative_binomial import Negative_binomial
#from skew_normal import Skew_normal
#from skew_exponential import Skew_exponential
#from null_category import Null_category
@@ -15,7 +15,7 @@ class Bernoulli(Likelihood):
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}

.. Note::
Y is expected to take values in {-1, 1} TODO: {0, 1}??
Y takes values in either {-1, 1} or {0, 1}.
link function should have the domain [0, 1], e.g. probit (default) or Heaviside

.. See also::

@@ -54,10 +54,10 @@ class Bernoulli(Likelihood):
"""
if Y_i == 1:
sign = 1.
elif Y_i == 0:
elif Y_i == 0 or Y_i == -1:
sign = -1
else:
raise ValueError("bad value for Bernouilli observation (0, 1)")
raise ValueError("bad value for Bernoulli observation (0, 1)")
if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)

@@ -95,15 +95,15 @@ class Bernoulli(Likelihood):
else:
return np.nan

def pdf_link(self, link_f, y, Y_metadata=None):
def pdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
Likelihood function given inverse link of f.

.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}

:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli
@ -113,102 +113,106 @@ class Bernoulli(Likelihood):
|
|||
.. Note:
|
||||
Each y_i must be in {0, 1}
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = (link_f**y) * ((1.-link_f)**(1.-y))
|
||||
objective = np.where(y, link_f, 1.-link_f)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
|
||||
objective = np.where(y, inv_link_f, 1.-inv_link_f)
|
||||
return np.exp(np.sum(np.log(objective)))
|
||||
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood function given link(f)
|
||||
Log Likelihood function given inverse link of f.
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: log likelihood evaluated at points link(f)
|
||||
:returns: log likelihood evaluated at points inverse link of f.
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
|
||||
state = np.seterr(divide='ignore')
|
||||
objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
|
||||
np.seterr(**state)
|
||||
return np.sum(objective)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the pdf at y, given link(f) w.r.t link(f)
|
||||
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
|
||||
|
||||
.. math::
|
||||
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: gradient of log likelihood evaluated at points link(f)
|
||||
:returns: gradient of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#grad = (y/link_f) - (1.-y)/(1-link_f)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
|
||||
state = np.seterr(divide='ignore')
|
||||
grad = np.where(y, 1./link_f, -1./(1-link_f))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
|
||||
np.seterr(**state)
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
|
||||
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
|
||||
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
|
||||
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
|
||||
state = np.seterr(divide='ignore')
|
||||
d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
|
||||
np.seterr(**state)
|
||||
return d2logpdf_dlink2
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i})}{(1-\\lambda(f))^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables passed through inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: third derivative of log likelihood evaluated at points link(f)
|
||||
:returns: third derivative of log likelihood evaluated at points inverse_link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
|
||||
state = np.seterr(divide='ignore')
|
||||
d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
|
||||
np.seterr(**state)
|
||||
return d3logpdf_dlink3
|
||||
|
||||
|
|
|
|||
|
|
@@ -16,20 +16,20 @@ class Likelihood(Parameterized):
Likelihood base class, used to define p(y|f).

All instances use _inverse_ link functions, which can be swapped out. It is
expected that inherriting classes define a default inverse link function
expected that inheriting classes define a default inverse link function

To use this class, inherrit and define missing functionality.
To use this class, inherit and define missing functionality.

Inherriting classes *must* implement:
Inheriting classes *must* implement:
pdf_link : a bound method which turns the output of the link function into the pdf
logpdf_link : the logarithm of the above

To enable use with EP, inherriting classes *must* define:
To enable use with EP, inheriting classes *must* define:
TODO: a suitable derivative function for any parameters of the class
It is also desirable to define:
moments_match_ep : a function to compute the EP moments. If this isn't defined, the moments will be computed using 1D quadrature.

To enable use with Laplace approximation, inherriting classes *must* define:
To enable use with Laplace approximation, inheriting classes *must* define:
Some derivative functions *AS TODO*

For exact Gaussian inference, define *JH TODO*

@@ -159,7 +159,7 @@ class Likelihood(Parameterized):

def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
"""
Numerical approximation to the predictive variance: V(Y_star)
Approximation to the predictive variance: V(Y_star)

The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
@ -208,28 +208,28 @@ class Likelihood(Parameterized):
|
|||
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
|
||||
return exp_var + var_exp
|
||||
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def pdf(self, f, y, Y_metadata=None):
|
||||
|
|
@ -247,8 +247,8 @@ class Likelihood(Parameterized):
|
|||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def logpdf(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -265,8 +265,8 @@ class Likelihood(Parameterized):
|
|||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def dlogpdf_df(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -284,8 +284,8 @@ class Likelihood(Parameterized):
|
|||
:returns: derivative of log likelihood evaluated for this point
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
return chain_1(dlogpdf_dlink, dlink_df)
|
||||
|
||||
|
|
@ -305,10 +305,10 @@ class Likelihood(Parameterized):
|
|||
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
|
||||
|
|
@ -328,12 +328,12 @@ class Likelihood(Parameterized):
|
|||
:returns: third derivative of log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
|
||||
|
|
@ -342,10 +342,10 @@ class Likelihood(Parameterized):
|
|||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([1, 0])
|
||||
|
||||
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
|
||||
|
|
@ -353,12 +353,12 @@ class Likelihood(Parameterized):
|
|||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
|
||||
|
|
@ -366,14 +366,14 @@ class Likelihood(Parameterized):
|
|||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def _laplace_gradients(self, f, y, Y_metadata=None):
|
||||
|
|
@@ -383,12 +383,9 @@ class Likelihood(Parameterized):

#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
try:
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
except Exception as e:
import ipdb; ipdb.set_trace() # XXX BREAKPOINT

return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

@@ -411,7 +408,10 @@ class Likelihood(Parameterized):
#compute the quantiles by sampling!!!
N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)

return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
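The fall-back quantile computation above simply samples the latent function from N(mu, var), pushes the samples through the likelihood's samples() method and takes empirical percentiles. A standalone sketch of the same idea for a Gaussian observation model (the noise value and function name are illustrative only):

    import numpy as np

    def sampled_quantiles(mu, var, noise_var=0.1, quantiles=(2.5, 97.5), n_samp=1000):
        np.random.seed(0)
        f = mu + np.sqrt(var) * np.random.randn(mu.shape[0], n_samp)
        y = f + np.sqrt(noise_var) * np.random.randn(*f.shape)  # likelihood sampling step
        return [np.percentile(y, q, axis=1)[:, None] for q in quantiles]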
@@ -71,6 +71,7 @@ class Probit(GPTransformation):

g(f) = \\Phi^{-1} (mu)

"""
def transf(self,f):
return std_norm_cdf(f)
|
|||
GPy/likelihoods/negative_binomial.py (new file, 46 lines)

@@ -0,0 +1,46 @@
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)

try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian
except ImportError:
sympy_available=False

import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from symbolic import Symbolic
from scipy import stats

if sympy_available:
class Negative_binomial(Symbolic):
"""
Negative binomial

.. math::
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}

.. Note::
Y takes non-negative integer values.
link function should have a positive domain, e.g. log (default).

.. See also::
symbolic.py, for the parent class
"""
def __init__(self, gp_link=None):
if gp_link is None:
gp_link = link_functions.Log()

dispersion = sym.Symbol('dispersion', positive=True, real=True)
y = sym.Symbol('y', nonnegative=True, integer=True)
f = sym.Symbol('f', positive=True, real=True)
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f)
super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial')

# TODO: Check this.
self.log_concave = False
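A usage sketch for the new likelihood (assuming sympy is installed; the pairing with Laplace inference shown in the comment is an assumption about typical use, not something this diff sets up):

    import GPy

    lik = GPy.likelihoods.Negative_binomial()   # log link by default
    # e.g. with a Laplace approximation for the non-Gaussian likelihood:
    # m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1), likelihood=lik,
    #                 inference_method=GPy.inference.latent_function_inference.Laplace())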
@ -26,8 +26,8 @@ class StudentT(Likelihood):
|
|||
gp_link = link_functions.Identity()
|
||||
|
||||
super(StudentT, self).__init__(gp_link, name='Student_T')
|
||||
|
||||
self.sigma2 = Param('t_noise', float(sigma2), Logexp())
|
||||
# sigma2 is not a noise parameter, it is a squared scale.
|
||||
self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
|
||||
self.v = Param('deg_free', float(deg_free))
|
||||
self.add_parameter(self.sigma2)
|
||||
self.add_parameter(self.v)
|
||||
|
|
@ -46,23 +46,23 @@ class StudentT(Likelihood):
|
|||
self.sigma2.gradient = grads[0]
|
||||
self.v.gradient = grads[1]
|
||||
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#Careful gamma(big_number) is infinity!
|
||||
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
||||
/ (np.sqrt(self.v * np.pi * self.sigma2)))
|
||||
|
|
@ -70,15 +70,15 @@ class StudentT(Likelihood):
|
|||
)
|
||||
return np.prod(objective)
|
||||
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||
|
||||
:param link_f: latent variables (link(f))
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables (link(f))
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
|
|
@ -86,11 +86,11 @@ class StudentT(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#FIXME:
|
||||
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly
|
||||
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
#But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
|
||||
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||
- gammaln(self.v * 0.5)
|
||||
|
|
@ -99,15 +99,15 @@ class StudentT(Likelihood):
|
|||
)
|
||||
return np.sum(objective)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||
|
||||
:param link_f: latent variables (f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables (f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
|
|
@ -115,12 +115,12 @@ class StudentT(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link(f), w.r.t link(f)
|
||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
|
|
@ -129,8 +129,8 @@ class StudentT(Likelihood):
|
|||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inv_link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
|
|
@ -141,90 +141,90 @@ class StudentT(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||
((e**2 + self.sigma2*self.v)**3)
|
||||
)
|
||||
return d3lik_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||
return np.sum(dlogpdf_dvar)
|
||||
|
||||
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
|
||||
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||
|
||||
:param link_f: latent variables link_f
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables inv_link_f
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||
return dlogpdf_dlink_dvar
|
||||
|
||||
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
|
||||
def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||
/ ((self.sigma2*self.v + (e**2))**3)
|
||||
)
|
||||
|
|
@@ -246,11 +246,12 @@ class StudentT(Likelihood):
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))

def predictive_mean(self, mu, sigma, Y_metadata=None):
return self.gp_link.transf(mu) # only true in link is monotoci, which it is.
# The comment here confuses mean and median.
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.

def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
if self.deg_free <2.:
return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom
if self.deg_free<=2.:
return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
else:
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
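For reference, when the degrees of freedom exceed 2 the conditional variance that feeds the decomposition V(Y*) = E[V(Y*|f*)] + V[E(Y*|f*)] is finite: Var(y|f) = sigma2 * v / (v - 2). A small sketch of that check (not GPy code):

    import numpy as np

    def student_t_conditional_variance(sigma2, deg_free):
        # Variance of a scaled Student-t; undefined (infinite) for deg_free <= 2.
        if deg_free <= 2.:
            return np.nan
        return sigma2 * deg_free / (deg_free - 2.)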
|||
GPy/likelihoods/symbolic.py (new file, 298 lines)

@@ -0,0 +1,298 @@
# Copyright (c) 2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)

try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
except ImportError:
sympy_available=False

import numpy as np
import link_functions
from scipy import stats, integrate
from scipy.special import gammaln, gamma, erf, polygamma
from GPy.util.functions import cum_gaussian, ln_cum_gaussian
from likelihood import Likelihood
from ..core.parameterization import Param

if sympy_available:
class Symbolic(Likelihood):
"""
Symbolic likelihood.

Likelihood where the form of the likelihood is provided by a sympy expression.

"""
def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, param=None, func_modules=[]):

if gp_link is None:
gp_link = link_functions.Identity()

if log_pdf is None:
raise ValueError, "You must provide an argument for the log pdf."

self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy']

super(Symbolic, self).__init__(gp_link, name=name)
self.missing_data = False
self._sym_log_pdf = log_pdf
if missing_log_pdf:
self.missing_data = True
self._sym_missing_log_pdf = missing_log_pdf

# pull the variable names out of the symbolic pdf
sym_vars = [e for e in self._sym_log_pdf.atoms() if e.is_Symbol]
self._sym_f = [e for e in sym_vars if e.name=='f']
if not self._sym_f:
raise ValueError('No variable f in log pdf.')
self._sym_y = [e for e in sym_vars if e.name=='y']
if not self._sym_y:
raise ValueError('No variable y in log pdf.')
self._sym_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name)

theta_names = [theta.name for theta in self._sym_theta]
if self.missing_data:
# pull the variable names out of missing data
sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol]
sym_f = [e for e in sym_vars if e.name=='f']
if not sym_f:
raise ValueError('No variable f in missing log pdf.')
sym_y = [e for e in sym_vars if e.name=='y']
if sym_y:
raise ValueError('Data is present in missing data portion of likelihood.')
# additional missing data parameters
missing_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='missing' or e.name in theta_names)],key=lambda e:e.name)
self._sym_theta += missing_theta
self._sym_theta = sorted(self._sym_theta, key=lambda e:e.name)

# These are all the arguments needed to compute likelihoods.
self.arg_list = self._sym_y + self._sym_f + self._sym_theta

# these are arguments for computing derivatives.
derivative_arguments = self._sym_f + self._sym_theta

# Do symbolic work to compute derivatives.
self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments}
self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}

if self.missing_data:
# Do symbolic work to compute derivatives.
self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments}
self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}

# Add parameters to the model.
for theta in self._sym_theta:
val = 1.0
# TODO: need to decide how to handle user passing values for these parameter vectors.
if param is not None:
if param.has_key(theta.name):
val = param[theta.name]
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))

# Is there some way to check whether the pdf is log
# concave? For the moment, need user to specify.
self.log_concave = log_concave

# initialise code arguments
self._arguments = {}

# generate the code for the pdf and derivatives
self._gen_code()

def _gen_code(self):
"""Generate the code from the symbolic parts that will be used for likelihood computation."""
# TODO: Check here whether theano is available and set up
# functions accordingly.
self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules)

# compute code for derivatives (for implicit likelihood terms
# we need up to 3rd derivatives)
setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()})
setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()})
setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()})

if self.missing_data:
setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()})
setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()})
setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()})

# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
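The _gen_code step above is essentially sympy differentiation followed by lambdify. A stripped-down sketch of the same pipeline for a toy log pdf (not GPy code; the Gaussian-like expression is only an example):

    import numpy as np
    import sympy as sym
    from sympy.utilities.lambdify import lambdify

    f, y, scale = sym.symbols('f y scale', positive=True)
    log_pdf = -sym.log(scale) - (y - f) ** 2 / (2 * scale ** 2)   # toy log density, constants dropped
    dlogpdf_df = sym.diff(log_pdf, f).simplify()

    logpdf_fn = lambdify((y, f, scale), log_pdf, 'numpy')
    grad_fn = lambdify((y, f, scale), dlogpdf_df, 'numpy')
    print(grad_fn(np.array([1.0, 2.0]), np.array([0.5, 1.5]), 0.3))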
def parameters_changed(self):
|
||||
pass
|
||||
|
||||
def update_gradients(self, grads):
|
||||
"""
|
||||
Pull out the gradients, be careful as the order must match the order
|
||||
in which the parameters are added
|
||||
"""
|
||||
# The way the Laplace approximation is run requires the
|
||||
# covariance function to compute the true gradient (because it
|
||||
# is dependent on the mode). This means we actually compute
|
||||
# the gradient outside this object. This function would
|
||||
# normally ask the object to update its gradients internally,
|
||||
# but here it provides them externally, because they are
|
||||
# computed in the inference code. TODO: Thought: How does this
|
||||
# effect EP? Shouldn't this be done by a separate
|
||||
# Laplace-approximation specific call?
|
||||
for grad, theta in zip(grads, self._sym_theta):
|
||||
parameter = getattr(self, theta.name)
|
||||
setattr(parameter, 'gradient', grad)
|
||||
|
||||
def _arguments_update(self, f, y):
|
||||
"""Set up argument lists for the derivatives."""
|
||||
# If we do make use of Theano, then at this point we would
|
||||
# need to do a lot of precomputation to ensure that the
|
||||
# likelihoods and gradients are computed together, then check
|
||||
# for parameter changes before updating.
|
||||
for i, fvar in enumerate(self._sym_f):
|
||||
self._arguments[fvar.name] = f
|
||||
for i, yvar in enumerate(self._sym_y):
|
||||
self._arguments[yvar.name] = y
|
||||
for theta in self._sym_theta:
|
||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
|
||||
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given inverse link of f.
|
||||
|
||||
:param inv_link_f: inverse link of latent variables.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata, not used in this computation
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata))
|
||||
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given inverse link of latent variables.
|
||||
|
||||
:param inv_link_f: latent variables (inverse link of f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
if self.missing_data:
|
||||
ll = np.where(np.isnan(y), self._missing_log_pdf_function(**self._missing_arguments), self._log_pdf_function(**self._arguments))
|
||||
else:
|
||||
ll = np.where(np.isnan(y), 0., self._log_pdf_function(**self._arguments))
|
||||
return np.sum(ll)
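# A small numpy-only illustration of the missing-data masking above (values are made up):
# NaN targets contribute zero to the summed log-likelihood.
import numpy as np

y = np.array([[0.2], [np.nan], [1.5]])              # one missing observation
per_point_ll = np.array([[-0.9], [-0.1], [-1.3]])   # pretend per-point log-pdf values

ll = np.where(np.isnan(y), 0., per_point_ll)        # mask out the missing entry
print(ll.sum())                                      # -2.2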
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of log likelihood with respect to the inverse link function.
|
||||
|
||||
:param inv_link_f: latent variables (inverse link of f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata
|
||||
:returns: gradient of likelihood with respect to each point.
|
||||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._missing_arguments), self._first_derivative_code['f'](**self._arguments))
|
||||
else:
|
||||
return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments))
|
||||
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
|
||||
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
|
||||
|
||||
|
||||
:param inv_link_f: inverse link of the latent variables.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata, not used in this computation
|
||||
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Returns the diagonal of the Hessian, since everywhere else it is
|
||||
0, as the likelihood factorizes over cases (the
|
||||
distribution for y_i depends only on link(f_i) not on
|
||||
link(f_j), j != i).
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y), self._missing_second_derivative_code['f'](**self._missing_arguments), self._second_derivative_code['f'](**self._arguments))
|
||||
else:
|
||||
return np.where(np.isnan(y), 0., self._second_derivative_code['f'](**self._arguments))
|
||||
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
if self.missing_data:
|
||||
return np.where(np.isnan(y), self._missing_third_derivative_code['f'](**self._missing_arguments), self._third_derivative_code['f'](**self._arguments))
|
||||
else:
|
||||
return np.where(np.isnan(y), 0., self._third_derivative_code['f'](**self._arguments))
|
||||
|
||||
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||
for i, theta in enumerate(self._sym_theta):
|
||||
if self.missing_data:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments))
|
||||
else:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments))
|
||||
return g.sum(0)
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||
for i, theta in enumerate(self._sym_theta):
|
||||
if self.missing_data:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), self._missing_second_derivative_code[theta.name](**self._arguments), self._second_derivative_code[theta.name](**self._arguments))
|
||||
else:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), 0., self._second_derivative_code[theta.name](**self._arguments))
|
||||
return g
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
self._arguments_update(inv_link_f, y)
|
||||
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||
for i, theta in enumerate(self._sym_theta):
|
||||
if self.missing_data:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), self._missing_third_derivative_code[theta.name](**self._arguments), self._third_derivative_code[theta.name](**self._arguments))
|
||||
else:
|
||||
g[:, i:i+1] = np.where(np.isnan(y), 0., self._third_derivative_code[theta.name](**self._arguments))
|
||||
return g
|
||||
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def conditional_mean(self, gp):
|
||||
raise NotImplementedError
|
||||
|
||||
def conditional_variance(self, gp):
|
||||
raise NotImplementedError
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
|
@ -2,13 +2,14 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from gplvm import GPLVM
|
||||
from .. import kern
|
||||
from ..core import SparseGP
|
||||
from ..likelihoods import Gaussian
|
||||
from ..inference.optimization import SCG
|
||||
from ..util import linalg
|
||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
|
||||
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
class BayesianGPLVM(SparseGP):
|
||||
"""
|
||||
|
|
@ -26,7 +27,10 @@ class BayesianGPLVM(SparseGP):
|
|||
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
|
||||
if X == None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X = initialize_latent(init, input_dim, Y)
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
||||
self.init = init
|
||||
|
||||
if X_variance is None:
|
||||
|
|
@ -38,7 +42,7 @@ class BayesianGPLVM(SparseGP):
|
|||
assert Z.shape[1] == X.shape[1]
|
||||
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(input_dim) # + kern.white(input_dim)
|
||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
||||
|
||||
if likelihood is None:
|
||||
likelihood = Gaussian()
|
||||
|
|
@ -47,26 +51,26 @@ class BayesianGPLVM(SparseGP):
|
|||
self.variational_prior = NormalPrior()
|
||||
X = NormalPosterior(X, X_variance)
|
||||
|
||||
if inference_method is None:
|
||||
if np.any(np.isnan(Y)):
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
|
||||
inference_method = VarDTCMissingData()
|
||||
else:
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTC
|
||||
inference_method = VarDTC()
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||
self.add_parameter(self.X, index=0)
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just the indices; the rest can be recomputed
|
||||
"""
|
||||
return SparseGP._getstate(self) + [self.init]
|
||||
|
||||
def _setstate(self, state):
|
||||
self._const_jitter = None
|
||||
self.init = state.pop()
|
||||
SparseGP._setstate(self, state)
|
||||
|
||||
def set_X_gradients(self, X, X_grad):
|
||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
||||
X.mean.gradient, X.variance.gradient = X_grad
|
||||
|
||||
def parameters_changed(self):
|
||||
if isinstance(self.inference_method, VarDTC_GPU):
|
||||
update_gradients(self)
|
||||
return
|
||||
|
||||
super(BayesianGPLVM, self).parameters_changed()
|
||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
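# For the standard-normal prior used here, the subtracted KL term is the usual
# factorised-Gaussian formula; a hedged one-off sketch of that maths (this mirrors
# the formula, not GPy's internal implementation):
import numpy as np

def kl_q_p(mu, var):
    # KL( N(mu, var) || N(0, 1) ), summed over data points and latent dimensions
    return 0.5 * np.sum(var + mu**2 - np.log(var) - 1.)

mu = np.random.randn(20, 3)
var = np.random.rand(20, 3) + 1e-3
print(kl_q_p(mu, var))   # always >= 0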
|
||||
|
||||
|
|
@ -157,56 +161,6 @@ class BayesianGPLVM(SparseGP):
|
|||
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
|
||||
|
||||
|
||||
def update_gradients(model):
|
||||
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
|
||||
het_noise = model.likelihood.variance.size > 1
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL = np.empty((model.Y.shape[0],))
|
||||
else:
|
||||
dL_dthetaL = 0
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
|
||||
kern_grad = model.kern.gradient.copy()
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
isEnd = False
|
||||
while not isEnd:
|
||||
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
kern_grad += model.kern.gradient
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]])
|
||||
|
||||
#gradients w.r.t. posterior parameters of X
|
||||
X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad)
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
|
||||
else:
|
||||
dL_dthetaL += grad_dict['dL_dthetaL']
|
||||
|
||||
# Set the gradients w.r.t. kernel
|
||||
model.kern.gradient = kern_grad
|
||||
|
||||
# Update Log-likelihood
|
||||
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
|
||||
# update for the KL divergence
|
||||
model.variational_prior.update_gradients_KL(model.X)
|
||||
|
||||
# dL_dthetaL
|
||||
model.likelihood.update_gradients(dL_dthetaL)
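# The loop above streams over minibatches and accumulates kernel, Z and likelihood
# gradients; a toy, framework-free sketch of the same accumulation pattern
# (all names here are illustrative, not GPy API):
import numpy as np

def minibatches(n, batchsize):
    # yield (start, stop) ranges covering 0..n, like inference_minibatch does
    for start in range(0, n, batchsize):
        yield start, min(start + batchsize, n)

n_data, n_params = 100, 3
per_point_grads = np.random.randn(n_data, n_params)  # stand-in for grad_dict contents

total_grad = np.zeros(n_params)
for start, stop in minibatches(n_data, 32):
    total_grad += per_point_grads[start:stop].sum(0)  # accumulate, as kern_grad += ... above

assert np.allclose(total_grad, per_point_grads.sum(0))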
|
||||
|
||||
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
"""
|
||||
objective function for fitting the latent variables for test points
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@
|
|||
# Copyright (c) 2013, the GPy Authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core import GP
|
||||
from .. import likelihoods
|
||||
from .. import kern
|
||||
from ..inference.latent_function_inference.expectation_propagation import EP
|
||||
|
||||
class GPClassification(GP):
|
||||
"""
|
||||
|
|
@ -27,4 +27,4 @@ class GPClassification(GP):
|
|||
|
||||
likelihood = likelihoods.Bernoulli()
|
||||
|
||||
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, name='GPClassification',Y_metadata=Y_metadata)
|
||||
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification')
|
||||
|
|
|
|||
|
|
@ -29,8 +29,3 @@ class GPRegression(GP):
|
|||
|
||||
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
|
||||
|
||||
def _getstate(self):
|
||||
return GP._getstate(self)
|
||||
|
||||
def _setstate(self, state):
|
||||
return GP._setstate(self, state)
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
import numpy as np
|
||||
import pylab as pb
|
||||
from .. import kern
|
||||
from ..util.linalg import PCA
|
||||
from ..core import GP, Param
|
||||
from ..likelihoods import Gaussian
|
||||
from .. import util
|
||||
|
|
@ -29,9 +28,11 @@ class GPLVM(GP):
|
|||
"""
|
||||
if X is None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X = initialize_latent(init, input_dim, Y)
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
||||
|
||||
likelihood = Gaussian()
|
||||
|
||||
|
|
@ -43,12 +44,6 @@ class GPLVM(GP):
|
|||
super(GPLVM, self).parameters_changed()
|
||||
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
|
||||
|
||||
def _getstate(self):
|
||||
return GP._getstate(self)
|
||||
|
||||
def _setstate(self, state):
|
||||
GP._setstate(self, state)
|
||||
|
||||
def jacobian(self,X):
|
||||
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||
for i in range(self.output_dim):
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@ import itertools
|
|||
import pylab
|
||||
|
||||
from ..core import Model
|
||||
from ..util.linalg import PCA
|
||||
from ..kern import Kern
|
||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
||||
from ..core.parameterization import Param, Parameterized
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
|
||||
from ..likelihoods import Gaussian
|
||||
from GPy.util.initialization import initialize_latent
|
||||
|
||||
class MRD(Model):
|
||||
"""
|
||||
|
|
@ -51,27 +51,31 @@ class MRD(Model):
|
|||
inference_method=None, likelihood=None, name='mrd', Ynames=None):
|
||||
super(MRD, self).__init__(name)
|
||||
|
||||
# sort out the kernels
|
||||
if kernel is None:
|
||||
from ..kern import RBF
|
||||
self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))]
|
||||
elif isinstance(kernel, Kern):
|
||||
self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))]
|
||||
else:
|
||||
assert len(kernel) == len(Ylist), "need one kernel per output"
|
||||
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
||||
self.kern = kernel
|
||||
self.input_dim = input_dim
|
||||
self.num_inducing = num_inducing
|
||||
|
||||
self.Ylist = Ylist
|
||||
self._in_init_ = True
|
||||
X = self._init_X(initx, Ylist)
|
||||
X, fracs = self._init_X(initx, Ylist)
|
||||
self.Z = Param('inducing inputs', self._init_Z(initz, X))
|
||||
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
||||
|
||||
# sort out the kernels
|
||||
if kernel is None:
|
||||
from ..kern import RBF
|
||||
self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf_{}'.format(i)) for i in range(len(Ylist))]
|
||||
elif isinstance(kernel, Kern):
|
||||
self.kern = []
|
||||
for i in range(len(Ylist)):
|
||||
k = kernel.copy()
|
||||
self.kern.append(k)
|
||||
else:
|
||||
assert len(kernel) == len(Ylist), "need one kernel per output"
|
||||
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
||||
self.kern = kernel
|
||||
|
||||
if X_variance is None:
|
||||
X_variance = np.random.uniform(0, .2, X.shape)
|
||||
X_variance = np.random.uniform(0.1, 0.2, X.shape)
|
||||
|
||||
self.variational_prior = NormalPrior()
|
||||
self.X = NormalPosterior(X, X_variance)
|
||||
|
|
@ -107,9 +111,8 @@ class MRD(Model):
|
|||
def parameters_changed(self):
|
||||
self._log_marginal_likelihood = 0
|
||||
self.posteriors = []
|
||||
self.Z.gradient = 0.
|
||||
self.X.mean.gradient = 0.
|
||||
self.X.variance.gradient = 0.
|
||||
self.Z.gradient[:] = 0.
|
||||
self.X.gradient[:] = 0.
|
||||
|
||||
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
|
||||
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
|
||||
|
|
@ -147,14 +150,22 @@ class MRD(Model):
|
|||
if Ylist is None:
|
||||
Ylist = self.Ylist
|
||||
if init in "PCA_concat":
|
||||
X = PCA(np.hstack(Ylist), self.input_dim)[0]
|
||||
X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
|
||||
fracs = [fracs]*self.input_dim
|
||||
elif init in "PCA_single":
|
||||
X = np.zeros((Ylist[0].shape[0], self.input_dim))
|
||||
fracs = []
|
||||
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
|
||||
X[:, qs] = PCA(Y, len(qs))[0]
|
||||
x,frcs = initialize_latent('PCA', len(qs), Y)
|
||||
X[:, qs] = x
|
||||
fracs.append(frcs)
|
||||
else: # init == 'random':
|
||||
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
|
||||
return X
|
||||
fracs = X.var(0)
|
||||
fracs = [fracs]*self.input_dim
|
||||
X -= X.mean()
|
||||
X /= X.std()
|
||||
return X, fracs
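# A standalone numpy sketch of the idea behind returning fracs from _init_X:
# initialise the latent X (here by plain PCA via SVD), record the per-dimension
# variance, and standardise, as the random branch above does.
import numpy as np

Y = np.random.randn(50, 8)
Yc = Y - Y.mean(0)

# plain PCA via SVD: project onto the first two principal directions
U, S, Vt = np.linalg.svd(Yc, full_matrices=False)
X = Yc.dot(Vt[:2].T)

fracs = X.var(0)                  # per-dimension variance of the initialisation
X = (X - X.mean(0)) / X.std(0)
print(X.shape)
print(fracs)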
|
||||
|
||||
def _init_Z(self, init="permute", X=None):
|
||||
if X is None:
|
||||
|
|
|
|||
|
|
@ -44,11 +44,3 @@ class SparseGPClassification(SparseGP):
|
|||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=expectation_propagation_dtc.EPDTC(), name='SparseGPClassification',Y_metadata=Y_metadata)
|
||||
#def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None):
|
||||
|
||||
|
||||
def _getstate(self):
|
||||
return SparseGP._getstate(self)
|
||||
|
||||
|
||||
def _setstate(self, state):
|
||||
return SparseGP._setstate(self, state)
|
||||
|
|
|
|||
|
|
@ -51,14 +51,6 @@ class SparseGPRegression(SparseGP):
|
|||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
|
||||
|
||||
def _getstate(self):
|
||||
return SparseGP._getstate(self)
|
||||
|
||||
def _setstate(self, state):
|
||||
return SparseGP._setstate(self, state)
|
||||
|
||||
|
||||
|
||||
class SparseGPRegressionUncertainInput(SparseGP):
|
||||
"""
|
||||
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)
|
||||
|
|
|
|||
|
|
@ -28,14 +28,6 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
|
|||
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
|
||||
self.ensure_default_constraints()
|
||||
|
||||
def _getstate(self):
|
||||
return SparseGPRegression._getstate(self)
|
||||
|
||||
|
||||
def _setstate(self, state):
|
||||
return SparseGPRegression._setstate(self, state)
|
||||
|
||||
|
||||
def _get_param_names(self):
|
||||
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
||||
+ SparseGPRegression._get_param_names(self))
|
||||
|
|
|
|||
|
|
@ -11,6 +11,9 @@ from ..likelihoods import Gaussian
|
|||
from ..inference.optimization import SCG
|
||||
from ..util import linalg
|
||||
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
|
||||
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
|
||||
class SSGPLVM(SparseGP):
|
||||
"""
|
||||
|
|
@ -25,11 +28,14 @@ class SSGPLVM(SparseGP):
|
|||
|
||||
"""
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', **kwargs):
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs):
|
||||
|
||||
if X == None: # The mean of variational approximation (mu)
|
||||
if X == None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X = initialize_latent(init, input_dim, Y)
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
||||
self.init = init
|
||||
|
||||
if X_variance is None: # The variance of the variational approximation (S)
|
||||
|
|
@ -38,6 +44,9 @@ class SSGPLVM(SparseGP):
|
|||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
|
||||
|
||||
if group_spike:
|
||||
gamma[:] = gamma.mean(axis=0)
|
||||
|
||||
if Z is None:
|
||||
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||
assert Z.shape[1] == X.shape[1]
|
||||
|
|
@ -46,18 +55,31 @@ class SSGPLVM(SparseGP):
|
|||
likelihood = Gaussian()
|
||||
|
||||
if kernel is None:
|
||||
kernel = kern.SSRBF(input_dim)
|
||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
||||
|
||||
pi = np.empty((input_dim))
|
||||
pi[:] = 0.5
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
|
||||
X = SpikeAndSlabPosterior(X, X_variance, gamma)
|
||||
|
||||
if group_spike:
|
||||
kernel.group_spike_prob = True
|
||||
self.variational_prior.group_spike_prob = True
|
||||
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||
self.add_parameter(self.X, index=0)
|
||||
self.add_parameter(self.variational_prior)
|
||||
|
||||
def set_X_gradients(self, X, X_grad):
|
||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
||||
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
|
||||
|
||||
def parameters_changed(self):
|
||||
if isinstance(self.inference_method, VarDTC_GPU):
|
||||
update_gradients(self)
|
||||
return
|
||||
|
||||
super(SSGPLVM, self).parameters_changed()
|
||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||
|
||||
|
|
|
|||
|
|
@ -43,10 +43,3 @@ class SVIGPRegression(SVIGP):
|
|||
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
|
||||
self.load_batch()
|
||||
|
||||
def _getstate(self):
|
||||
return GPBase._getstate(self)
|
||||
|
||||
|
||||
def _setstate(self, state):
|
||||
return GPBase._setstate(self, state)
|
||||
|
||||
|
|
|
|||
|
|
@ -30,14 +30,6 @@ class WarpedGP(GP):
|
|||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||
self._set_params(self._get_params())
|
||||
|
||||
def _getstate(self):
|
||||
return GP._getstate(self)
|
||||
|
||||
|
||||
def _setstate(self, state):
|
||||
return GP._setstate(self, state)
|
||||
|
||||
|
||||
def _scale_data(self, Y):
|
||||
self._Ymax = Y.max()
|
||||
self._Ymin = Y.min()
|
||||
|
|
|
|||
|
|
@ -15,3 +15,5 @@ import latent_space_visualizations
|
|||
import netpbmfile
|
||||
import inference_plots
|
||||
import maps
|
||||
import img_plots
|
||||
from ssgplvm import SSGPLVM_plot
|
||||
|
|
|
|||
56
GPy/plotting/matplot_dep/img_plots.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""
|
||||
The module contains the tools for plotting 2D image visualizations
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from matplotlib.cm import jet
|
||||
|
||||
width_max = 15
|
||||
height_max = 12
|
||||
|
||||
def _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad):
|
||||
width = (x_size*fig_ncols+pad*(fig_ncols-1))
|
||||
height = (y_size*fig_nrows+pad*(fig_nrows-1))
|
||||
if width > float(height)/height_max*width_max:
|
||||
return (width_max, float(width_max)/width*height)
|
||||
else:
|
||||
return (float(height_max)/height*width, height_max)
|
||||
|
||||
def plot_2D_images(figure, arr, symmetric=False, pad=None, zoom=None, mode=None, interpolation='nearest'):
|
||||
ax = figure.add_subplot(111)
|
||||
if len(arr.shape)==2:
|
||||
arr = arr.reshape(*((1,)+arr.shape))
|
||||
fig_num = arr.shape[0]
|
||||
y_size = arr.shape[1]
|
||||
x_size = arr.shape[2]
|
||||
fig_ncols = int(np.ceil(np.sqrt(fig_num)))
|
||||
fig_nrows = int(np.ceil((float)(fig_num)/fig_ncols))
|
||||
if pad==None:
|
||||
pad = max(int(min(y_size,x_size)/10),1)
|
||||
|
||||
figsize = _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad)
|
||||
#figure.set_size_inches(figsize,forward=True)
|
||||
#figure.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95)
|
||||
|
||||
if symmetric:
|
||||
# symmetric around zero: fix zero as the middle color
|
||||
mval = max(abs(arr.max()),abs(arr.min()))
|
||||
arr = arr/(2.*mval)+0.5
|
||||
else:
|
||||
minval,maxval = arr.min(),arr.max()
|
||||
arr = (arr-minval)/(maxval-minval)
|
||||
|
||||
if mode=='L':
|
||||
arr_color = np.empty(arr.shape+(3,))
|
||||
arr_color[:] = arr.reshape(*(arr.shape+(1,)))
|
||||
elif mode==None or mode=='jet':
|
||||
arr_color = jet(arr)
|
||||
|
||||
buf = np.ones((y_size*fig_nrows+pad*(fig_nrows-1), x_size*fig_ncols+pad*(fig_ncols-1), 3),dtype=arr.dtype)
|
||||
|
||||
for y in xrange(fig_nrows):
|
||||
for x in xrange(fig_ncols):
|
||||
if y*fig_ncols+x<fig_num:
|
||||
buf[y*y_size+y*pad:(y+1)*y_size+y*pad, x*x_size+x*pad:(x+1)*x_size+x*pad] = arr_color[y*fig_ncols+x,:,:,:3]
|
||||
img_plot = ax.imshow(buf, interpolation=interpolation)
|
||||
ax.axis('off')
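# A short usage sketch for the new plot_2D_images helper, assuming the module above
# is importable as GPy.plotting.matplot_dep.img_plots once the branch is installed.
import numpy as np
import matplotlib.pyplot as plt
from GPy.plotting.matplot_dep.img_plots import plot_2D_images

arr = np.random.rand(9, 28, 28)   # nine fake 28x28 grey-level images
fig = plt.figure()
plot_2D_images(fig, arr, mode='L')
plt.show()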
|
||||
|
|
@ -53,8 +53,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
which_data_rows = slice(None)
|
||||
if which_data_ycols == 'all':
|
||||
which_data_ycols = np.arange(model.output_dim)
|
||||
if len(which_data_ycols)==0:
|
||||
raise ValueError('No data selected for plotting')
|
||||
#if len(which_data_ycols)==0:
|
||||
#raise ValueError('No data selected for plotting')
|
||||
if ax is None:
|
||||
fig = pb.figure(num=fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
|
|
|
|||
29
GPy/plotting/matplot_dep/ssgplvm.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"""
|
||||
The module for plotting SSGPLVM results
|
||||
"""
|
||||
|
||||
import pylab
|
||||
|
||||
from ...models import SSGPLVM
|
||||
from img_plots import plot_2D_images
|
||||
from ...util.misc import param_to_array
|
||||
|
||||
class SSGPLVM_plot(object):
|
||||
def __init__(self,model, imgsize):
|
||||
assert isinstance(model,SSGPLVM)
|
||||
self.model = model
|
||||
self.imgsize= imgsize
|
||||
assert model.Y.shape[1] == imgsize[0]*imgsize[1]
|
||||
|
||||
def plot_inducing(self):
|
||||
fig1 = pylab.figure()
|
||||
mean = self.model.posterior.mean
|
||||
arr = mean.reshape(*(mean.shape[0],self.imgsize[1],self.imgsize[0]))
|
||||
plot_2D_images(fig1, arr)
|
||||
fig1.gca().set_title('The mean of inducing points')
|
||||
|
||||
fig2 = pylab.figure()
|
||||
covar = self.model.posterior.covariance
|
||||
plot_2D_images(fig2, covar)
|
||||
fig2.gca().set_title('The variance of inducing points')
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
|
|||
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
|
||||
return fig
|
||||
|
||||
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
||||
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_side=True):
|
||||
"""
|
||||
Plot latent space X in 1D:
|
||||
|
||||
|
|
@ -58,6 +58,9 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
|
||||
"""
|
||||
if ax is None:
|
||||
if side_by_side:
|
||||
fig = pb.figure(num=fignum, figsize=(16, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
else:
|
||||
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
if colors is None:
|
||||
colors = pb.gca()._get_lines.color_cycle
|
||||
|
|
@ -68,8 +71,15 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob)
|
||||
x = np.arange(means.shape[0])
|
||||
for i in range(means.shape[1]):
|
||||
if side_by_side:
|
||||
sub1 = (means.shape[1],2,2*i+1)
|
||||
sub2 = (means.shape[1],2,2*i+2)
|
||||
else:
|
||||
sub1 = (means.shape[1]*2,1,2*i+1)
|
||||
sub2 = (means.shape[1]*2,1,2*i+2)
|
||||
|
||||
# mean and variance plot
|
||||
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 1)
|
||||
a = fig.add_subplot(*sub1)
|
||||
a.plot(means, c='k', alpha=.3)
|
||||
plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
|
||||
a.fill_between(x,
|
||||
|
|
@ -82,7 +92,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
if i < means.shape[1] - 1:
|
||||
a.set_xticklabels('')
|
||||
# binary prob plot
|
||||
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 2)
|
||||
a = fig.add_subplot(*sub2)
|
||||
a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center')
|
||||
a.set_xlim(x.min(), x.max())
|
||||
a.set_ylim([0.,1.])
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ class vector_show(matplotlib_show):
|
|||
|
||||
|
||||
class lvm(matplotlib_show):
|
||||
|
||||
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
|
||||
"""Visualize a latent variable model
|
||||
|
||||
|
|
@ -130,10 +131,10 @@ class lvm(matplotlib_show):
|
|||
|
||||
def modify(self, vals):
|
||||
"""When latent values are modified update the latent representation and ulso update the output visualization."""
|
||||
self.vals = vals.copy()
|
||||
self.vals = vals[None,:].copy()
|
||||
y = self.model.predict(self.vals)[0]
|
||||
self.data_visualize.modify(y)
|
||||
self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]])
|
||||
self.latent_handle.set_data(self.vals[:,self.latent_index[0]], self.vals[:,self.latent_index[1]])
|
||||
self.axes.figure.canvas.draw()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ class Test(unittest.TestCase):
|
|||
self.assertListEqual(self.param_index[one].tolist(), [3])
|
||||
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
|
||||
|
||||
self.assertListEqual(self.view.remove('not in there', [2,3,4]).tolist(), [])
|
||||
|
||||
def test_shift_left(self):
|
||||
self.view.shift_left(0, 2)
|
||||
self.assertListEqual(self.param_index[three].tolist(), [2,5])
|
||||
|
|
@ -82,6 +84,10 @@ class Test(unittest.TestCase):
|
|||
self.assertEqual(self.param_index.size, 6)
|
||||
self.assertEqual(self.view.size, 5)
|
||||
|
||||
def test_print(self):
|
||||
print self.param_index
|
||||
print self.view
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import sys;sys.argv = ['', 'Test.test_index_view']
|
||||
unittest.main()
|
||||
|
|
@ -5,9 +5,11 @@ import unittest
|
|||
import numpy as np
|
||||
import GPy
|
||||
import sys
|
||||
from GPy.core.parameterization.param import Param
|
||||
|
||||
verbose = 0
|
||||
|
||||
np.random.seed(50)
|
||||
|
||||
|
||||
class Kern_check_model(GPy.core.Model):
|
||||
|
|
@ -29,7 +31,7 @@ class Kern_check_model(GPy.core.Model):
|
|||
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
||||
|
||||
self.kernel = kernel
|
||||
self.X = GPy.core.parameterization.Param('X',X)
|
||||
self.X = X
|
||||
self.X2 = X2
|
||||
self.dL_dK = dL_dK
|
||||
|
||||
|
|
@ -76,10 +78,11 @@ class Kern_check_dK_dX(Kern_check_model):
|
|||
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||
self.X = Param('X',X)
|
||||
self.add_parameter(self.X)
|
||||
|
||||
def parameters_changed(self):
|
||||
self.X.gradient = self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
|
||||
self.X.gradient[:] = self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
|
||||
|
||||
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
||||
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
||||
|
|
@ -90,7 +93,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
|||
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
|
||||
|
||||
def parameters_changed(self):
|
||||
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
|
||||
self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
|
||||
|
||||
|
||||
|
||||
|
|
@ -126,6 +129,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
if not result:
|
||||
print("Positive definite check failed for " + kern.name + " covariance function.")
|
||||
pass_checks = False
|
||||
assert(result)
|
||||
return False
|
||||
|
||||
if verbose:
|
||||
|
|
@ -137,6 +141,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
assert(result)
|
||||
return False
|
||||
|
||||
if verbose:
|
||||
|
|
@ -148,6 +153,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
assert(result)
|
||||
return False
|
||||
|
||||
if verbose:
|
||||
|
|
@ -164,6 +170,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
assert(result)
|
||||
return False
|
||||
|
||||
if verbose:
|
||||
|
|
@ -182,6 +189,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
if not result:
|
||||
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||
testmodel.checkgrad(verbose=True)
|
||||
import ipdb;ipdb.set_trace()
|
||||
assert(result)
|
||||
pass_checks = False
|
||||
return False
|
||||
|
||||
|
|
@ -201,6 +210,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
if not result:
|
||||
print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||
testmodel.checkgrad(verbose=True)
|
||||
assert(result)
|
||||
pass_checks = False
|
||||
return False
|
||||
|
||||
|
|
@ -218,6 +228,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
|
||||
pass_checks = False
|
||||
assert(result)
|
||||
return False
|
||||
|
||||
return pass_checks
|
||||
|
|
@ -226,7 +237,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
|||
|
||||
class KernelGradientTestsContinuous(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.N, self.D = 100, 5
|
||||
self.N, self.D = 10, 5
|
||||
self.X = np.random.randn(self.N,self.D)
|
||||
self.X2 = np.random.randn(self.N+10,self.D)
|
||||
|
||||
|
|
@ -243,6 +254,16 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
|||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod2(self):
|
||||
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod3(self):
|
||||
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Add(self):
|
||||
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||
k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||
|
|
@ -252,7 +273,7 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
|||
def test_Add_dims(self):
|
||||
k = GPy.kern.Matern32(2, active_dims=[2,self.D]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||
k.randomize()
|
||||
self.assertRaises(AssertionError, k.K, self.X)
|
||||
self.assertRaises(IndexError, k.K, self.X)
|
||||
k = GPy.kern.Matern32(2, active_dims=[2,self.D-1]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||
k.randomize()
|
||||
# assert it runs:
|
||||
|
|
@ -276,46 +297,31 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
|||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
#TODO: turn off grad checking wrt X for indexed kernels like coregionalize
|
||||
# class KernelGradientTestsContinuous1D(unittest.TestCase):
|
||||
# def setUp(self):
|
||||
# self.N, self.D = 100, 1
|
||||
# self.X = np.random.randn(self.N,self.D)
|
||||
# self.X2 = np.random.randn(self.N+10,self.D)
|
||||
#
|
||||
# continuous_kerns = ['RBF', 'Linear']
|
||||
# self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
|
||||
#
|
||||
# def test_PeriodicExponential(self):
|
||||
# k = GPy.kern.PeriodicExponential(self.D)
|
||||
# k.randomize()
|
||||
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
#
|
||||
# def test_PeriodicMatern32(self):
|
||||
# k = GPy.kern.PeriodicMatern32(self.D)
|
||||
# k.randomize()
|
||||
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
#
|
||||
# def test_PeriodicMatern52(self):
|
||||
# k = GPy.kern.PeriodicMatern52(self.D)
|
||||
# k.randomize()
|
||||
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_LinearFull(self):
|
||||
k = GPy.kern.LinearFull(self.D, self.D-1)
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
class KernelTestsMiscellaneous(unittest.TestCase):
|
||||
def setUp(self):
|
||||
N, D = 100, 10
|
||||
self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.ones(D)
|
||||
self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.random.uniform(-10,10,D)
|
||||
self.rbf = GPy.kern.RBF(2, active_dims=slice(0,4,2))
|
||||
self.linear = GPy.kern.Linear(2, active_dims=(3,9))
|
||||
self.matern = GPy.kern.Matern32(3, active_dims=np.array([2,4,9]))
|
||||
self.matern = GPy.kern.Matern32(3, active_dims=np.array([1,7,9]))
|
||||
self.sumkern = self.rbf + self.linear
|
||||
self.sumkern += self.matern
|
||||
self.sumkern.randomize()
|
||||
|
||||
def test_active_dims(self):
|
||||
self.assertEqual(self.sumkern.input_dim, 10)
|
||||
self.assertEqual(self.sumkern.active_dims, slice(0, 10, 1))
|
||||
# test the automatic dim detection expression for slices:
|
||||
start, stop = 0, 277
|
||||
for i in range(start,stop,7):
|
||||
for j in range(1,4):
|
||||
GPy.kern.Kern(int(np.round((i+1)/j)), slice(0, i+1, j), "testkern")
|
||||
# test the ability to have only one dim
|
||||
sk = GPy.kern.RBF(2) + GPy.kern.Matern32(2)
|
||||
self.assertEqual(sk.input_dim, 2)
|
||||
|
||||
def test_which_parts(self):
|
||||
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X)))
|
||||
|
|
@ -356,25 +362,25 @@ class KernelTestsNonContinuous(unittest.TestCase):
|
|||
|
||||
if __name__ == "__main__":
|
||||
print "Running unit tests, please be (very) patient..."
|
||||
#unittest.main()
|
||||
np.random.seed(0)
|
||||
N0 = 3
|
||||
N1 = 9
|
||||
N2 = 4
|
||||
N = N0+N1+N2
|
||||
D = 3
|
||||
X = np.random.randn(N, D+1)
|
||||
indices = np.random.random_integers(0, 2, size=N)
|
||||
X[indices==0, -1] = 0
|
||||
X[indices==1, -1] = 1
|
||||
X[indices==2, -1] = 2
|
||||
#X = X[X[:, -1].argsort(), :]
|
||||
X2 = np.random.randn((N0+N1)*2, D+1)
|
||||
X2[:(N0*2), -1] = 0
|
||||
X2[(N0*2):, -1] = 1
|
||||
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
|
||||
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
|
||||
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
||||
k = GPy.kern.RBF(D)
|
||||
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
|
||||
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
||||
unittest.main()
|
||||
# np.random.seed(0)
|
||||
# N0 = 3
|
||||
# N1 = 9
|
||||
# N2 = 4
|
||||
# N = N0+N1+N2
|
||||
# D = 3
|
||||
# X = np.random.randn(N, D+1)
|
||||
# indices = np.random.random_integers(0, 2, size=N)
|
||||
# X[indices==0, -1] = 0
|
||||
# X[indices==1, -1] = 1
|
||||
# X[indices==2, -1] = 2
|
||||
# #X = X[X[:, -1].argsort(), :]
|
||||
# X2 = np.random.randn((N0+N1)*2, D+1)
|
||||
# X2[:(N0*2), -1] = 0
|
||||
# X2[(N0*2):, -1] = 1
|
||||
# k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
|
||||
# kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
|
||||
# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
||||
# k = GPy.kern.RBF(D)
|
||||
# kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
|
||||
# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ class TestNoiseModels(object):
|
|||
self.f = None
|
||||
self.X = None
|
||||
|
||||
def test_noise_models(self):
|
||||
def test_scale2_models(self):
|
||||
self.setUp()
|
||||
|
||||
####################################################
|
||||
|
|
@ -150,64 +150,64 @@ class TestNoiseModels(object):
|
|||
noise_models = {"Student_t_default": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_noise"],
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
#"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
#"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_1_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_noise"],
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [1.0],
|
||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_deg_free": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_noise"],
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_noise"],
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [0.001],
|
||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_large_var": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_noise"],
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [10.0],
|
||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_approx_gauss": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_noise"],
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_log": {
|
||||
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": [".*t_noise"],
|
||||
"names": [".*t_scale2"],
|
||||
"vals": [self.var],
|
||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
|
|
|
|||
|
|
@ -130,7 +130,25 @@ class MiscTests(unittest.TestCase):
|
|||
m2.kern[:] = m.kern[''].values()
|
||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
||||
|
||||
class GradientTests(unittest.TestCase):
|
||||
def test_model_set_params(self):
|
||||
m = GPy.models.GPRegression(self.X, self.Y)
|
||||
lengthscale = np.random.uniform()
|
||||
m.kern.lengthscale = lengthscale
|
||||
np.testing.assert_equal(m.kern.lengthscale, lengthscale)
|
||||
m.kern.lengthscale *= 1
|
||||
m['.*var'] -= .1
|
||||
np.testing.assert_equal(m.kern.lengthscale, lengthscale)
|
||||
m.optimize()
|
||||
print m
|
||||
|
||||
def test_model_optimize(self):
|
||||
X = np.random.uniform(-3., 3., (20, 1))
|
||||
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
|
||||
m = GPy.models.GPRegression(X,Y)
|
||||
m.optimize()
|
||||
print m
|
||||
|
||||
class GradientTests(np.testing.TestCase):
|
||||
def setUp(self):
|
||||
######################################
|
||||
# # 1 dimensional example
|
||||
|
|
|
|||
|
|
@ -93,12 +93,12 @@ class Test(unittest.TestCase):
|
|||
|
||||
def test_set_params(self):
|
||||
self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
|
||||
self.par._param_array_[:] = 1
|
||||
self.par.param_array[:] = 1
|
||||
self.par._trigger_params_changed()
|
||||
self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
|
||||
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
|
||||
|
||||
self.par._param_array_[:] = 2
|
||||
self.par.param_array[:] = 2
|
||||
self.par._trigger_params_changed()
|
||||
self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
|
||||
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import unittest
|
|||
import GPy
|
||||
import numpy as np
|
||||
from GPy.core.parameterization.parameter_core import HierarchyError
|
||||
from GPy.core.parameterization.array_core import ObsAr
|
||||
from GPy.core.parameterization.observable_array import ObsAr
|
||||
|
||||
class ArrayCoreTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
|
|
@ -34,6 +34,7 @@ class ParameterizedTest(unittest.TestCase):
|
|||
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
|
||||
|
||||
self.test1 = GPy.core.Parameterized("test model")
|
||||
self.test1.param = self.param
|
||||
self.test1.kern = self.rbf+self.white
|
||||
self.test1.add_parameter(self.test1.kern)
|
||||
self.test1.add_parameter(self.param, 0)
|
||||
|
|
@ -58,6 +59,9 @@ class ParameterizedTest(unittest.TestCase):
|
|||
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
|
||||
self.test1.kern.rbf.fix()
|
||||
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)
|
||||
self.test1.fix()
|
||||
self.assertTrue(self.test1.is_fixed)
|
||||
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*self.test1.size)
|
||||
|
||||
def test_remove_parameter(self):
|
||||
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
|
||||
|
|
@ -86,9 +90,9 @@ class ParameterizedTest(unittest.TestCase):
|
|||
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
|
||||
|
||||
def test_remove_parameter_param_array_grad_array(self):
|
||||
val = self.test1.kern._param_array_.copy()
|
||||
val = self.test1.kern.param_array.copy()
|
||||
self.test1.kern.remove_parameter(self.white)
|
||||
self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist())
|
||||
self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist())
|
||||
|
||||
def test_add_parameter_already_in_hirarchy(self):
|
||||
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
|
||||
|
|
@ -138,6 +142,17 @@ class ParameterizedTest(unittest.TestCase):
|
|||
self.testmodel.randomize()
|
||||
self.assertEqual(val, self.testmodel.kern.lengthscale)
|
||||
|
||||
def test_regular_expression_misc(self):
|
||||
self.testmodel.kern.lengthscale.fix()
|
||||
val = float(self.testmodel.kern.lengthscale)
|
||||
self.testmodel.randomize()
|
||||
self.assertEqual(val, self.testmodel.kern.lengthscale)
|
||||
|
||||
variances = self.testmodel['.*var'].values()
|
||||
self.testmodel['.*var'].fix()
|
||||
self.testmodel.randomize()
|
||||
np.testing.assert_equal(variances, self.testmodel['.*var'].values())
|
||||
|
||||
def test_printing(self):
|
||||
print self.test1
|
||||
print self.param
|
||||
|
|
|
|||
208
GPy/testing/pickle_tests.py
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
'''
|
||||
Created on 13 Mar 2014
|
||||
|
||||
@author: maxz
|
||||
'''
|
||||
import unittest, itertools
|
||||
import cPickle as pickle
|
||||
import numpy as np
|
||||
from GPy.core.parameterization.index_operations import ParameterIndexOperations,\
|
||||
ParameterIndexOperationsView
|
||||
import tempfile
|
||||
from GPy.core.parameterization.param import Param
|
||||
from GPy.core.parameterization.observable_array import ObsAr
|
||||
from GPy.core.parameterization.priors import Gaussian
|
||||
from GPy.kern._src.rbf import RBF
|
||||
from GPy.kern._src.linear import Linear
|
||||
from GPy.kern._src.static import Bias, White
|
||||
from GPy.examples.dimensionality_reduction import mrd_simulation,\
|
||||
bgplvm_simulation
|
||||
from GPy.examples.regression import toy_rbf_1d_50
|
||||
from GPy.core.parameterization.variational import NormalPosterior
|
||||
from GPy.models.gp_regression import GPRegression
|
||||
|
||||
class ListDictTestCase(unittest.TestCase):
|
||||
def assertListDictEquals(self, d1, d2, msg=None):
|
||||
for k,v in d1.iteritems():
|
||||
self.assertListEqual(list(v), list(d2[k]), msg)
|
||||
def assertArrayListEquals(self, l1, l2):
|
||||
for a1, a2 in itertools.izip(l1,l2):
|
||||
np.testing.assert_array_equal(a1, a2)
|
||||
|
||||
class Test(ListDictTestCase):
|
||||
def test_parameter_index_operations(self):
|
||||
pio = ParameterIndexOperations(dict(test1=np.array([4,3,1,6,4]), test2=np.r_[2:130]))
|
||||
piov = ParameterIndexOperationsView(pio, 20, 250)
|
||||
self.assertListDictEquals(dict(piov.items()), dict(piov.copy().iteritems()))
|
||||
self.assertListDictEquals(dict(pio.iteritems()), dict(pio.copy().items()))
|
||||
|
||||
self.assertArrayListEquals(pio.copy().indices(), pio.indices())
|
||||
self.assertArrayListEquals(piov.copy().indices(), piov.indices())
|
||||
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
pickle.dump(pio, f)
|
||||
f.seek(0)
|
||||
pio2 = pickle.load(f)
|
||||
self.assertListDictEquals(pio._properties, pio2._properties)
|
||||
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
pickle.dump(piov, f)
|
||||
f.seek(0)
|
||||
pio2 = pickle.load(f)
|
||||
self.assertListDictEquals(dict(piov.items()), dict(pio2.iteritems()))
|
||||
|
||||
def test_param(self):
|
||||
param = Param('test', np.arange(4*2).reshape(4,2))
|
||||
param[0].constrain_positive()
|
||||
param[1].fix()
|
||||
param[2].set_prior(Gaussian(0,1))
|
||||
pcopy = param.copy()
|
||||
self.assertListEqual(param.tolist(), pcopy.tolist())
|
||||
self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n'))
|
||||
self.assertIsNot(param, pcopy)
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
pickle.dump(param, f)
|
||||
f.seek(0)
|
||||
pcopy = pickle.load(f)
|
||||
self.assertListEqual(param.tolist(), pcopy.tolist())
|
||||
self.assertSequenceEqual(str(param), str(pcopy))
|
||||
|
||||
def test_observable_array(self):
|
||||
obs = ObsAr(np.arange(4*2).reshape(4,2))
|
||||
pcopy = obs.copy()
|
||||
self.assertListEqual(obs.tolist(), pcopy.tolist())
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
pickle.dump(obs, f)
|
||||
f.seek(0)
|
||||
pcopy = pickle.load(f)
|
||||
self.assertListEqual(obs.tolist(), pcopy.tolist())
|
||||
self.assertSequenceEqual(str(obs), str(pcopy))
|
||||
|
||||
def test_parameterized(self):
|
||||
par = RBF(1, active_dims=[1]) + Linear(2, active_dims=[0,2]) + Bias(3) + White(3)
|
||||
par.gradient = 10
|
||||
par.randomize()
|
||||
pcopy = par.copy()
|
||||
self.assertIsInstance(pcopy.constraints, ParameterIndexOperations)
|
||||
self.assertIsInstance(pcopy.rbf.constraints, ParameterIndexOperationsView)
|
||||
self.assertIs(pcopy.constraints, pcopy.rbf.constraints._param_index_ops)
|
||||
self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
|
||||
self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
par.pickle(f)
|
||||
f.seek(0)
|
||||
pcopy = pickle.load(f)
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
pcopy.gradient = 10
|
||||
np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient)
|
||||
np.testing.assert_allclose(pcopy.linear.full_gradient, 10)
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
|
||||
def test_model(self):
|
||||
par = toy_rbf_1d_50(optimize=0, plot=0)
|
||||
pcopy = par.copy()
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||
self.assertTrue(pcopy.checkgrad())
|
||||
self.assert_(np.any(pcopy.gradient!=0.0))
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
par.pickle(f)
|
||||
f.seek(0)
|
||||
pcopy = pickle.load(f)
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assert_(pcopy.checkgrad())
|
||||
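A hedged sketch of the user-level workflow this test covers; toy_rbf_1d_50 is the same example model used above, and the GPy.examples module path and file name are assumptions.

import pickle
import GPy

m = GPy.examples.regression.toy_rbf_1d_50(optimize=0, plot=0)  # assumed module path
with open('gp_model.pkl', 'wb') as f:   # illustrative file name
    m.pickle(f)                          # models expose .pickle(), as used in the test above
with open('gp_model.pkl', 'rb') as f:
    m2 = pickle.load(f)                  # a plain pickle.load restores the model
assert m2.checkgrad()                    # gradients still check out, mirroring the assertion above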
|
||||
def test_modelrecreation(self):
|
||||
par = toy_rbf_1d_50(optimize=0, plot=0)
|
||||
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||
self.assertTrue(pcopy.checkgrad())
|
||||
self.assert_(np.any(pcopy.gradient!=0.0))
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
par.pickle(f)
|
||||
f.seek(0)
|
||||
pcopy = pickle.load(f)
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assert_(pcopy.checkgrad())
|
||||
|
||||
def test_posterior(self):
|
||||
X = np.random.randn(3,5)
|
||||
Xv = np.random.rand(*X.shape)
|
||||
par = NormalPosterior(X,Xv)
|
||||
par.gradient = 10
|
||||
pcopy = par.copy()
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
par.pickle(f)
|
||||
f.seek(0)
|
||||
pcopy = pickle.load(f)
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
pcopy.gradient = 10
|
||||
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||
np.testing.assert_allclose(pcopy.mean.full_gradient, 10)
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
|
||||
def test_model_concat(self):
|
||||
par = mrd_simulation(optimize=0, plot=0, plot_sim=0)
|
||||
par.randomize()
|
||||
pcopy = par.copy()
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||
self.assertTrue(pcopy.checkgrad())
|
||||
self.assert_(np.any(pcopy.gradient!=0.0))
|
||||
with tempfile.TemporaryFile('w+b') as f:
|
||||
par.pickle(f)
|
||||
f.seek(0)
|
||||
pcopy = pickle.load(f)
|
||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||
self.assertSequenceEqual(str(par), str(pcopy))
|
||||
self.assert_(pcopy.checkgrad())
|
||||
|
||||
def _callback(self, what, which):
|
||||
what.count += 1
|
||||
|
||||
@unittest.skip
|
||||
def test_add_observer(self):
|
||||
par = toy_rbf_1d_50(optimize=0, plot=0)
|
||||
par.name = "original"
|
||||
par.count = 0
|
||||
par.add_observer(self, self._callback, 1)
|
||||
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
|
||||
self.assertNotIn(par.observers[0], pcopy.observers)
|
||||
pcopy = par.copy()
|
||||
pcopy.name = "copy"
|
||||
self.assertTrue(par.checkgrad())
|
||||
self.assertTrue(pcopy.checkgrad())
|
||||
self.assertTrue(pcopy.kern.checkgrad())
|
||||
import ipdb;ipdb.set_trace()
|
||||
self.assertIn(par.observers[0], pcopy.observers)
|
||||
self.assertEqual(par.count, 3)
|
||||
self.assertEqual(pcopy.count, 6) # 3 for each of the two checkgrad calls on the copy
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
|
||||
unittest.main()
|
||||
|
|
@ -15,6 +15,7 @@ import caching
|
|||
import diag
|
||||
import initialization
|
||||
import multioutput
|
||||
import linalg_gpu
|
||||
|
||||
try:
|
||||
import sympy
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class Cacher(object):
|
|||
if k in kw and kw[k] is not None:
|
||||
return self.operation(*args, **kw)
|
||||
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
||||
#return self.operation(*args)
|
||||
# return self.operation(*args, **kw)
|
||||
|
||||
#if the result is cached, return the cached computation
|
||||
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
|
||||
|
|
@ -66,6 +66,7 @@ class Cacher(object):
|
|||
#first make sure the depth limit isn't exceeded
|
||||
if len(self.cached_inputs) == self.limit:
|
||||
args_ = self.cached_inputs.pop(0)
|
||||
args_ = [a for i,a in enumerate(args_) if i not in self.ignore_args and i not in self.force_kwargs]
|
||||
[a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
|
||||
self.inputs_changed.pop(0)
|
||||
self.cached_outputs.pop(0)
|
||||
|
|
@ -97,11 +98,20 @@ class Cacher(object):
|
|||
self.cached_outputs = []
|
||||
self.inputs_changed = []
|
||||
|
||||
def __deepcopy__(self, memo=None):
|
||||
return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs)
|
||||
|
||||
def __getstate__(self, memo=None):
|
||||
raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
|
||||
|
||||
def __setstate__(self, memo=None):
|
||||
raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
|
||||
|
||||
@property
|
||||
def __name__(self):
|
||||
return self.operation.__name__
|
||||
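Since __getstate__/__setstate__ above refuse to pickle a Cacher, an owner that holds one typically drops it from its state and rebuilds an empty cache on load. A hedged sketch: the constructor arguments mirror __deepcopy__ above, and the owner class itself is purely illustrative, not part of GPy.

class CachedOwner(object):
    # illustrative owner of a Cacher
    def __init__(self):
        self._cache = Cacher(self._expensive, 5, [], [])

    def _expensive(self, X):
        return X.dot(X.T)

    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop('_cache')                      # the Cacher itself refuses to pickle
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self._cache = Cacher(self._expensive, 5, [], [])   # rebuild an empty cache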
|
||||
from functools import partial
|
||||
from functools import partial, update_wrapper
|
||||
|
||||
class Cacher_wrap(object):
|
||||
def __init__(self, f, limit, ignore_args, force_kwargs):
|
||||
|
|
@ -109,6 +119,7 @@ class Cacher_wrap(object):
|
|||
self.ignore_args = ignore_args
|
||||
self.force_kwargs = force_kwargs
|
||||
self.f = f
|
||||
update_wrapper(self, self.f)
|
||||
def __get__(self, obj, objtype=None):
|
||||
return partial(self, obj)
|
||||
def __call__(self, *args, **kwargs):
|
||||
|
|
|
|||
|
|
@ -331,7 +331,7 @@ def football_data(season='1314', data_set='football_data'):
|
|||
|
||||
# This will be for downloading google trends data.
|
||||
def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
|
||||
"""Data downloaded from Google trends for given query terms."""
|
||||
"""Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations."""
|
||||
# Inspired by this notebook:
|
||||
# http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb
|
||||
|
||||
|
|
|
|||
18
GPy/util/functions.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import numpy as np
|
||||
from scipy.special import erf, erfcx
|
||||
import sys
|
||||
epsilon = sys.float_info.epsilon
|
||||
lim_val = -np.log(epsilon)
|
||||
|
||||
def cum_gaussian(x):
|
||||
g=0.5*(1+erf(x/np.sqrt(2)))
|
||||
return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g))
|
||||
|
||||
def ln_cum_gaussian(x):
|
||||
return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x)))
|
||||
|
||||
def clip_exp(x):
|
||||
if np.any(x>=lim_val) or np.any(x<=-lim_val):
|
||||
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))
|
||||
else:
|
||||
return np.exp(x)
|
||||
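A quick hedged illustration of why these forms are used: ln_cum_gaussian keeps a finite value where a naive log of the normal CDF underflows, and clip_exp caps its argument at ±lim_val so the result stays finite. The GPy.util.functions import path is an assumption.

import numpy as np
from scipy.stats import norm
from GPy.util.functions import ln_cum_gaussian, clip_exp   # assumed import path for the module added here

x = np.array([-40.0, -10.0, 0.0, 5.0])
print(np.log(norm.cdf(x)))      # -inf at x = -40: the CDF underflows to 0
print(ln_cum_gaussian(x))       # finite everywhere (about -804.6 at x = -40)

big = np.array([800.0, -800.0, 1.0])
print(clip_exp(big))            # result is clipped to [exp(-lim_val), exp(lim_val)] and stays finite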
16
GPy/util/gpu_init.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"""
|
||||
Initialization of scikits.cuda for GPU support.
|
||||
|
||||
Global variables: initSuccess (True when the GPU stack initialized successfully)
|
||||
cublas_handle (the shared CUBLAS handle, valid only when initSuccess is True)
|
||||
"""
|
||||
|
||||
try:
|
||||
import pycuda.autoinit
|
||||
from scikits.cuda import cublas
|
||||
import scikits.cuda.linalg as culinalg
|
||||
culinalg.init()
|
||||
cublas_handle = cublas.cublasCreate()
|
||||
initSuccess = True
|
||||
except:
|
||||
initSuccess = False
|
||||
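A hedged sketch of how downstream code would consume this module; the GPy.util.gpu_init path follows the file name above, and the fallback branch is illustrative.

from GPy.util import gpu_init

if gpu_init.initSuccess:
    handle = gpu_init.cublas_handle   # shared CUBLAS handle created at import time
else:
    handle = None                     # pycuda / scikits.cuda unavailable: stay on the CPU path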
|
|
@ -5,13 +5,15 @@ Created on 24 Feb 2014
|
|||
'''
|
||||
|
||||
import numpy as np
|
||||
from linalg import PCA
|
||||
from GPy.util.pca import pca
|
||||
|
||||
def initialize_latent(init, input_dim, Y):
|
||||
Xr = np.random.randn(Y.shape[0], input_dim)
|
||||
if init == 'PCA':
|
||||
PC = PCA(Y, input_dim)[0]
|
||||
p = pca(Y)
|
||||
PC = p.project(Y, min(input_dim, Y.shape[1]))
|
||||
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
||||
else:
|
||||
pass
|
||||
return Xr
|
||||
var = Xr.var(0)
|
||||
return Xr, var/var.max()
|
||||
return Xr, p.fracs[:input_dim]
|
||||
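A hedged sketch of the new call signature (latent points plus normalized variance fractions); the GPy.util.initialization path follows the import added to the util package above, and the shapes are illustrative.

import numpy as np
from GPy.util.initialization import initialize_latent   # assumed import path

Y = np.random.randn(100, 10)
X0, fracs = initialize_latent('PCA', input_dim=3, Y=Y)
assert X0.shape == (100, 3)
assert fracs.shape == (3,)     # one normalized fraction per latent dimension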
|
|
@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'):
|
|||
tmp, _ = dtrtrs(L, X, lower=1, trans=0)
|
||||
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
|
||||
|
||||
def PCA(Y, input_dim):
|
||||
"""
|
||||
Principal component analysis: maximum likelihood solution by SVD
|
||||
|
||||
:param Y: NxD np.array of data
|
||||
:param input_dim: int, dimension of projection
|
||||
|
||||
|
||||
:rval X: - Nxinput_dim np.array of dimensionality reduced data
|
||||
:rval W: - input_dimxD mapping from X to Y
|
||||
|
||||
"""
|
||||
if not np.allclose(Y.mean(axis=0), 0.0):
|
||||
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
|
||||
|
||||
# Y -= Y.mean(axis=0)
|
||||
|
||||
Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
|
||||
[X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
|
||||
v = X.std(axis=0)
|
||||
X /= v;
|
||||
W *= v;
|
||||
return X, W.T
|
||||
|
|
|
|||
59
GPy/util/linalg_gpu.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
#
|
||||
# The utility functions for GPU computation
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from ..util import gpu_init
|
||||
|
||||
try:
|
||||
from pycuda.reduction import ReductionKernel
|
||||
from pycuda.elementwise import ElementwiseKernel
|
||||
|
||||
# log|A| for a lower-triangular matrix A
|
||||
# logDiagSum(A, A.shape[0]+1)
|
||||
logDiagSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?log(x[i]):0", arguments="double *x, int step")
|
||||
|
||||
strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")
|
||||
|
||||
#=======================================================================================
|
||||
# Element-wise functions
|
||||
#=======================================================================================
|
||||
|
||||
# log(X)
|
||||
log = ElementwiseKernel("double *in, double *out", "out[i] = log(in[i])", "log_element")
|
||||
|
||||
# log(1.0-X)
|
||||
logOne = ElementwiseKernel("double *in, double *out", "out[i] = log(1.-in[i])", "logOne_element")
|
||||
|
||||
# multiplication with broadcast on the last dimension (out = shorter[:,None]*longer)
|
||||
mul_bcast = ElementwiseKernel("double *out, double *shorter, double *longer, int shorter_size", "out[i] = longer[i]*shorter[i%shorter_size]", "mul_bcast")
|
||||
|
||||
# multiplication with broadcast on the first dimension (out = shorter[None,:]*longer)
|
||||
mul_bcast_first = ElementwiseKernel("double *out, double *shorter, double *longer, int first_dim", "out[i] = longer[i]*shorter[i/first_dim]", "mul_bcast_first")
|
||||
|
||||
# sum through the middle dimension (size_2) of a 3D matrix (size_1, size_2, size_3)
|
||||
sum_axis = ElementwiseKernel("double *out, double *in, int size_1, int size_2", "out[i] += sum_axis_element(in, size_1, size_2, i)", "sum_axis",preamble="""
|
||||
__device__ double sum_axis_element(double *in, int size_1, int size_2, int idx)
|
||||
{
|
||||
int k = idx/size_1;
|
||||
int i = idx%size_1;
|
||||
double sum=0;
|
||||
for(int j=0;j<size_2;j++) {
|
||||
sum += in[(k*size_2+j)*size_1+i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
""")
|
||||
|
||||
# the outer product between two vectors (out = np.dot(v1,v2.T))
|
||||
outer_prod = ElementwiseKernel("double *out, double *v1, double *v2, int v1_size", "out[i] = v1[i%v1_size]*v2[i/v1_size]", "outer_prod")
|
||||
|
||||
# the row-wise outer product of two matrices (out = np.einsum('na,nb->nab', m1, m2), a=dim1, b=dim2)
|
||||
join_prod = ElementwiseKernel("double *out, double *m1, double *m2, int dim1, int dim2", "out[i] = m1[(i%dim1)*dim1+(i%(dim1*dim2))/dim1]*m2[(i%dim1)*dim1+i/(dim1*dim2)]", "join_prod")
|
||||
|
||||
except:
|
||||
pass
|
||||
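For readers without a GPU, a hedged NumPy reference for two of the kernels above, spelling out the flat-index arithmetic; these helpers are illustrative and not part of the module.

import numpy as np

def strideSum_cpu(x, step):
    # mirror of strideSum / logDiagSum: sum x[i] over i % step == 0,
    # e.g. step = A.shape[0] + 1 walks the diagonal of a flattened square matrix
    return x.ravel()[::step].sum()

def mul_bcast_cpu(shorter, longer):
    # mirror of mul_bcast: out[i] = longer[i] * shorter[i % shorter.size]
    n = shorter.size
    idx = np.arange(longer.size)
    return longer.ravel() * shorter.ravel()[idx % n]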
122
GPy/util/pca.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
'''
|
||||
Created on 10 Sep 2012
|
||||
|
||||
@author: Max Zwiessele
|
||||
@copyright: Max Zwiessele 2012
|
||||
'''
|
||||
import numpy
|
||||
import pylab
|
||||
import matplotlib
|
||||
from numpy.linalg.linalg import LinAlgError
|
||||
|
||||
class pca(object):
|
||||
"""
|
||||
pca module with automatic primal/dual determination.
|
||||
"""
|
||||
def __init__(self, X):
|
||||
self.mu = X.mean(0)
|
||||
self.sigma = X.std(0)
|
||||
|
||||
X = self.center(X)
|
||||
|
||||
# self.X = input
|
||||
if X.shape[0] >= X.shape[1]:
|
||||
# print "N >= D: using primal"
|
||||
self.eigvals, self.eigvectors = self._primal_eig(X)
|
||||
else:
|
||||
# print "N < D: using dual"
|
||||
self.eigvals, self.eigvectors = self._dual_eig(X)
|
||||
self.sort = numpy.argsort(self.eigvals)[::-1]
|
||||
self.eigvals = self.eigvals[self.sort]
|
||||
self.eigvectors = self.eigvectors[:, self.sort]
|
||||
self.fracs = self.eigvals / self.eigvals.sum()
|
||||
self.Q = self.eigvals.shape[0]
|
||||
|
||||
def center(self, X):
|
||||
"""
|
||||
Center and scale `X` using the mean and standard deviation stored at construction.
|
||||
"""
|
||||
X = X - self.mu
|
||||
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
|
||||
return X
|
||||
|
||||
def _primal_eig(self, X):
|
||||
return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X))
|
||||
|
||||
def _dual_eig(self, X):
|
||||
dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X))
|
||||
relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:]
|
||||
eigvals = dual_eigvals[relevant_dimensions]
|
||||
eigvects = dual_eigvects[:, relevant_dimensions]
|
||||
eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects)
|
||||
eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects)))
|
||||
return eigvals, eigvects
|
||||
|
||||
def project(self, X, Q=None):
|
||||
"""
|
||||
Project X into pca space, defined by the Q highest eigenvalues.
|
||||
Y = X dot V
|
||||
"""
|
||||
if Q is None:
|
||||
Q = self.Q
|
||||
if Q > X.shape[1]:
|
||||
raise IndexError("requested dimension larger then input dimension")
|
||||
X = self.center(X)
|
||||
return X.dot(self.eigvectors[:, :Q])
|
||||
|
||||
def plot_fracs(self, Q=None, ax=None, fignum=None):
|
||||
"""
|
||||
Plot the fraction of total variance explained by each eigenvalue, in descending order.
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pylab.figure(fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
if Q is None:
|
||||
Q = self.Q
|
||||
ticks = numpy.arange(Q)
|
||||
bar = ax.bar(ticks - .4, self.fracs[:Q])
|
||||
ax.set_xticks(ticks)
ax.set_xticklabels(map(lambda x: r"${}$".format(x), ticks + 1))
|
||||
ax.set_ylabel("Eigenvalue fraction")
|
||||
ax.set_xlabel("PC")
|
||||
ax.set_ylim(0, ax.get_ylim()[1])
|
||||
ax.set_xlim(ticks.min() - .5, ticks.max() + .5)
|
||||
try:
|
||||
pylab.tight_layout()
|
||||
except:
|
||||
pass
|
||||
return bar
|
||||
|
||||
def plot_2d(self, X, labels=None, s=20, marker='o',
|
||||
dimensions=(0, 1), ax=None, colors=None,
|
||||
fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable
|
||||
** kwargs):
|
||||
"""
|
||||
Plot the PC-space dimensions given by `dimensions` against each other.
|
||||
`labels` may be any sequence of length X.shape[0]; one scatter group is drawn per unique label.
|
||||
A legend can then be added with a call to legend().
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pylab.figure(fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
if labels is None:
|
||||
labels = numpy.zeros(X.shape[0])
|
||||
ulabels = []
|
||||
for lab in labels:
|
||||
if not lab in ulabels:
|
||||
ulabels.append(lab)
|
||||
nlabels = len(ulabels)
|
||||
if colors is None:
|
||||
colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
|
||||
X_ = self.project(X, self.Q)[:,dimensions]
|
||||
kwargs.update(dict(s=s))
|
||||
plots = list()
|
||||
for i, l in enumerate(ulabels):
|
||||
kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
|
||||
plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
|
||||
ax.set_xlabel(r"PC$_1$")
|
||||
ax.set_ylabel(r"PC$_2$")
|
||||
try:
|
||||
pylab.tight_layout()
|
||||
except:
|
||||
pass
|
||||
return plots
|
||||
|
|
@ -1,5 +1,55 @@
|
|||
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign
|
||||
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
|
||||
|
||||
class gammaln(Function):
|
||||
nargs = 1
|
||||
|
||||
def fdiff(self, argindex=1):
|
||||
x=self.args[0]
|
||||
return polygamma(0, x)
|
||||
|
||||
@classmethod
|
||||
def eval(cls, x):
|
||||
if x.is_Number:
|
||||
return log(gamma(x))
|
||||
|
||||
|
||||
class ln_cum_gaussian(Function):
|
||||
nargs = 1
|
||||
|
||||
def fdiff(self, argindex=1):
|
||||
x = self.args[0]
|
||||
return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi)
|
||||
|
||||
@classmethod
|
||||
def eval(cls, x):
|
||||
if x.is_Number:
|
||||
return log(cum_gaussian(x))
|
||||
|
||||
def _eval_is_real(self):
|
||||
return self.args[0].is_real
|
||||
|
||||
class cum_gaussian(Function):
|
||||
nargs = 1
|
||||
def fdiff(self, argindex=1):
|
||||
x = self.args[0]
|
||||
return gaussian(x)
|
||||
|
||||
@classmethod
|
||||
def eval(cls, x):
|
||||
if x.is_Number:
|
||||
return 0.5*(1+erf(sqrt(2)/2*x))
|
||||
|
||||
def _eval_is_real(self):
|
||||
return self.args[0].is_real
|
||||
|
||||
class gaussian(Function):
|
||||
nargs = 1
|
||||
@classmethod
|
||||
def eval(cls, x):
|
||||
return 1/sqrt(2*pi)*exp(-0.5*x*x)
|
||||
|
||||
def _eval_is_real(self):
|
||||
return True
|
||||
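A hedged sketch of how these Function subclasses behave under sympy differentiation and numeric evaluation, assuming the classes above are in scope (their module path is not shown in this hunk) and a sympy version contemporary with this commit.

import sympy as sp

x = sp.Symbol('x', real=True)
print(sp.diff(gammaln(x), x))        # fdiff gives polygamma(0, x)
print(sp.diff(cum_gaussian(x), x))   # fdiff gives gaussian(x), which auto-evaluates to exp(-x**2/2)/sqrt(2*pi)
print(cum_gaussian(0))               # eval() fires on numbers: 0.5*(1 + erf(0)) = 0.5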
|
||||
class ln_diff_erf(Function):
|
||||
nargs = 2
|
||||
|
|
|
|||