mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-07 19:12:40 +02:00
[GPU] GPU version of varDTC is ready
This commit is contained in:
commit
bbcba2553c
59 changed files with 2012 additions and 1186 deletions
|
|
@ -4,6 +4,7 @@
|
||||||
from model import *
|
from model import *
|
||||||
from parameterization.parameterized import adjust_name_for_printing, Parameterizable
|
from parameterization.parameterized import adjust_name_for_printing, Parameterizable
|
||||||
from parameterization.param import Param, ParamConcatenation
|
from parameterization.param import Param, ParamConcatenation
|
||||||
|
from parameterization.observable_array import ObsAr
|
||||||
|
|
||||||
from gp import GP
|
from gp import GP
|
||||||
from sparse_gp import SparseGP
|
from sparse_gp import SparseGP
|
||||||
|
|
|
||||||
|
|
@ -208,27 +208,9 @@ class GP(Model):
|
||||||
from ..plotting.matplot_dep import models_plots
|
from ..plotting.matplot_dep import models_plots
|
||||||
return models_plots.plot_fit(self,*args,**kwargs)
|
return models_plots.plot_fit(self,*args,**kwargs)
|
||||||
|
|
||||||
def _getstate(self):
|
def input_sensitivity(self):
|
||||||
"""
|
"""
|
||||||
|
Returns the sensitivity for each dimension of this model
|
||||||
Get the current state of the class, here we return everything that is
|
|
||||||
needed to recompute the model.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
return self.kern.input_sensitivity()
|
||||||
|
|
||||||
return Model._getstate(self) + [self.X,
|
|
||||||
self.num_data,
|
|
||||||
self.input_dim,
|
|
||||||
self.kern,
|
|
||||||
self.likelihood,
|
|
||||||
self.output_dim,
|
|
||||||
]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
self.output_dim = state.pop()
|
|
||||||
self.likelihood = state.pop()
|
|
||||||
self.kern = state.pop()
|
|
||||||
self.input_dim = state.pop()
|
|
||||||
self.num_data = state.pop()
|
|
||||||
self.X = state.pop()
|
|
||||||
Model._setstate(self, state)
|
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ class Mapping(Parameterized):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def df_dtheta(self, dL_df, X):
|
def df_dtheta(self, dL_df, X):
|
||||||
"""The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters.
|
"""The gradient of the outputs of the mapping with respect to each of the parameters.
|
||||||
|
|
||||||
:param dL_df: gradient of the objective with respect to the function.
|
:param dL_df: gradient of the objective with respect to the function.
|
||||||
:type dL_df: ndarray (num_data x output_dim)
|
:type dL_df: ndarray (num_data x output_dim)
|
||||||
|
|
@ -50,7 +50,7 @@ class Mapping(Parameterized):
|
||||||
"""
|
"""
|
||||||
Plots the mapping associated with the model.
|
Plots the mapping associated with the model.
|
||||||
- In one dimension, the function is plotted.
|
- In one dimension, the function is plotted.
|
||||||
- In two dimsensions, a contour-plot shows the function
|
- In two dimensions, a contour-plot shows the function
|
||||||
- In higher dimensions, we've not implemented this yet !TODO!
|
- In higher dimensions, we've not implemented this yet !TODO!
|
||||||
|
|
||||||
Can plot only part of the data and part of the posterior functions
|
Can plot only part of the data and part of the posterior functions
|
||||||
|
|
|
||||||
|
|
@ -24,37 +24,9 @@ class Model(Parameterized):
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
return self.gradient
|
return self.gradient
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class.
|
|
||||||
Inherited from Parameterized, so add those parameters to the state
|
|
||||||
|
|
||||||
:return: list of states from the model.
|
|
||||||
|
|
||||||
"""
|
|
||||||
return Parameterized._getstate(self) + \
|
|
||||||
[self.priors, self.optimization_runs,
|
|
||||||
self.sampling_runs, self.preferred_optimizer]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
"""
|
|
||||||
set state from previous call to _getstate
|
|
||||||
call Parameterized with the rest of the state
|
|
||||||
|
|
||||||
:param state: the state of the model.
|
|
||||||
:type state: list as returned from _getstate.
|
|
||||||
|
|
||||||
"""
|
|
||||||
self.preferred_optimizer = state.pop()
|
|
||||||
self.sampling_runs = state.pop()
|
|
||||||
self.optimization_runs = state.pop()
|
|
||||||
self.priors = state.pop()
|
|
||||||
Parameterized._setstate(self, state)
|
|
||||||
|
|
||||||
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
|
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
|
||||||
"""
|
"""
|
||||||
Perform random restarts of the model, and set the model to the best
|
Perform random restarts of the model, and set the model to the best
|
||||||
|
|
@ -142,13 +114,60 @@ class Model(Parameterized):
|
||||||
"""
|
"""
|
||||||
raise DeprecationWarning, 'parameters now have default constraints'
|
raise DeprecationWarning, 'parameters now have default constraints'
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def objective_function(self):
|
||||||
"""
|
"""
|
||||||
Returns the sensitivity for each dimension of this kernel.
|
The objective function for the given algorithm.
|
||||||
"""
|
|
||||||
return self.kern.input_sensitivity()
|
|
||||||
|
|
||||||
def objective_function(self, x):
|
This function is the true objective, which wants to be minimized.
|
||||||
|
Note that all parameters are already set and in place, so you just need
|
||||||
|
to return the objective function here.
|
||||||
|
|
||||||
|
For probabilistic models this is the negative log_likelihood
|
||||||
|
(including the MAP prior), so we return it here. If your model is not
|
||||||
|
probabilistic, just return your objective here!
|
||||||
|
"""
|
||||||
|
return -float(self.log_likelihood()) - self.log_prior()
|
||||||
|
|
||||||
|
def objective_function_gradients(self):
|
||||||
|
"""
|
||||||
|
The gradients for the objective function for the given algorithm.
|
||||||
|
|
||||||
|
You can find the gradient for the parameters in self.gradient at all times.
|
||||||
|
This is the place, where gradients get stored for parameters.
|
||||||
|
|
||||||
|
This function is the true objective, which wants to be minimized.
|
||||||
|
Note that all parameters are already set and in place, so you just need
|
||||||
|
to return the gradient here.
|
||||||
|
|
||||||
|
For probabilistic models this is the gradient of the negative log_likelihood
|
||||||
|
(including the MAP prior), so we return it here. If your model is not
|
||||||
|
probabilistic, just return your gradient here!
|
||||||
|
"""
|
||||||
|
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||||
|
|
||||||
|
def _grads(self, x):
|
||||||
|
"""
|
||||||
|
Gets the gradients from the likelihood and the priors.
|
||||||
|
|
||||||
|
Failures are handled robustly. The algorithm will try several times to
|
||||||
|
return the gradients, and will raise the original exception if
|
||||||
|
the objective cannot be computed.
|
||||||
|
|
||||||
|
:param x: the parameters of the model.
|
||||||
|
:type x: np.array
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
self._set_params_transformed(x)
|
||||||
|
obj_grads = self._transform_gradients(self.objective_function_gradients())
|
||||||
|
self._fail_count = 0
|
||||||
|
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||||
|
if self._fail_count >= self._allowed_failures:
|
||||||
|
raise
|
||||||
|
self._fail_count += 1
|
||||||
|
obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
|
||||||
|
return obj_grads
|
||||||
|
|
||||||
|
def _objective(self, x):
|
||||||
"""
|
"""
|
||||||
The objective function passed to the optimizer. It combines
|
The objective function passed to the optimizer. It combines
|
||||||
the likelihood and the priors.
|
the likelihood and the priors.
|
||||||
|
|
@ -162,55 +181,26 @@ class Model(Parameterized):
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self._set_params_transformed(x)
|
self._set_params_transformed(x)
|
||||||
|
obj = self.objective_function()
|
||||||
self._fail_count = 0
|
self._fail_count = 0
|
||||||
except (LinAlgError, ZeroDivisionError, ValueError) as e:
|
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||||
if self._fail_count >= self._allowed_failures:
|
if self._fail_count >= self._allowed_failures:
|
||||||
raise e
|
raise
|
||||||
self._fail_count += 1
|
self._fail_count += 1
|
||||||
return np.inf
|
return np.inf
|
||||||
return -float(self.log_likelihood()) - self.log_prior()
|
return obj
|
||||||
|
|
||||||
def objective_function_gradients(self, x):
|
def _objective_grads(self, x):
|
||||||
"""
|
|
||||||
Gets the gradients from the likelihood and the priors.
|
|
||||||
|
|
||||||
Failures are handled robustly. The algorithm will try several times to
|
|
||||||
return the gradients, and will raise the original exception if
|
|
||||||
the objective cannot be computed.
|
|
||||||
|
|
||||||
:param x: the parameters of the model.
|
|
||||||
:type x: np.array
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
self._set_params_transformed(x)
|
self._set_params_transformed(x)
|
||||||
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
|
||||||
self._fail_count = 0
|
self._fail_count = 0
|
||||||
except (LinAlgError, ZeroDivisionError, ValueError) as e:
|
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||||
if self._fail_count >= self._allowed_failures:
|
if self._fail_count >= self._allowed_failures:
|
||||||
raise e
|
raise
|
||||||
self._fail_count += 1
|
|
||||||
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
|
|
||||||
return obj_grads
|
|
||||||
|
|
||||||
def objective_and_gradients(self, x):
|
|
||||||
"""
|
|
||||||
Compute the objective function of the model and the gradient of the model at the point given by x.
|
|
||||||
|
|
||||||
:param x: the point at which gradients are to be computed.
|
|
||||||
:type x: np.array
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
self._set_params_transformed(x)
|
|
||||||
obj_f = -float(self.log_likelihood()) - self.log_prior()
|
|
||||||
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
|
||||||
self._fail_count = 0
|
|
||||||
except (LinAlgError, ZeroDivisionError, ValueError) as e:
|
|
||||||
if self._fail_count >= self._allowed_failures:
|
|
||||||
raise e
|
|
||||||
self._fail_count += 1
|
self._fail_count += 1
|
||||||
obj_f = np.inf
|
obj_f = np.inf
|
||||||
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
|
obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
|
||||||
return obj_f, obj_grads
|
return obj_f, obj_grads
|
||||||
|
|
||||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||||
|
|
@ -241,7 +231,7 @@ class Model(Parameterized):
|
||||||
optimizer = optimization.get_optimizer(optimizer)
|
optimizer = optimization.get_optimizer(optimizer)
|
||||||
opt = optimizer(start, model=self, **kwargs)
|
opt = optimizer(start, model=self, **kwargs)
|
||||||
|
|
||||||
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
|
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
|
||||||
|
|
||||||
self.optimization_runs.append(opt)
|
self.optimization_runs.append(opt)
|
||||||
|
|
||||||
|
|
@ -289,19 +279,22 @@ class Model(Parameterized):
|
||||||
|
|
||||||
# just check the global ratio
|
# just check the global ratio
|
||||||
dx = np.zeros(x.shape)
|
dx = np.zeros(x.shape)
|
||||||
dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
|
dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)
|
||||||
|
|
||||||
# evaulate around the point x
|
# evaulate around the point x
|
||||||
f1 = self.objective_function(x + dx)
|
f1 = self._objective(x + dx)
|
||||||
f2 = self.objective_function(x - dx)
|
f2 = self._objective(x - dx)
|
||||||
gradient = self.objective_function_gradients(x)
|
gradient = self._grads(x)
|
||||||
|
|
||||||
dx = dx[transformed_index]
|
dx = dx[transformed_index]
|
||||||
gradient = gradient[transformed_index]
|
gradient = gradient[transformed_index]
|
||||||
|
|
||||||
denominator = (2 * np.dot(dx, gradient))
|
denominator = (2 * np.dot(dx, gradient))
|
||||||
global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
|
global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
|
||||||
return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance
|
global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
|
||||||
|
if global_ratio is np.nan:
|
||||||
|
global_ratio = 0
|
||||||
|
return np.abs(1. - global_ratio) < tolerance or global_diff
|
||||||
else:
|
else:
|
||||||
# check the gradient of each parameter individually, and do some pretty printing
|
# check the gradient of each parameter individually, and do some pretty printing
|
||||||
try:
|
try:
|
||||||
|
|
@ -337,15 +330,15 @@ class Model(Parameterized):
|
||||||
print "No free parameters to check"
|
print "No free parameters to check"
|
||||||
return
|
return
|
||||||
|
|
||||||
gradient = self.objective_function_gradients(x).copy()
|
gradient = self._grads(x).copy()
|
||||||
np.where(gradient == 0, 1e-312, gradient)
|
np.where(gradient == 0, 1e-312, gradient)
|
||||||
ret = True
|
ret = True
|
||||||
for nind, xind in itertools.izip(param_index, transformed_index):
|
for nind, xind in itertools.izip(param_index, transformed_index):
|
||||||
xx = x.copy()
|
xx = x.copy()
|
||||||
xx[xind] += step
|
xx[xind] += step
|
||||||
f1 = self.objective_function(xx)
|
f1 = self._objective(xx)
|
||||||
xx[xind] -= 2.*step
|
xx[xind] -= 2.*step
|
||||||
f2 = self.objective_function(xx)
|
f2 = self._objective(xx)
|
||||||
numerical_gradient = (f1 - f2) / (2 * step)
|
numerical_gradient = (f1 - f2) / (2 * step)
|
||||||
if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
|
if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
|
||||||
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
|
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
|
||||||
|
|
|
||||||
|
|
@ -24,12 +24,6 @@ class ParameterIndexOperations(object):
|
||||||
for t, i in constraints.iteritems():
|
for t, i in constraints.iteritems():
|
||||||
self.add(t, i)
|
self.add(t, i)
|
||||||
|
|
||||||
def __getstate__(self):
|
|
||||||
return self._properties
|
|
||||||
|
|
||||||
def __setstate__(self, state):
|
|
||||||
self._properties = state
|
|
||||||
|
|
||||||
def iteritems(self):
|
def iteritems(self):
|
||||||
return self._properties.iteritems()
|
return self._properties.iteritems()
|
||||||
|
|
||||||
|
|
@ -92,13 +86,18 @@ class ParameterIndexOperations(object):
|
||||||
for i, v in parameter_index_view.iteritems():
|
for i, v in parameter_index_view.iteritems():
|
||||||
self.add(i, v+offset)
|
self.add(i, v+offset)
|
||||||
|
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
|
return self.__deepcopy__(None)
|
||||||
|
|
||||||
|
def __deepcopy__(self, memo):
|
||||||
return ParameterIndexOperations(dict(self.iteritems()))
|
return ParameterIndexOperations(dict(self.iteritems()))
|
||||||
|
|
||||||
def __getitem__(self, prop):
|
def __getitem__(self, prop):
|
||||||
return self._properties[prop]
|
return self._properties[prop]
|
||||||
|
|
||||||
|
def __delitem__(self, prop):
|
||||||
|
del self._properties[prop]
|
||||||
|
|
||||||
def __str__(self, *args, **kwargs):
|
def __str__(self, *args, **kwargs):
|
||||||
import pprint
|
import pprint
|
||||||
return pprint.pformat(dict(self._properties))
|
return pprint.pformat(dict(self._properties))
|
||||||
|
|
@ -183,7 +182,7 @@ class ParameterIndexOperationsView(object):
|
||||||
|
|
||||||
|
|
||||||
def remove(self, prop, indices):
|
def remove(self, prop, indices):
|
||||||
removed = self._param_index_ops.remove(prop, indices+self._offset)
|
removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset)
|
||||||
if removed.size > 0:
|
if removed.size > 0:
|
||||||
return removed - self._size + 1
|
return removed - self._size + 1
|
||||||
return removed
|
return removed
|
||||||
|
|
@ -193,6 +192,9 @@ class ParameterIndexOperationsView(object):
|
||||||
ind = self._filter_index(self._param_index_ops[prop])
|
ind = self._filter_index(self._param_index_ops[prop])
|
||||||
return ind
|
return ind
|
||||||
|
|
||||||
|
def __delitem__(self, prop):
|
||||||
|
self.remove(prop, self[prop])
|
||||||
|
|
||||||
def __str__(self, *args, **kwargs):
|
def __str__(self, *args, **kwargs):
|
||||||
import pprint
|
import pprint
|
||||||
return pprint.pformat(dict(self.iteritems()))
|
return pprint.pformat(dict(self.iteritems()))
|
||||||
|
|
@ -203,6 +205,9 @@ class ParameterIndexOperationsView(object):
|
||||||
|
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
|
return self.__deepcopy__(None)
|
||||||
|
|
||||||
|
def __deepcopy__(self, memo):
|
||||||
return ParameterIndexOperations(dict(self.iteritems()))
|
return ParameterIndexOperations(dict(self.iteritems()))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ Created on 27 Feb 2014
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
import weakref
|
||||||
|
|
||||||
def intarray_default_factory():
|
def intarray_default_factory():
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
@ -28,4 +29,90 @@ class ArrayList(list):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def index(self, item):
|
||||||
|
index = 0
|
||||||
|
for el in self:
|
||||||
|
if el is item:
|
||||||
|
return index
|
||||||
|
index += 1
|
||||||
|
raise ValueError, "{} is not in list".format(item)
|
||||||
|
pass
|
||||||
|
|
||||||
|
class ObservablesList(object):
|
||||||
|
def __init__(self):
|
||||||
|
self._poc = []
|
||||||
|
|
||||||
|
def __getitem__(self, ind):
|
||||||
|
p,o,c = self._poc[ind]
|
||||||
|
return p, o(), c
|
||||||
|
|
||||||
|
def remove(self, priority, observable, callble):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
self.flush()
|
||||||
|
for i in range(len(self) - 1, -1, -1):
|
||||||
|
p,o,c = self[i]
|
||||||
|
if priority==p and observable==o and callble==c:
|
||||||
|
del self._poc[i]
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self._poc.__repr__()
|
||||||
|
|
||||||
|
def add(self, priority, observable, callble):
|
||||||
|
ins = 0
|
||||||
|
for pr, _, _ in self:
|
||||||
|
if priority > pr:
|
||||||
|
break
|
||||||
|
ins += 1
|
||||||
|
self._poc.insert(ins, (priority, weakref.ref(observable), callble))
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
ret = []
|
||||||
|
curr_p = None
|
||||||
|
for p, o, c in self:
|
||||||
|
curr = ''
|
||||||
|
if curr_p != p:
|
||||||
|
pre = "{!s}: ".format(p)
|
||||||
|
curr_pre = pre
|
||||||
|
else: curr_pre = " "*len(pre)
|
||||||
|
curr_p = p
|
||||||
|
curr += curr_pre
|
||||||
|
ret.append(curr + ", ".join(map(repr, [o,c])))
|
||||||
|
return '\n'.join(ret)
|
||||||
|
|
||||||
|
def flush(self):
|
||||||
|
self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
self.flush()
|
||||||
|
for p, o, c in self._poc:
|
||||||
|
if o() is not None:
|
||||||
|
yield p, o(), c
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
self.flush()
|
||||||
|
return self._poc.__len__()
|
||||||
|
|
||||||
|
def __deepcopy__(self, memo):
|
||||||
|
self.flush()
|
||||||
|
s = ObservablesList()
|
||||||
|
import copy
|
||||||
|
s._poc = copy.deepcopy(self._poc, memo)
|
||||||
|
return s
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
self.flush()
|
||||||
|
from ...util.caching import Cacher
|
||||||
|
obs = []
|
||||||
|
for p, o, c in self:
|
||||||
|
if (getattr(o, c.__name__, None) is not None
|
||||||
|
and not isinstance(o, Cacher)):
|
||||||
|
obs.append((p,o,c.__name__))
|
||||||
|
return obs
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
self._poc = []
|
||||||
|
for p, o, c in state:
|
||||||
|
self.add(p,o,getattr(o, c))
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
__updated__ = '2014-03-21'
|
__updated__ = '2014-03-31'
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from parameter_core import Observable
|
from parameter_core import Observable, Pickleable
|
||||||
|
|
||||||
class ObsAr(np.ndarray, Observable):
|
class ObsAr(np.ndarray, Pickleable, Observable):
|
||||||
"""
|
"""
|
||||||
An ndarray which reports changes to its observers.
|
An ndarray which reports changes to its observers.
|
||||||
The observers can add themselves with a callable, which
|
The observers can add themselves with a callable, which
|
||||||
|
|
@ -25,37 +25,34 @@ class ObsAr(np.ndarray, Observable):
|
||||||
def __array_finalize__(self, obj):
|
def __array_finalize__(self, obj):
|
||||||
# see InfoArray.__array_finalize__ for comments
|
# see InfoArray.__array_finalize__ for comments
|
||||||
if obj is None: return
|
if obj is None: return
|
||||||
self._observer_callables_ = getattr(obj, '_observer_callables_', None)
|
self.observers = getattr(obj, 'observers', None)
|
||||||
|
|
||||||
def __array_wrap__(self, out_arr, context=None):
|
def __array_wrap__(self, out_arr, context=None):
|
||||||
return out_arr.view(np.ndarray)
|
return out_arr.view(np.ndarray)
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
memo = {}
|
||||||
|
memo[id(self)] = self
|
||||||
|
return self.__deepcopy__(memo)
|
||||||
|
|
||||||
|
def __deepcopy__(self, memo):
|
||||||
|
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
|
||||||
|
memo[id(self)] = s
|
||||||
|
import copy
|
||||||
|
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
|
||||||
|
return s
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
func, args, state = np.ndarray.__reduce__(self)
|
func, args, state = super(ObsAr, self).__reduce__()
|
||||||
return func, args, (state, Observable._getstate(self))
|
return func, args, (state, Pickleable.__getstate__(self))
|
||||||
|
|
||||||
def __setstate__(self, state):
|
def __setstate__(self, state):
|
||||||
np.ndarray.__setstate__(self, state[0])
|
np.ndarray.__setstate__(self, state[0])
|
||||||
Observable._setstate(self, state[1])
|
Pickleable.__setstate__(self, state[1])
|
||||||
|
|
||||||
def _s_not_empty(self, s):
|
|
||||||
# this checks whether there is something picked by this slice.
|
|
||||||
return True
|
|
||||||
# TODO: disarmed, for performance increase,
|
|
||||||
if not isinstance(s, (list,tuple,np.ndarray)):
|
|
||||||
return True
|
|
||||||
if isinstance(s, (list,tuple)):
|
|
||||||
return len(s)!=0
|
|
||||||
if isinstance(s, np.ndarray):
|
|
||||||
if s.dtype is bool:
|
|
||||||
return np.all(s)
|
|
||||||
else:
|
|
||||||
return s.size != 0
|
|
||||||
|
|
||||||
def __setitem__(self, s, val):
|
def __setitem__(self, s, val):
|
||||||
if self._s_not_empty(s):
|
super(ObsAr, self).__setitem__(s, val)
|
||||||
super(ObsAr, self).__setitem__(s, val)
|
self.notify_observers()
|
||||||
self.notify_observers()
|
|
||||||
|
|
||||||
def __getslice__(self, start, stop):
|
def __getslice__(self, start, stop):
|
||||||
return self.__getitem__(slice(start, stop))
|
return self.__getitem__(slice(start, stop))
|
||||||
|
|
@ -63,12 +60,6 @@ class ObsAr(np.ndarray, Observable):
|
||||||
def __setslice__(self, start, stop, val):
|
def __setslice__(self, start, stop, val):
|
||||||
return self.__setitem__(slice(start, stop), val)
|
return self.__setitem__(slice(start, stop), val)
|
||||||
|
|
||||||
def __copy__(self, *args):
|
|
||||||
return ObsAr(self.view(np.ndarray).copy())
|
|
||||||
|
|
||||||
def copy(self, *args):
|
|
||||||
return self.__copy__(*args)
|
|
||||||
|
|
||||||
def __ilshift__(self, *args, **kwargs):
|
def __ilshift__(self, *args, **kwargs):
|
||||||
r = np.ndarray.__ilshift__(self, *args, **kwargs)
|
r = np.ndarray.__ilshift__(self, *args, **kwargs)
|
||||||
self.notify_observers()
|
self.notify_observers()
|
||||||
|
|
@ -144,76 +135,3 @@ class ObsAr(np.ndarray, Observable):
|
||||||
r = np.ndarray.__imul__(self, *args, **kwargs)
|
r = np.ndarray.__imul__(self, *args, **kwargs)
|
||||||
self.notify_observers()
|
self.notify_observers()
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
|
||||||
# def __rrshift__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rrshift__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __ror__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__ror__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __rxor__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rxor__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# def __rdivmod__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rdivmod__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __radd__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__radd__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __rdiv__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rdiv__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __rtruediv__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rtruediv__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __rshift__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rshift__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __rmul__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rmul__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __rpow__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rpow__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
||||||
# def __rsub__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rsub__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
# def __rfloordiv__(self, *args, **kwargs):
|
|
||||||
# r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
|
|
||||||
# self.notify_observers()
|
|
||||||
# return r
|
|
||||||
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
import itertools
|
import itertools
|
||||||
import numpy
|
import numpy
|
||||||
from parameter_core import OptimizationHandlable, adjust_name_for_printing
|
from parameter_core import OptimizationHandlable, adjust_name_for_printing
|
||||||
from array_core import ObsAr
|
from observable_array import ObsAr
|
||||||
|
|
||||||
###### printing
|
###### printing
|
||||||
__constraints_name__ = "Constraint"
|
__constraints_name__ = "Constraint"
|
||||||
|
|
@ -43,14 +43,13 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
_fixes_ = None
|
_fixes_ = None
|
||||||
_parameters_ = []
|
_parameters_ = []
|
||||||
def __new__(cls, name, input_array, default_constraint=None):
|
def __new__(cls, name, input_array, default_constraint=None):
|
||||||
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array, name=name, default_constraint=default_constraint))
|
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
|
||||||
cls.__name__ = "Param"
|
cls.__name__ = "Param"
|
||||||
obj._current_slice_ = (slice(obj.shape[0]),)
|
obj._current_slice_ = (slice(obj.shape[0]),)
|
||||||
obj._realshape_ = obj.shape
|
obj._realshape_ = obj.shape
|
||||||
obj._realsize_ = obj.size
|
obj._realsize_ = obj.size
|
||||||
obj._realndim_ = obj.ndim
|
obj._realndim_ = obj.ndim
|
||||||
obj._original_ = True
|
obj._original_ = True
|
||||||
obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64)
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def __init__(self, name, input_array, default_constraint=None, *a, **kw):
|
def __init__(self, name, input_array, default_constraint=None, *a, **kw):
|
||||||
|
|
@ -60,7 +59,7 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
import pydot
|
import pydot
|
||||||
node = pydot.Node(id(self), shape='record', label=self.name)
|
node = pydot.Node(id(self), shape='record', label=self.name)
|
||||||
G.add_node(node)
|
G.add_node(node)
|
||||||
for o in self._observer_callables_.keys():
|
for o in self.observers.keys():
|
||||||
label = o.name if hasattr(o, 'name') else str(o)
|
label = o.name if hasattr(o, 'name') else str(o)
|
||||||
observed_node = pydot.Node(id(o), label=label)
|
observed_node = pydot.Node(id(o), label=label)
|
||||||
G.add_node(observed_node)
|
G.add_node(observed_node)
|
||||||
|
|
@ -87,74 +86,24 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
self.priors = getattr(obj, 'priors', None)
|
self.priors = getattr(obj, 'priors', None)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _param_array_(self):
|
def param_array(self):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def gradient(self):
|
def gradient(self):
|
||||||
|
"""
|
||||||
|
Return a view on the gradient, which is in the same shape as this parameter is.
|
||||||
|
Note: this is not the real gradient array, it is just a view on it.
|
||||||
|
|
||||||
|
To work on the real gradient array use: self.full_gradient
|
||||||
|
"""
|
||||||
|
if getattr(self, '_gradient_array_', None) is None:
|
||||||
|
self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64)
|
||||||
return self._gradient_array_[self._current_slice_]
|
return self._gradient_array_[self._current_slice_]
|
||||||
|
|
||||||
@gradient.setter
|
@gradient.setter
|
||||||
def gradient(self, val):
|
def gradient(self, val):
|
||||||
self.gradient[:] = val
|
self._gradient_array_[self._current_slice_] = val
|
||||||
|
|
||||||
#===========================================================================
|
|
||||||
# Pickling operations
|
|
||||||
#===========================================================================
|
|
||||||
def __reduce__(self):
|
|
||||||
func, args, state = super(Param, self).__reduce__()
|
|
||||||
return func, args, (state,
|
|
||||||
(self._name,
|
|
||||||
self._parent_,
|
|
||||||
self._parent_index_,
|
|
||||||
self._default_constraint_,
|
|
||||||
self._current_slice_,
|
|
||||||
self._realshape_,
|
|
||||||
self._realsize_,
|
|
||||||
self._realndim_,
|
|
||||||
self.constraints,
|
|
||||||
self.priors
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def __setstate__(self, state):
|
|
||||||
super(Param, self).__setstate__(state[0])
|
|
||||||
state = list(state[1])
|
|
||||||
self.priors = state.pop()
|
|
||||||
self.constraints = state.pop()
|
|
||||||
self._realndim_ = state.pop()
|
|
||||||
self._realsize_ = state.pop()
|
|
||||||
self._realshape_ = state.pop()
|
|
||||||
self._current_slice_ = state.pop()
|
|
||||||
self._default_constraint_ = state.pop()
|
|
||||||
self._parent_index_ = state.pop()
|
|
||||||
self._parent_ = state.pop()
|
|
||||||
self._name = state.pop()
|
|
||||||
|
|
||||||
def copy(self, *args):
|
|
||||||
constr = self.constraints.copy()
|
|
||||||
priors = self.priors.copy()
|
|
||||||
p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_)
|
|
||||||
p.constraints = constr
|
|
||||||
p.priors = priors
|
|
||||||
return p
|
|
||||||
#===========================================================================
|
|
||||||
# get/set parameters
|
|
||||||
#===========================================================================
|
|
||||||
# def _set_params(self, param, trigger_parent=True):
|
|
||||||
# self.flat = param
|
|
||||||
# if trigger_parent: min_priority = None
|
|
||||||
# else: min_priority = -numpy.inf
|
|
||||||
# self.notify_observers(None, min_priority)
|
|
||||||
#
|
|
||||||
# def _get_params(self):
|
|
||||||
# return self.flat
|
|
||||||
#
|
|
||||||
# def _collect_gradient(self, target):
|
|
||||||
# target += self.gradient.flat
|
|
||||||
#
|
|
||||||
# def _set_gradient(self, g):
|
|
||||||
# self.gradient = g.reshape(self._realshape_)
|
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Array operations -> done
|
# Array operations -> done
|
||||||
|
|
@ -172,24 +121,6 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
def __setitem__(self, s, val):
|
def __setitem__(self, s, val):
|
||||||
super(Param, self).__setitem__(s, val)
|
super(Param, self).__setitem__(s, val)
|
||||||
|
|
||||||
#===========================================================================
|
|
||||||
# Index Operations:
|
|
||||||
#===========================================================================
|
|
||||||
#def _internal_offset(self):
|
|
||||||
# internal_offset = 0
|
|
||||||
# extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
|
|
||||||
# for i, si in enumerate(self._current_slice_[:self._realndim_]):
|
|
||||||
# if numpy.all(si == Ellipsis):
|
|
||||||
# continue
|
|
||||||
# if isinstance(si, slice):
|
|
||||||
# a = si.indices(self._realshape_[i])[0]
|
|
||||||
# elif isinstance(si, (list,numpy.ndarray,tuple)):
|
|
||||||
# a = si[0]
|
|
||||||
# else: a = si
|
|
||||||
# if a < 0:
|
|
||||||
# a = self._realshape_[i] + a
|
|
||||||
# internal_offset += a * extended_realshape[i]
|
|
||||||
# return internal_offset
|
|
||||||
|
|
||||||
def _raveled_index(self, slice_index=None):
|
def _raveled_index(self, slice_index=None):
|
||||||
# return an index array on the raveled array, which is formed by the current_slice
|
# return an index array on the raveled array, which is formed by the current_slice
|
||||||
|
|
@ -235,13 +166,21 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
def is_fixed(self):
|
def is_fixed(self):
|
||||||
from transformations import __fixed__
|
from transformations import __fixed__
|
||||||
return self.constraints[__fixed__].size == self.size
|
return self.constraints[__fixed__].size == self.size
|
||||||
#def round(self, decimals=0, out=None):
|
|
||||||
# view = super(Param, self).round(decimals, out).view(Param)
|
|
||||||
# view.__array_finalize__(self)
|
|
||||||
# return view
|
|
||||||
#round.__doc__ = numpy.round.__doc__
|
|
||||||
def _get_original(self, param):
|
def _get_original(self, param):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
#===========================================================================
|
||||||
|
# Pickling and copying
|
||||||
|
#===========================================================================
|
||||||
|
def __deepcopy__(self, memo):
|
||||||
|
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
|
||||||
|
memo[id(self)] = s
|
||||||
|
import copy
|
||||||
|
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Printing -> done
|
# Printing -> done
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
@ -250,7 +189,8 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
if self.size <= 1:
|
if self.size <= 1:
|
||||||
return [str(self.view(numpy.ndarray)[0])]
|
return [str(self.view(numpy.ndarray)[0])]
|
||||||
else: return [str(self.shape)]
|
else: return [str(self.shape)]
|
||||||
def parameter_names(self, add_self=False, adjust_for_printing=False):
|
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
|
||||||
|
# this is just overwrighting the parameterized calls to parameter names, in order to maintain OOP
|
||||||
if adjust_for_printing:
|
if adjust_for_printing:
|
||||||
return [adjust_name_for_printing(self.name)]
|
return [adjust_name_for_printing(self.name)]
|
||||||
return [self.name]
|
return [self.name]
|
||||||
|
|
@ -261,6 +201,9 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
def parameter_shapes(self):
|
def parameter_shapes(self):
|
||||||
return [self.shape]
|
return [self.shape]
|
||||||
@property
|
@property
|
||||||
|
def num_params(self):
|
||||||
|
return 0
|
||||||
|
@property
|
||||||
def _constraints_str(self):
|
def _constraints_str(self):
|
||||||
return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
|
return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
|
||||||
@property
|
@property
|
||||||
|
|
@ -282,8 +225,8 @@ class Param(OptimizationHandlable, ObsAr):
|
||||||
if isinstance(slice_index, (tuple, list)):
|
if isinstance(slice_index, (tuple, list)):
|
||||||
clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
|
clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
|
||||||
for i in range(self._realndim_-len(clean_curr_slice)):
|
for i in range(self._realndim_-len(clean_curr_slice)):
|
||||||
i+=len(clean_curr_slice)
|
i+=1
|
||||||
clean_curr_slice += range(self._realshape_[i])
|
clean_curr_slice += [range(self._realshape_[i])]
|
||||||
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
|
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
|
||||||
and len(set(map(len, clean_curr_slice))) <= 1):
|
and len(set(map(len, clean_curr_slice))) <= 1):
|
||||||
return numpy.fromiter(itertools.izip(*clean_curr_slice),
|
return numpy.fromiter(itertools.izip(*clean_curr_slice),
|
||||||
|
|
@ -368,7 +311,7 @@ class ParamConcatenation(object):
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
def __getitem__(self, s):
|
def __getitem__(self, s):
|
||||||
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
|
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
|
||||||
params = [p._param_array_[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._param_array_[ind[ps]])]
|
params = [p.param_array[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p.param_array[ind[ps]])]
|
||||||
if len(params)==1: return params[0]
|
if len(params)==1: return params[0]
|
||||||
return ParamConcatenation(params)
|
return ParamConcatenation(params)
|
||||||
def __setitem__(self, s, val, update=True):
|
def __setitem__(self, s, val, update=True):
|
||||||
|
|
@ -381,7 +324,7 @@ class ParamConcatenation(object):
|
||||||
if update:
|
if update:
|
||||||
self.update_all_params()
|
self.update_all_params()
|
||||||
def values(self):
|
def values(self):
|
||||||
return numpy.hstack([p._param_array_ for p in self.params])
|
return numpy.hstack([p.param_array.flat for p in self.params])
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# parameter operations:
|
# parameter operations:
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,10 @@ Observable Pattern for patameterization
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
__updated__ = '2014-03-24'
|
__updated__ = '2014-03-31'
|
||||||
|
|
||||||
class HierarchyError(Exception):
|
class HierarchyError(Exception):
|
||||||
"""
|
"""
|
||||||
|
|
@ -31,71 +31,8 @@ def adjust_name_for_printing(name):
|
||||||
return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@", '_at_')
|
return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@", '_at_')
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
class InterfacePickleFunctions(object):
|
|
||||||
def __init__(self, *a, **kw):
|
|
||||||
super(InterfacePickleFunctions, self).__init__()
|
|
||||||
|
|
||||||
def _getstate(self):
|
class Observable(object):
|
||||||
"""
|
|
||||||
Returns the state of this class in a memento pattern.
|
|
||||||
The state must be a list-like structure of all the fields
|
|
||||||
this class needs to run.
|
|
||||||
|
|
||||||
See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
|
|
||||||
"""
|
|
||||||
raise NotImplementedError, "To be able to use pickling you need to implement this method"
|
|
||||||
def _setstate(self, state):
|
|
||||||
"""
|
|
||||||
Set the state (memento pattern) of this class to the given state.
|
|
||||||
Usually this is just the counterpart to _getstate, such that
|
|
||||||
an object is a copy of another when calling
|
|
||||||
|
|
||||||
copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
|
|
||||||
|
|
||||||
See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
|
|
||||||
"""
|
|
||||||
raise NotImplementedError, "To be able to use pickling you need to implement this method"
|
|
||||||
|
|
||||||
class Pickleable(InterfacePickleFunctions):
|
|
||||||
"""
|
|
||||||
Make an object pickleable (See python doc 'pickling').
|
|
||||||
|
|
||||||
This class allows for pickling support by Memento pattern.
|
|
||||||
_getstate returns a memento of the class, which gets pickled.
|
|
||||||
_setstate(<memento>) (re-)sets the state of the class to the memento
|
|
||||||
"""
|
|
||||||
def __init__(self, *a, **kw):
|
|
||||||
super(Pickleable, self).__init__()
|
|
||||||
#===========================================================================
|
|
||||||
# Pickling operations
|
|
||||||
#===========================================================================
|
|
||||||
def pickle(self, f, protocol=-1):
|
|
||||||
"""
|
|
||||||
:param f: either filename or open file object to write to.
|
|
||||||
if it is an open buffer, you have to make sure to close
|
|
||||||
it properly.
|
|
||||||
:param protocol: pickling protocol to use, python-pickle for details.
|
|
||||||
"""
|
|
||||||
import cPickle
|
|
||||||
if isinstance(f, str):
|
|
||||||
with open(f, 'w') as f:
|
|
||||||
cPickle.dump(self, f, protocol)
|
|
||||||
else:
|
|
||||||
cPickle.dump(self, f, protocol)
|
|
||||||
def __getstate__(self):
|
|
||||||
if self._has_get_set_state():
|
|
||||||
return self._getstate()
|
|
||||||
return self.__dict__
|
|
||||||
def __setstate__(self, state):
|
|
||||||
if self._has_get_set_state():
|
|
||||||
self._setstate(state)
|
|
||||||
# TODO: maybe parameters_changed() here?
|
|
||||||
return
|
|
||||||
self.__dict__ = state
|
|
||||||
def _has_get_set_state(self):
|
|
||||||
return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
|
|
||||||
|
|
||||||
class Observable(Pickleable):
|
|
||||||
"""
|
"""
|
||||||
Observable pattern for parameterization.
|
Observable pattern for parameterization.
|
||||||
|
|
||||||
|
|
@ -105,23 +42,25 @@ class Observable(Pickleable):
|
||||||
"""
|
"""
|
||||||
_updated = True
|
_updated = True
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(Observable, self).__init__(*args, **kwargs)
|
super(Observable, self).__init__()
|
||||||
self._observer_callables_ = []
|
from lists_and_dicts import ObservablesList
|
||||||
|
self.observers = ObservablesList()
|
||||||
|
|
||||||
def add_observer(self, observer, callble, priority=0):
|
def add_observer(self, observer, callble, priority=0):
|
||||||
self._insert_sorted(priority, observer, callble)
|
self.observers.add(priority, observer, callble)
|
||||||
|
|
||||||
def remove_observer(self, observer, callble=None):
|
def remove_observer(self, observer, callble=None):
|
||||||
to_remove = []
|
to_remove = []
|
||||||
for p, obs, clble in self._observer_callables_:
|
for poc in self.observers:
|
||||||
|
_, obs, clble = poc
|
||||||
if callble is not None:
|
if callble is not None:
|
||||||
if (obs == observer) and (callble == clble):
|
if (obs == observer) and (callble == clble):
|
||||||
to_remove.append((p, obs, clble))
|
to_remove.append(poc)
|
||||||
else:
|
else:
|
||||||
if obs is observer:
|
if obs is observer:
|
||||||
to_remove.append((p, obs, clble))
|
to_remove.append(poc)
|
||||||
for r in to_remove:
|
for r in to_remove:
|
||||||
self._observer_callables_.remove(r)
|
self.observers.remove(*r)
|
||||||
|
|
||||||
def notify_observers(self, which=None, min_priority=None):
|
def notify_observers(self, which=None, min_priority=None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -136,32 +75,18 @@ class Observable(Pickleable):
|
||||||
if which is None:
|
if which is None:
|
||||||
which = self
|
which = self
|
||||||
if min_priority is None:
|
if min_priority is None:
|
||||||
[callble(self, which=which) for _, _, callble in self._observer_callables_]
|
[callble(self, which=which) for _, _, callble in self.observers]
|
||||||
else:
|
else:
|
||||||
for p, _, callble in self._observer_callables_:
|
for p, _, callble in self.observers:
|
||||||
if p <= min_priority:
|
if p <= min_priority:
|
||||||
break
|
break
|
||||||
callble(self, which=which)
|
callble(self, which=which)
|
||||||
|
|
||||||
def _insert_sorted(self, p, o, c):
|
|
||||||
ins = 0
|
|
||||||
for pr, _, _ in self._observer_callables_:
|
|
||||||
if p > pr:
|
|
||||||
break
|
|
||||||
ins += 1
|
|
||||||
self._observer_callables_.insert(ins, (p, o, c))
|
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return [self._observer_callables_]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
self._observer_callables_ = state.pop()
|
|
||||||
|
|
||||||
#===============================================================================
|
#===============================================================================
|
||||||
# Foundation framework for parameterized and param objects:
|
# Foundation framework for parameterized and param objects:
|
||||||
#===============================================================================
|
#===============================================================================
|
||||||
|
|
||||||
class Parentable(Observable):
|
class Parentable(object):
|
||||||
"""
|
"""
|
||||||
Enable an Object to have a parent.
|
Enable an Object to have a parent.
|
||||||
|
|
||||||
|
|
@ -171,7 +96,7 @@ class Parentable(Observable):
|
||||||
_parent_ = None
|
_parent_ = None
|
||||||
_parent_index_ = None
|
_parent_index_ = None
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(Parentable, self).__init__(*args, **kwargs)
|
super(Parentable, self).__init__()
|
||||||
|
|
||||||
def has_parent(self):
|
def has_parent(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -207,7 +132,84 @@ class Parentable(Observable):
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class Gradcheckable(Parentable):
|
class Pickleable(object):
|
||||||
|
"""
|
||||||
|
Make an object pickleable (See python doc 'pickling').
|
||||||
|
|
||||||
|
This class allows for pickling support by Memento pattern.
|
||||||
|
_getstate returns a memento of the class, which gets pickled.
|
||||||
|
_setstate(<memento>) (re-)sets the state of the class to the memento
|
||||||
|
"""
|
||||||
|
def __init__(self, *a, **kw):
|
||||||
|
super(Pickleable, self).__init__()
|
||||||
|
#===========================================================================
|
||||||
|
# Pickling operations
|
||||||
|
#===========================================================================
|
||||||
|
def pickle(self, f, protocol=-1):
|
||||||
|
"""
|
||||||
|
:param f: either filename or open file object to write to.
|
||||||
|
if it is an open buffer, you have to make sure to close
|
||||||
|
it properly.
|
||||||
|
:param protocol: pickling protocol to use, python-pickle for details.
|
||||||
|
"""
|
||||||
|
import cPickle as pickle
|
||||||
|
import pickle #TODO: cPickle
|
||||||
|
if isinstance(f, str):
|
||||||
|
with open(f, 'w') as f:
|
||||||
|
pickle.dump(self, f, protocol)
|
||||||
|
else:
|
||||||
|
pickle.dump(self, f, protocol)
|
||||||
|
|
||||||
|
#===========================================================================
|
||||||
|
# copy and pickling
|
||||||
|
#===========================================================================
|
||||||
|
def copy(self):
|
||||||
|
"""Returns a (deep) copy of the current model"""
|
||||||
|
#raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
|
||||||
|
import copy
|
||||||
|
memo = {}
|
||||||
|
memo[id(self._parent_)] = None
|
||||||
|
memo[id(self.gradient)] = None
|
||||||
|
memo[id(self.param_array)] = None
|
||||||
|
memo[id(self._fixes_)] = None
|
||||||
|
c = copy.deepcopy(self, memo)
|
||||||
|
c._parent_index_ = None
|
||||||
|
return c
|
||||||
|
|
||||||
|
def __deepcopy__(self, memo):
|
||||||
|
s = self.__new__(self.__class__)
|
||||||
|
memo[id(self)] = s
|
||||||
|
import copy
|
||||||
|
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
|
||||||
|
return s
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
ignore_list = ([#'_parent_', '_parent_index_',
|
||||||
|
#'observers',
|
||||||
|
'_param_array_', '_gradient_array_', '_fixes_',
|
||||||
|
'_Cacher_wrap__cachers']
|
||||||
|
#+ self.parameter_names(recursive=False)
|
||||||
|
)
|
||||||
|
dc = dict()
|
||||||
|
for k,v in self.__dict__.iteritems():
|
||||||
|
if k not in ignore_list:
|
||||||
|
#if hasattr(v, "__getstate__"):
|
||||||
|
#dc[k] = v.__getstate__()
|
||||||
|
#else:
|
||||||
|
dc[k] = v
|
||||||
|
return dc
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
self.__dict__.update(state)
|
||||||
|
return self
|
||||||
|
|
||||||
|
#def __getstate__(self, memo):
|
||||||
|
# raise NotImplementedError, "get state must be implemented to be able to pickle objects"
|
||||||
|
|
||||||
|
#def __setstate__(self, memo):
|
||||||
|
# raise NotImplementedError, "set state must be implemented to be able to pickle objects"
|
||||||
|
|
||||||
|
class Gradcheckable(Pickleable, Parentable):
|
||||||
"""
|
"""
|
||||||
Adds the functionality for an object to be gradcheckable.
|
Adds the functionality for an object to be gradcheckable.
|
||||||
It is just a thin wrapper of a call to the highest parent for now.
|
It is just a thin wrapper of a call to the highest parent for now.
|
||||||
|
|
@ -312,7 +314,7 @@ class Indexable(object):
|
||||||
raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
|
raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
|
||||||
|
|
||||||
|
|
||||||
class Constrainable(Nameable, Indexable):
|
class Constrainable(Nameable, Indexable, Observable):
|
||||||
"""
|
"""
|
||||||
Make an object constrainable with Priors and Transformations.
|
Make an object constrainable with Priors and Transformations.
|
||||||
TODO: Mappings!!
|
TODO: Mappings!!
|
||||||
|
|
@ -394,6 +396,7 @@ class Constrainable(Nameable, Indexable):
|
||||||
self._fixes_[fixed_indices] = FIXED
|
self._fixes_[fixed_indices] = FIXED
|
||||||
else:
|
else:
|
||||||
self._fixes_ = None
|
self._fixes_ = None
|
||||||
|
del self.constraints[__fixed__]
|
||||||
|
|
||||||
def _has_fixes(self):
|
def _has_fixes(self):
|
||||||
return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size
|
return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size
|
||||||
|
|
@ -429,14 +432,14 @@ class Constrainable(Nameable, Indexable):
|
||||||
def log_prior(self):
|
def log_prior(self):
|
||||||
"""evaluate the prior"""
|
"""evaluate the prior"""
|
||||||
if self.priors.size > 0:
|
if self.priors.size > 0:
|
||||||
x = self._param_array_
|
x = self.param_array
|
||||||
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
|
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
|
||||||
return 0.
|
return 0.
|
||||||
|
|
||||||
def _log_prior_gradients(self):
|
def _log_prior_gradients(self):
|
||||||
"""evaluate the gradients of the priors"""
|
"""evaluate the gradients of the priors"""
|
||||||
if self.priors.size > 0:
|
if self.priors.size > 0:
|
||||||
x = self._param_array_
|
x = self.param_array
|
||||||
ret = np.zeros(x.size)
|
ret = np.zeros(x.size)
|
||||||
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
|
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
|
||||||
return ret
|
return ret
|
||||||
|
|
@ -455,7 +458,7 @@ class Constrainable(Nameable, Indexable):
|
||||||
Constrain the parameter to the given
|
Constrain the parameter to the given
|
||||||
:py:class:`GPy.core.transformations.Transformation`.
|
:py:class:`GPy.core.transformations.Transformation`.
|
||||||
"""
|
"""
|
||||||
self._param_array_[:] = transform.initialize(self._param_array_)
|
self.param_array[:] = transform.initialize(self.param_array)
|
||||||
reconstrained = self.unconstrain()
|
reconstrained = self.unconstrain()
|
||||||
self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
|
self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
|
||||||
self.notify_observers(self, None if trigger_parent else -np.inf)
|
self.notify_observers(self, None if trigger_parent else -np.inf)
|
||||||
|
|
@ -565,14 +568,14 @@ class OptimizationHandlable(Constrainable):
|
||||||
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
|
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
|
||||||
|
|
||||||
def transform(self):
|
def transform(self):
|
||||||
[np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
[np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||||
|
|
||||||
def untransform(self):
|
def untransform(self):
|
||||||
[np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||||
|
|
||||||
def _get_params_transformed(self):
|
def _get_params_transformed(self):
|
||||||
# transformed parameters (apply transformation rules)
|
# transformed parameters (apply transformation rules)
|
||||||
p = self._param_array_.copy()
|
p = self.param_array.copy()
|
||||||
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||||
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
||||||
fixes = np.ones(self.size).astype(bool)
|
fixes = np.ones(self.size).astype(bool)
|
||||||
|
|
@ -583,14 +586,14 @@ class OptimizationHandlable(Constrainable):
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def _set_params_transformed(self, p):
|
def _set_params_transformed(self, p):
|
||||||
if p is self._param_array_:
|
if p is self.param_array:
|
||||||
p = p.copy()
|
p = p.copy()
|
||||||
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
||||||
fixes = np.ones(self.size).astype(bool)
|
fixes = np.ones(self.size).astype(bool)
|
||||||
fixes[self.constraints[__fixed__]] = FIXED
|
fixes[self.constraints[__fixed__]] = FIXED
|
||||||
self._param_array_.flat[fixes] = p
|
self.param_array.flat[fixes] = p
|
||||||
elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p
|
elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
|
||||||
else: self._param_array_.flat = p
|
else: self.param_array.flat = p
|
||||||
self.untransform()
|
self.untransform()
|
||||||
self._trigger_params_changed()
|
self._trigger_params_changed()
|
||||||
|
|
||||||
|
|
@ -600,36 +603,29 @@ class OptimizationHandlable(Constrainable):
|
||||||
|
|
||||||
def _size_transformed(self):
|
def _size_transformed(self):
|
||||||
return self.size - self.constraints[__fixed__].size
|
return self.size - self.constraints[__fixed__].size
|
||||||
#
|
|
||||||
# def _untransform_params(self, p):
|
|
||||||
# # inverse apply transformations for parameters
|
|
||||||
# #p = p.copy()
|
|
||||||
# if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
|
|
||||||
# [np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
|
||||||
# return p
|
|
||||||
#
|
|
||||||
# def _get_params(self):
|
|
||||||
# """
|
|
||||||
# get all parameters
|
|
||||||
# """
|
|
||||||
# return self._param_array_
|
|
||||||
# p = np.empty(self.size, dtype=np.float64)
|
|
||||||
# if self.size == 0:
|
|
||||||
# return p
|
|
||||||
# [np.put(p, ind, par._get_params()) for ind, par in itertools.izip(self._param)]
|
|
||||||
# return p
|
|
||||||
|
|
||||||
# def _set_params(self, params, trigger_parent=True):
|
@property
|
||||||
# self._param_array_.flat = params
|
def num_params(self):
|
||||||
# if trigger_parent: min_priority = None
|
"""
|
||||||
# else: min_priority = -np.inf
|
Return the number of parameters of this parameter_handle.
|
||||||
# self.notify_observers(None, min_priority)
|
Param objects will allways return 0.
|
||||||
# don't overwrite this anymore!
|
"""
|
||||||
# raise NotImplementedError, "Abstract superclass: This needs to be implemented in Param and Parameterizable"
|
raise NotImplemented, "Abstract, please implement in respective classes"
|
||||||
|
|
||||||
#===========================================================================
|
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
|
||||||
# Optimization handles:
|
"""
|
||||||
#===========================================================================
|
Get the names of all parameters of this model.
|
||||||
|
|
||||||
|
:param bool add_self: whether to add the own name in front of names
|
||||||
|
:param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
|
||||||
|
:param bool recursive: whether to traverse through hierarchy and append leaf node names
|
||||||
|
"""
|
||||||
|
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
|
||||||
|
else: adjust = lambda x: x
|
||||||
|
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
|
||||||
|
else: names = [adjust(x.name) for x in self._parameters_]
|
||||||
|
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
|
||||||
|
return names
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
|
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
|
||||||
return n
|
return n
|
||||||
|
|
@ -663,16 +659,30 @@ class OptimizationHandlable(Constrainable):
|
||||||
# For shared memory arrays. This does nothing in Param, but sets the memory
|
# For shared memory arrays. This does nothing in Param, but sets the memory
|
||||||
# for all parameterized objects
|
# for all parameterized objects
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
@property
|
||||||
|
def full_gradient(self):
|
||||||
|
"""
|
||||||
|
Note to users:
|
||||||
|
This does not return the gradient in the right shape! Use self.gradient
|
||||||
|
for the right gradient array.
|
||||||
|
|
||||||
|
To work on the gradient array, use this as the gradient handle.
|
||||||
|
This method exists for in memory use of parameters.
|
||||||
|
When trying to access the true gradient array, use this.
|
||||||
|
"""
|
||||||
|
self.gradient # <<< ensure _gradient_array_
|
||||||
|
return self._gradient_array_
|
||||||
|
|
||||||
def _propagate_param_grad(self, parray, garray):
|
def _propagate_param_grad(self, parray, garray):
|
||||||
pi_old_size = 0
|
pi_old_size = 0
|
||||||
for pi in self._parameters_:
|
for pi in self._parameters_:
|
||||||
pislice = slice(pi_old_size, pi_old_size + pi.size)
|
pislice = slice(pi_old_size, pi_old_size + pi.size)
|
||||||
|
|
||||||
self._param_array_[pislice] = pi._param_array_.flat # , requirements=['C', 'W']).flat
|
self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat
|
||||||
self._gradient_array_[pislice] = pi._gradient_array_.flat # , requirements=['C', 'W']).flat
|
self.full_gradient[pislice] = pi.full_gradient.flat # , requirements=['C', 'W']).flat
|
||||||
|
|
||||||
pi._param_array_.data = parray[pislice].data
|
pi.param_array.data = parray[pislice].data
|
||||||
pi._gradient_array_.data = garray[pislice].data
|
pi.full_gradient.data = garray[pislice].data
|
||||||
|
|
||||||
pi._propagate_param_grad(parray[pislice], garray[pislice])
|
pi._propagate_param_grad(parray[pislice], garray[pislice])
|
||||||
pi_old_size += pi.size
|
pi_old_size += pi.size
|
||||||
|
|
@ -681,26 +691,32 @@ class Parameterizable(OptimizationHandlable):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(Parameterizable, self).__init__(*args, **kwargs)
|
super(Parameterizable, self).__init__(*args, **kwargs)
|
||||||
from GPy.core.parameterization.lists_and_dicts import ArrayList
|
from GPy.core.parameterization.lists_and_dicts import ArrayList
|
||||||
_parameters_ = ArrayList()
|
self._parameters_ = ArrayList()
|
||||||
self.size = 0
|
self.size = 0
|
||||||
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
|
||||||
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
|
||||||
self._added_names_ = set()
|
self._added_names_ = set()
|
||||||
|
|
||||||
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
|
@property
|
||||||
"""
|
def param_array(self):
|
||||||
Get the names of all parameters of this model.
|
if not hasattr(self, '_param_array_'):
|
||||||
|
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
||||||
|
return self._param_array_
|
||||||
|
|
||||||
:param bool add_self: whether to add the own name in front of names
|
@param_array.setter
|
||||||
:param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
|
def param_array(self, arr):
|
||||||
:param bool recursive: whether to traverse through hierarchy and append leaf node names
|
self._param_array_ = arr
|
||||||
"""
|
|
||||||
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
|
#=========================================================================
|
||||||
else: adjust = lambda x: x
|
# Gradient handling
|
||||||
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
|
#=========================================================================
|
||||||
else: names = [adjust(x.name) for x in self._parameters_]
|
@property
|
||||||
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
|
def gradient(self):
|
||||||
return names
|
if not hasattr(self, '_gradient_array_'):
|
||||||
|
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
||||||
|
return self._gradient_array_
|
||||||
|
|
||||||
|
@gradient.setter
|
||||||
|
def gradient(self, val):
|
||||||
|
self._gradient_array_[:] = val
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def num_params(self):
|
def num_params(self):
|
||||||
|
|
@ -737,34 +753,6 @@ class Parameterizable(OptimizationHandlable):
|
||||||
self._remove_parameter_name(None, old_name)
|
self._remove_parameter_name(None, old_name)
|
||||||
self._add_parameter_name(param)
|
self._add_parameter_name(param)
|
||||||
|
|
||||||
#=========================================================================
|
|
||||||
# Gradient handling
|
|
||||||
#=========================================================================
|
|
||||||
@property
|
|
||||||
def gradient(self):
|
|
||||||
return self._gradient_array_
|
|
||||||
|
|
||||||
@gradient.setter
|
|
||||||
def gradient(self, val):
|
|
||||||
self._gradient_array_[:] = val
|
|
||||||
#===========================================================================
|
|
||||||
# def _collect_gradient(self, target):
|
|
||||||
# [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
|
|
||||||
#===========================================================================
|
|
||||||
|
|
||||||
#===========================================================================
|
|
||||||
# def _set_params(self, params, trigger_parent=True):
|
|
||||||
# [p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
|
|
||||||
# if trigger_parent: min_priority = None
|
|
||||||
# else: min_priority = -np.inf
|
|
||||||
# self.notify_observers(None, min_priority)
|
|
||||||
#===========================================================================
|
|
||||||
|
|
||||||
#===========================================================================
|
|
||||||
# def _set_gradient(self, g):
|
|
||||||
# [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
|
|
||||||
#===========================================================================
|
|
||||||
|
|
||||||
def add_parameter(self, param, index=None, _ignore_added_names=False):
|
def add_parameter(self, param, index=None, _ignore_added_names=False):
|
||||||
"""
|
"""
|
||||||
:param parameters: the parameters to add
|
:param parameters: the parameters to add
|
||||||
|
|
@ -864,7 +852,7 @@ class Parameterizable(OptimizationHandlable):
|
||||||
# no parameters for this class
|
# no parameters for this class
|
||||||
return
|
return
|
||||||
old_size = 0
|
old_size = 0
|
||||||
self._param_array_ = np.empty(self.size, dtype=np.float64)
|
self.param_array = np.empty(self.size, dtype=np.float64)
|
||||||
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
|
||||||
|
|
||||||
self._param_slices_ = []
|
self._param_slices_ = []
|
||||||
|
|
@ -874,15 +862,15 @@ class Parameterizable(OptimizationHandlable):
|
||||||
|
|
||||||
pslice = slice(old_size, old_size + p.size)
|
pslice = slice(old_size, old_size + p.size)
|
||||||
# first connect all children
|
# first connect all children
|
||||||
p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
|
p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice])
|
||||||
# then connect children to self
|
# then connect children to self
|
||||||
self._param_array_[pslice] = p._param_array_.flat # , requirements=['C', 'W']).ravel(order='C')
|
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||||
self._gradient_array_[pslice] = p._gradient_array_.flat # , requirements=['C', 'W']).ravel(order='C')
|
self.full_gradient[pslice] = p.full_gradient.flat # , requirements=['C', 'W']).ravel(order='C')
|
||||||
|
|
||||||
if not p._param_array_.flags['C_CONTIGUOUS']:
|
if not p.param_array.flags['C_CONTIGUOUS']:
|
||||||
import ipdb;ipdb.set_trace()
|
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
|
||||||
p._param_array_.data = self._param_array_[pslice].data
|
p.param_array.data = self.param_array[pslice].data
|
||||||
p._gradient_array_.data = self._gradient_array_[pslice].data
|
p.full_gradient.data = self.full_gradient[pslice].data
|
||||||
|
|
||||||
self._param_slices_.append(pslice)
|
self._param_slices_.append(pslice)
|
||||||
|
|
||||||
|
|
@ -898,41 +886,22 @@ class Parameterizable(OptimizationHandlable):
|
||||||
self.notify_observers(which=which)
|
self.notify_observers(which=which)
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# TODO: not working yet
|
# Pickling
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
def __setstate__(self, state):
|
||||||
|
super(Parameterizable, self).__setstate__(state)
|
||||||
|
self._connect_parameters()
|
||||||
|
self._connect_fixes()
|
||||||
|
self._notify_parent_change()
|
||||||
|
|
||||||
|
self.parameters_changed()
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
"""Returns a (deep) copy of the current model"""
|
c = super(Parameterizable, self).copy()
|
||||||
raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
|
c._connect_parameters()
|
||||||
import copy
|
c._connect_fixes()
|
||||||
from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
|
c._notify_parent_change()
|
||||||
from .lists_and_dicts import ArrayList
|
return c
|
||||||
|
|
||||||
dc = dict()
|
|
||||||
for k, v in self.__dict__.iteritems():
|
|
||||||
if k not in ['_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names(recursive=False):
|
|
||||||
if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
|
|
||||||
dc[k] = v.copy()
|
|
||||||
else:
|
|
||||||
dc[k] = copy.deepcopy(v)
|
|
||||||
if k == '_parameters_':
|
|
||||||
params = [p.copy() for p in v]
|
|
||||||
|
|
||||||
dc['_parent_'] = None
|
|
||||||
dc['_parent_index_'] = None
|
|
||||||
dc['_observer_callables_'] = []
|
|
||||||
dc['_parameters_'] = ArrayList()
|
|
||||||
dc['constraints'].clear()
|
|
||||||
dc['priors'].clear()
|
|
||||||
dc['size'] = 0
|
|
||||||
|
|
||||||
s = self.__new__(self.__class__)
|
|
||||||
s.__dict__ = dc
|
|
||||||
|
|
||||||
for p in params:
|
|
||||||
s.add_parameter(p, _ignore_added_names=True)
|
|
||||||
|
|
||||||
return s
|
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# From being parentable, we have to define the parent_change notification
|
# From being parentable, we have to define the parent_change notification
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ class ParametersChangedMeta(type):
|
||||||
instance.parameters_changed()
|
instance.parameters_changed()
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
class Parameterized(Parameterizable, Pickleable):
|
class Parameterized(Parameterizable):
|
||||||
"""
|
"""
|
||||||
Parameterized class
|
Parameterized class
|
||||||
|
|
||||||
|
|
@ -90,7 +90,7 @@ class Parameterized(Parameterizable, Pickleable):
|
||||||
child_node = child.build_pydot(G)
|
child_node = child.build_pydot(G)
|
||||||
G.add_edge(pydot.Edge(node, child_node))
|
G.add_edge(pydot.Edge(node, child_node))
|
||||||
|
|
||||||
for o in self._observer_callables_.keys():
|
for o in self.observers.keys():
|
||||||
label = o.name if hasattr(o, 'name') else str(o)
|
label = o.name if hasattr(o, 'name') else str(o)
|
||||||
observed_node = pydot.Node(id(o), label=label)
|
observed_node = pydot.Node(id(o), label=label)
|
||||||
G.add_node(observed_node)
|
G.add_node(observed_node)
|
||||||
|
|
@ -101,48 +101,13 @@ class Parameterized(Parameterizable, Pickleable):
|
||||||
return G
|
return G
|
||||||
return node
|
return node
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
here just all the indices, rest can get recomputed
|
|
||||||
For inheriting from Parameterized:
|
|
||||||
|
|
||||||
Allways append the state of the inherited object
|
|
||||||
and call down to the inherited object in _setstate!!
|
|
||||||
"""
|
|
||||||
return [
|
|
||||||
self._fixes_,
|
|
||||||
self.priors,
|
|
||||||
self.constraints,
|
|
||||||
self._parameters_,
|
|
||||||
self._name,
|
|
||||||
self._added_names_,
|
|
||||||
]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
self._added_names_ = state.pop()
|
|
||||||
self._name = state.pop()
|
|
||||||
self._parameters_ = state.pop()
|
|
||||||
self.constraints = state.pop()
|
|
||||||
self.priors = state.pop()
|
|
||||||
self._fixes_ = state.pop()
|
|
||||||
self._connect_parameters()
|
|
||||||
self.parameters_changed()
|
|
||||||
#===========================================================================
|
|
||||||
# Override copy to handle programmatically added observers
|
|
||||||
#===========================================================================
|
|
||||||
def copy(self):
|
|
||||||
c = super(Pickleable, self).copy()
|
|
||||||
c.add_observer(c, c._parameters_changed_notification, -100)
|
|
||||||
return c
|
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Gradient control
|
# Gradient control
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
def _transform_gradients(self, g):
|
def _transform_gradients(self, g):
|
||||||
if self.has_parent():
|
if self.has_parent():
|
||||||
return g
|
return g
|
||||||
[numpy.put(g, i, g[i] * c.gradfactor(self._param_array_[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
[numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
|
||||||
if self._has_fixes(): return g[self._fixes_]
|
if self._has_fixes(): return g[self._fixes_]
|
||||||
return g
|
return g
|
||||||
|
|
||||||
|
|
@ -206,7 +171,7 @@ class Parameterized(Parameterizable, Pickleable):
|
||||||
|
|
||||||
def __getitem__(self, name, paramlist=None):
|
def __getitem__(self, name, paramlist=None):
|
||||||
if isinstance(name, (int, slice, tuple, np.ndarray)):
|
if isinstance(name, (int, slice, tuple, np.ndarray)):
|
||||||
return self._param_array_[name]
|
return self.param_array[name]
|
||||||
else:
|
else:
|
||||||
if paramlist is None:
|
if paramlist is None:
|
||||||
paramlist = self.grep_param_names(name)
|
paramlist = self.grep_param_names(name)
|
||||||
|
|
@ -222,7 +187,7 @@ class Parameterized(Parameterizable, Pickleable):
|
||||||
def __setitem__(self, name, value, paramlist=None):
|
def __setitem__(self, name, value, paramlist=None):
|
||||||
if isinstance(name, (slice, tuple, np.ndarray)):
|
if isinstance(name, (slice, tuple, np.ndarray)):
|
||||||
try:
|
try:
|
||||||
self._param_array_[name] = value
|
self.param_array[name] = value
|
||||||
except:
|
except:
|
||||||
raise ValueError, "Setting by slice or index only allowed with array-like"
|
raise ValueError, "Setting by slice or index only allowed with array-like"
|
||||||
self._trigger_params_changed()
|
self._trigger_params_changed()
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ class SpikeAndSlabPrior(VariationalPrior):
|
||||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
|
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
|
||||||
|
|
||||||
class VariationalPosterior(Parameterized):
|
class VariationalPosterior(Parameterized):
|
||||||
def __init__(self, means=None, variances=None, name=None, *a, **kw):
|
def __init__(self, means=None, variances=None, name='latent space', *a, **kw):
|
||||||
super(VariationalPosterior, self).__init__(name=name, *a, **kw)
|
super(VariationalPosterior, self).__init__(name=name, *a, **kw)
|
||||||
self.mean = Param("mean", means)
|
self.mean = Param("mean", means)
|
||||||
self.variance = Param("variance", variances, Logexp())
|
self.variance = Param("variance", variances, Logexp())
|
||||||
|
|
@ -124,6 +124,7 @@ class NormalPosterior(VariationalPosterior):
|
||||||
import sys
|
import sys
|
||||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||||
from ...plotting.matplot_dep import variational_plots
|
from ...plotting.matplot_dep import variational_plots
|
||||||
|
import matplotlib
|
||||||
return variational_plots.plot(self,*args)
|
return variational_plots.plot(self,*args)
|
||||||
|
|
||||||
class SpikeAndSlabPosterior(VariationalPosterior):
|
class SpikeAndSlabPosterior(VariationalPosterior):
|
||||||
|
|
|
||||||
|
|
@ -106,15 +106,3 @@ class SparseGP(GP):
|
||||||
return mu, var
|
return mu, var
|
||||||
|
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
"""
|
|
||||||
return GP._getstate(self) + [
|
|
||||||
self.Z,
|
|
||||||
self.num_inducing]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
self.num_inducing = state.pop()
|
|
||||||
self.Z = state.pop()
|
|
||||||
GP._setstate(self, state)
|
|
||||||
|
|
|
||||||
|
|
@ -89,57 +89,57 @@ class SVIGP(GP):
|
||||||
self._param_steplength_trace = []
|
self._param_steplength_trace = []
|
||||||
self._vb_steplength_trace = []
|
self._vb_steplength_trace = []
|
||||||
|
|
||||||
def _getstate(self):
|
# def _getstate(self):
|
||||||
steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
|
# steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
|
||||||
return GP._getstate(self) + \
|
# return GP._getstate(self) + \
|
||||||
[self.get_vb_param(),
|
# [self.get_vb_param(),
|
||||||
self.Z,
|
# self.Z,
|
||||||
self.num_inducing,
|
# self.num_inducing,
|
||||||
self.has_uncertain_inputs,
|
# self.has_uncertain_inputs,
|
||||||
self.X_variance,
|
# self.X_variance,
|
||||||
self.X_batch,
|
# self.X_batch,
|
||||||
self.X_variance_batch,
|
# self.X_variance_batch,
|
||||||
steplength_params,
|
# steplength_params,
|
||||||
self.batchcounter,
|
# self.batchcounter,
|
||||||
self.batchsize,
|
# self.batchsize,
|
||||||
self.epochs,
|
# self.epochs,
|
||||||
self.momentum,
|
# self.momentum,
|
||||||
self.data_prop,
|
# self.data_prop,
|
||||||
self._param_trace,
|
# self._param_trace,
|
||||||
self._param_steplength_trace,
|
# self._param_steplength_trace,
|
||||||
self._vb_steplength_trace,
|
# self._vb_steplength_trace,
|
||||||
self._ll_trace,
|
# self._ll_trace,
|
||||||
self._grad_trace,
|
# self._grad_trace,
|
||||||
self.Y,
|
# self.Y,
|
||||||
self._permutation,
|
# self._permutation,
|
||||||
self.iterations
|
# self.iterations
|
||||||
]
|
# ]
|
||||||
|
#
|
||||||
def _setstate(self, state):
|
# def _setstate(self, state):
|
||||||
self.iterations = state.pop()
|
# self.iterations = state.pop()
|
||||||
self._permutation = state.pop()
|
# self._permutation = state.pop()
|
||||||
self.Y = state.pop()
|
# self.Y = state.pop()
|
||||||
self._grad_trace = state.pop()
|
# self._grad_trace = state.pop()
|
||||||
self._ll_trace = state.pop()
|
# self._ll_trace = state.pop()
|
||||||
self._vb_steplength_trace = state.pop()
|
# self._vb_steplength_trace = state.pop()
|
||||||
self._param_steplength_trace = state.pop()
|
# self._param_steplength_trace = state.pop()
|
||||||
self._param_trace = state.pop()
|
# self._param_trace = state.pop()
|
||||||
self.data_prop = state.pop()
|
# self.data_prop = state.pop()
|
||||||
self.momentum = state.pop()
|
# self.momentum = state.pop()
|
||||||
self.epochs = state.pop()
|
# self.epochs = state.pop()
|
||||||
self.batchsize = state.pop()
|
# self.batchsize = state.pop()
|
||||||
self.batchcounter = state.pop()
|
# self.batchcounter = state.pop()
|
||||||
steplength_params = state.pop()
|
# steplength_params = state.pop()
|
||||||
(self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
|
# (self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
|
||||||
self.X_variance_batch = state.pop()
|
# self.X_variance_batch = state.pop()
|
||||||
self.X_batch = state.pop()
|
# self.X_batch = state.pop()
|
||||||
self.X_variance = state.pop()
|
# self.X_variance = state.pop()
|
||||||
self.has_uncertain_inputs = state.pop()
|
# self.has_uncertain_inputs = state.pop()
|
||||||
self.num_inducing = state.pop()
|
# self.num_inducing = state.pop()
|
||||||
self.Z = state.pop()
|
# self.Z = state.pop()
|
||||||
vb_param = state.pop()
|
# vb_param = state.pop()
|
||||||
GP._setstate(self, state)
|
# GP._setstate(self, state)
|
||||||
self.set_vb_param(vb_param)
|
# self.set_vb_param(vb_param)
|
||||||
|
|
||||||
def _compute_kernel_matrices(self):
|
def _compute_kernel_matrices(self):
|
||||||
# kernel computations, using BGPLVM notation
|
# kernel computations, using BGPLVM notation
|
||||||
|
|
|
||||||
|
|
@ -277,6 +277,8 @@ def bgplvm_simulation(optimize=True, verbose=1,
|
||||||
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||||
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
|
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
|
||||||
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
|
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
|
||||||
|
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
|
||||||
|
m.likelihood.variance = .1
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
print "Optimizing model:"
|
print "Optimizing model:"
|
||||||
|
|
@ -304,7 +306,8 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
|
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
|
||||||
m.inference_method = VarDTCMissingData()
|
m.inference_method = VarDTCMissingData()
|
||||||
m.Y[inan] = _np.nan
|
m.Y[inan] = _np.nan
|
||||||
m.X.variance *= .1
|
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
|
||||||
|
m.likelihood.variance = .01
|
||||||
m.parameters_changed()
|
m.parameters_changed()
|
||||||
m.Yreal = Y
|
m.Yreal = Y
|
||||||
|
|
||||||
|
|
@ -321,18 +324,15 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||||
from GPy import kern
|
from GPy import kern
|
||||||
from GPy.models import MRD
|
from GPy.models import MRD
|
||||||
from GPy.likelihoods import Gaussian
|
|
||||||
|
|
||||||
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
|
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
|
||||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||||
|
|
||||||
#Ylist = [Ylist[0]]
|
#Ylist = [Ylist[0]]
|
||||||
k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))]
|
k = kern.Linear(Q, ARD=True)
|
||||||
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw)
|
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw)
|
||||||
|
|
||||||
m['.*noise'] = [Y.var()/500. for Y in Ylist]
|
m['.*noise'] = [Y.var()/40. for Y in Ylist]
|
||||||
#for i, Y in enumerate(Ylist):
|
|
||||||
# m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500.
|
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
print "Optimizing Model:"
|
print "Optimizing Model:"
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,8 @@ class ExactGaussianInference(object):
|
||||||
return Y
|
return Y
|
||||||
else:
|
else:
|
||||||
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
||||||
raise NotImplementedError, 'TODO' #TODO
|
print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
|
||||||
|
return Y
|
||||||
|
|
||||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
#
|
#
|
||||||
#Parts of this file were influenced by the Matlab GPML framework written by
|
#Parts of this file were influenced by the Matlab GPML framework written by
|
||||||
|
|
@ -91,7 +91,11 @@ class Laplace(object):
|
||||||
iteration = 0
|
iteration = 0
|
||||||
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
|
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
|
||||||
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
|
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
|
||||||
|
if np.any(np.isnan(W)):
|
||||||
|
raise ValueError('One or more element(s) of W is NaN')
|
||||||
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
|
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
|
||||||
|
if np.any(np.isnan(grad)):
|
||||||
|
raise ValueError('One or more element(s) of grad is NaN')
|
||||||
|
|
||||||
W_f = W*f
|
W_f = W*f
|
||||||
|
|
||||||
|
|
@ -141,25 +145,30 @@ class Laplace(object):
|
||||||
"""
|
"""
|
||||||
#At this point get the hessian matrix (or vector as W is diagonal)
|
#At this point get the hessian matrix (or vector as W is diagonal)
|
||||||
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
|
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
|
||||||
|
if np.any(np.isnan(W)):
|
||||||
|
raise ValueError('One or more element(s) of W is NaN')
|
||||||
|
|
||||||
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
|
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||||
|
|
||||||
#compute vital matrices
|
#compute vital matrices
|
||||||
C = np.dot(LiW12, K)
|
C = np.dot(LiW12, K)
|
||||||
Ki_W_i = K - C.T.dot(C) #Could this be wrong?
|
Ki_W_i = K - C.T.dot(C)
|
||||||
|
|
||||||
#compute the log marginal
|
#compute the log marginal
|
||||||
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
|
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
|
||||||
|
|
||||||
#Compute vival matrices for derivatives
|
# Compute matrices for derivatives
|
||||||
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
|
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
|
||||||
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
|
if np.any(np.isnan(dW_df)):
|
||||||
|
raise ValueError('One or more element(s) of dW_df is NaN')
|
||||||
|
|
||||||
|
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9.
|
||||||
#BiK, _ = dpotrs(L, K, lower=1)
|
#BiK, _ = dpotrs(L, K, lower=1)
|
||||||
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
|
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
|
||||||
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
|
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
|
||||||
|
|
||||||
####################
|
####################
|
||||||
#compute dL_dK#
|
# compute dL_dK #
|
||||||
####################
|
####################
|
||||||
if kern.size > 0 and not kern.is_fixed:
|
if kern.size > 0 and not kern.is_fixed:
|
||||||
#Explicit
|
#Explicit
|
||||||
|
|
@ -202,12 +211,12 @@ class Laplace(object):
|
||||||
def _compute_B_statistics(self, K, W, log_concave):
|
def _compute_B_statistics(self, K, W, log_concave):
|
||||||
"""
|
"""
|
||||||
Rasmussen suggests the use of a numerically stable positive definite matrix B
|
Rasmussen suggests the use of a numerically stable positive definite matrix B
|
||||||
Which has a positive diagonal element and can be easyily inverted
|
Which has a positive diagonal elements and can be easily inverted
|
||||||
|
|
||||||
:param K: Prior Covariance matrix evaluated at locations X
|
:param K: Prior Covariance matrix evaluated at locations X
|
||||||
:type K: NxN matrix
|
:type K: NxN matrix
|
||||||
:param W: Negative hessian at a point (diagonal matrix)
|
:param W: Negative hessian at a point (diagonal matrix)
|
||||||
:type W: Vector of diagonal values of hessian (1xN)
|
:type W: Vector of diagonal values of Hessian (1xN)
|
||||||
:returns: (W12BiW12, L_B, Li_W12)
|
:returns: (W12BiW12, L_B, Li_W12)
|
||||||
"""
|
"""
|
||||||
if not log_concave:
|
if not log_concave:
|
||||||
|
|
@ -218,7 +227,8 @@ class Laplace(object):
|
||||||
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
||||||
# To cause the posterior to become less certain than the prior and likelihood,
|
# To cause the posterior to become less certain than the prior and likelihood,
|
||||||
# This is a property only held by non-log-concave likelihoods
|
# This is a property only held by non-log-concave likelihoods
|
||||||
|
if np.any(np.isnan(W)):
|
||||||
|
raise ValueError('One or more element(s) of W is NaN')
|
||||||
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
|
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
|
||||||
W_12 = np.sqrt(W)
|
W_12 = np.sqrt(W)
|
||||||
B = np.eye(K.shape[0]) + W_12*K*W_12.T
|
B = np.eye(K.shape[0]) + W_12*K*W_12.T
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ class VarDTC(object):
|
||||||
def __init__(self, limit=1):
|
def __init__(self, limit=1):
|
||||||
#self._YYTfactor_cache = caching.cache()
|
#self._YYTfactor_cache = caching.cache()
|
||||||
from ...util.caching import Cacher
|
from ...util.caching import Cacher
|
||||||
|
self.limit = limit
|
||||||
self.get_trYYT = Cacher(self._get_trYYT, limit)
|
self.get_trYYT = Cacher(self._get_trYYT, limit)
|
||||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
|
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
|
||||||
|
|
||||||
|
|
@ -33,6 +34,17 @@ class VarDTC(object):
|
||||||
def _get_trYYT(self, Y):
|
def _get_trYYT(self, Y):
|
||||||
return param_to_array(np.sum(np.square(Y)))
|
return param_to_array(np.sum(np.square(Y)))
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
# has to be overridden, as Cacher objects cannot be pickled.
|
||||||
|
return self.limit
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
# has to be overridden, as Cacher objects cannot be pickled.
|
||||||
|
self.limit = state
|
||||||
|
from ...util.caching import Cacher
|
||||||
|
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
|
||||||
|
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
|
||||||
|
|
||||||
def _get_YYTfactor(self, Y):
|
def _get_YYTfactor(self, Y):
|
||||||
"""
|
"""
|
||||||
find a matrix L which satisfies LLT = YYT.
|
find a matrix L which satisfies LLT = YYT.
|
||||||
|
|
@ -179,6 +191,7 @@ class VarDTC(object):
|
||||||
return post, log_marginal, grad_dict
|
return post, log_marginal, grad_dict
|
||||||
|
|
||||||
class VarDTCMissingData(object):
|
class VarDTCMissingData(object):
|
||||||
|
const_jitter = 1e-6
|
||||||
def __init__(self, limit=1):
|
def __init__(self, limit=1):
|
||||||
from ...util.caching import Cacher
|
from ...util.caching import Cacher
|
||||||
self._Y = Cacher(self._subarray_computations, limit)
|
self._Y = Cacher(self._subarray_computations, limit)
|
||||||
|
|
@ -250,7 +263,7 @@ class VarDTCMissingData(object):
|
||||||
|
|
||||||
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
|
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
|
||||||
if het_noise: beta = beta_all[ind]
|
if het_noise: beta = beta_all[ind]
|
||||||
else: beta = beta_all[0]
|
else: beta = beta_all
|
||||||
|
|
||||||
VVT_factor = (beta*y)
|
VVT_factor = (beta*y)
|
||||||
VVT_factor_all[v, ind].flat = VVT_factor.flat
|
VVT_factor_all[v, ind].flat = VVT_factor.flat
|
||||||
|
|
@ -311,7 +324,7 @@ class VarDTCMissingData(object):
|
||||||
het_noise, uncertain_inputs, LB,
|
het_noise, uncertain_inputs, LB,
|
||||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||||
psi0, psi1, beta,
|
psi0, psi1, beta,
|
||||||
data_fit, num_data, output_dim, trYYT)
|
data_fit, num_data, output_dim, trYYT, Y)
|
||||||
|
|
||||||
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
|
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
from _src.kern import Kern
|
from _src.kern import Kern
|
||||||
from _src.rbf import RBF
|
from _src.rbf import RBF
|
||||||
from _src.linear import Linear
|
from _src.linear import Linear, LinearFull
|
||||||
from _src.static import Bias, White
|
from _src.static import Bias, White
|
||||||
from _src.brownian import Brownian
|
from _src.brownian import Brownian
|
||||||
from _src.sympykern import Sympykern
|
from _src.symbolic import Symbolic
|
||||||
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||||
from _src.mlp import MLP
|
from _src.mlp import MLP
|
||||||
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,6 @@ class Add(CombinationKernel):
|
||||||
If a list of parts (of this kernel!) `which_parts` is given, only
|
If a list of parts (of this kernel!) `which_parts` is given, only
|
||||||
the parts of the list are taken to compute the covariance.
|
the parts of the list are taken to compute the covariance.
|
||||||
"""
|
"""
|
||||||
assert X.shape[1] > max(np.r_[self.active_dims])
|
|
||||||
if which_parts is None:
|
if which_parts is None:
|
||||||
which_parts = self.parts
|
which_parts = self.parts
|
||||||
elif not isinstance(which_parts, (list, tuple)):
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
|
@ -33,7 +32,6 @@ class Add(CombinationKernel):
|
||||||
|
|
||||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
def Kdiag(self, X, which_parts=None):
|
def Kdiag(self, X, which_parts=None):
|
||||||
assert X.shape[1] > max(np.r_[self.active_dims])
|
|
||||||
if which_parts is None:
|
if which_parts is None:
|
||||||
which_parts = self.parts
|
which_parts = self.parts
|
||||||
elif not isinstance(which_parts, (list, tuple)):
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
|
@ -160,21 +158,13 @@ class Add(CombinationKernel):
|
||||||
target_S += b
|
target_S += b
|
||||||
return target_mu, target_S
|
return target_mu, target_S
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
here just all the indices, rest can get recomputed
|
|
||||||
"""
|
|
||||||
return super(Add, self)._getstate()
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
super(Add, self)._setstate(state)
|
|
||||||
|
|
||||||
def add(self, other, name='sum'):
|
def add(self, other, name='sum'):
|
||||||
if isinstance(other, Add):
|
if isinstance(other, Add):
|
||||||
other_params = other._parameters_[:]
|
other_params = other._parameters_[:]
|
||||||
for p in other_params:
|
for p in other_params:
|
||||||
other.remove_parameter(p)
|
other.remove_parameter(p)
|
||||||
self.add_parameters(*other_params)
|
self.add_parameters(*other_params)
|
||||||
else: self.add_parameter(other)
|
else:
|
||||||
|
self.add_parameter(other)
|
||||||
|
self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
|
||||||
return self
|
return self
|
||||||
|
|
@ -54,85 +54,93 @@ class IndependentOutputs(CombinationKernel):
|
||||||
self.kern = kernels
|
self.kern = kernels
|
||||||
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
|
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
|
||||||
self.index_dim = index_dim
|
self.index_dim = index_dim
|
||||||
self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
|
|
||||||
|
|
||||||
def K(self,X ,X2=None):
|
def K(self,X ,X2=None):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
|
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
target = np.zeros((X.shape[0], X.shape[0]))
|
target = np.zeros((X.shape[0], X.shape[0]))
|
||||||
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
|
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
|
||||||
else:
|
else:
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
|
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def Kdiag(self,X):
|
def Kdiag(self,X):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
|
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||||
target = np.zeros(X.shape[0])
|
target = np.zeros(X.shape[0])
|
||||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def update_gradients_full(self,dL_dK,X,X2=None):
|
def update_gradients_full(self,dL_dK,X,X2=None):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
if self.single_kern: target = np.zeros(self.kern.size)
|
if self.single_kern:
|
||||||
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
|
target = np.zeros(self.kern.size)
|
||||||
|
kerns = itertools.repeat(self.kern)
|
||||||
|
else:
|
||||||
|
kerns = self.kern
|
||||||
|
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
|
||||||
def collate_grads(kern, i, dL, X, X2):
|
def collate_grads(kern, i, dL, X, X2):
|
||||||
kern.update_gradients_full(dL,X,X2)
|
kern.update_gradients_full(dL,X,X2)
|
||||||
if self.single_kern: target[:] += kern.gradient
|
if self.single_kern: target[:] += kern.gradient
|
||||||
else: target[i][:] += kern.gradient
|
else: target[i][:] += kern.gradient
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
|
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))]
|
||||||
else:
|
else:
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
|
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
|
||||||
if self.single_kern: kern.gradient = target
|
if self.single_kern: kern.gradient = target
|
||||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
|
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
target = np.zeros(X.shape)
|
target = np.zeros(X.shape)
|
||||||
|
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
# TODO: make use of index_to_slices
|
# TODO: make use of index_to_slices
|
||||||
values = np.unique(X[:,self.index_dim])
|
values = np.unique(X[:,self.index_dim])
|
||||||
slices = [X[:,self.index_dim]==i for i in values]
|
slices = [X[:,self.index_dim]==i for i in values]
|
||||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
||||||
for kern, s in zip(self.kerns, slices)]
|
for kern, s in zip(kerns, slices)]
|
||||||
#slices = index_to_slices(X[:,self.index_dim])
|
#slices = index_to_slices(X[:,self.index_dim])
|
||||||
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
|
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
|
||||||
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||||
#import ipdb;ipdb.set_trace()
|
#import ipdb;ipdb.set_trace()
|
||||||
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
|
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
|
||||||
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
|
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
|
||||||
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)]
|
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)]
|
||||||
else:
|
else:
|
||||||
values = np.unique(X[:,self.index_dim])
|
values = np.unique(X[:,self.index_dim])
|
||||||
slices = [X[:,self.index_dim]==i for i in values]
|
slices = [X[:,self.index_dim]==i for i in values]
|
||||||
slices2 = [X2[:,self.index_dim]==i for i in values]
|
slices2 = [X2[:,self.index_dim]==i for i in values]
|
||||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
|
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
|
||||||
for kern, s, s2 in zip(self.kerns, slices, slices2)]
|
for kern, s, s2 in zip(kerns, slices, slices2)]
|
||||||
# TODO: make work with index_to_slices
|
# TODO: make work with index_to_slices
|
||||||
#slices = index_to_slices(X[:,self.index_dim])
|
#slices = index_to_slices(X[:,self.index_dim])
|
||||||
#slices2 = index_to_slices(X2[:,self.index_dim])
|
#slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
|
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def gradients_X_diag(self, dL_dKdiag, X):
|
def gradients_X_diag(self, dL_dKdiag, X):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
|
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||||
target = np.zeros(X.shape)
|
target = np.zeros(X.shape)
|
||||||
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
|
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dKdiag, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
|
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||||
if self.single_kern: target = np.zeros(self.kern.size)
|
if self.single_kern: target = np.zeros(self.kern.size)
|
||||||
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
|
else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
|
||||||
def collate_grads(kern, i, dL, X):
|
def collate_grads(kern, i, dL, X):
|
||||||
kern.update_gradients_diag(dL,X)
|
kern.update_gradients_diag(dL,X)
|
||||||
if self.single_kern: target[:] += kern.gradient
|
if self.single_kern: target[:] += kern.gradient
|
||||||
else: target[i][:] += kern.gradient
|
else: target[i][:] += kern.gradient
|
||||||
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
|
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
|
||||||
if self.single_kern: kern.gradient = target
|
if self.single_kern: kern.gradient = target
|
||||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
|
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||||
|
|
||||||
class Hierarchical(CombinationKernel):
|
class Hierarchical(CombinationKernel):
|
||||||
"""
|
"""
|
||||||
|
|
@ -148,30 +156,30 @@ class Hierarchical(CombinationKernel):
|
||||||
def __init__(self, kern, name='hierarchy'):
|
def __init__(self, kern, name='hierarchy'):
|
||||||
assert all([k.input_dim==kerns[0].input_dim for k in kerns])
|
assert all([k.input_dim==kerns[0].input_dim for k in kerns])
|
||||||
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
|
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
|
||||||
self.kerns = kerns
|
kerns = kerns
|
||||||
self.add_parameters(self.kerns)
|
self.add_parameters(kerns)
|
||||||
|
|
||||||
def K(self,X ,X2=None):
|
def K(self,X ,X2=None):
|
||||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
|
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
|
||||||
K = self.kerns[0].K(X, X2)
|
K = kerns[0].K(X, X2)
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
[[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
|
[[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
|
||||||
else:
|
else:
|
||||||
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||||
[[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)]
|
[[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(kerns[1:], slices, slices2)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def Kdiag(self,X):
|
def Kdiag(self,X):
|
||||||
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)]
|
X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)]
|
||||||
K = self.kerns[0].K(X, X2)
|
K = kerns[0].K(X, X2)
|
||||||
[[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)]
|
[[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def update_gradients_full(self,dL_dK,X,X2=None):
|
def update_gradients_full(self,dL_dK,X,X2=None):
|
||||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
self.kerns[0].update_gradients_full(dL_dK, X, None)
|
kerns[0].update_gradients_full(dL_dK, X, None)
|
||||||
for k, slices_k in zip(self.kerns[1:], slices):
|
for k, slices_k in zip(kerns[1:], slices):
|
||||||
target = np.zeros(k.size)
|
target = np.zeros(k.size)
|
||||||
def collate_grads(dL, X, X2):
|
def collate_grads(dL, X, X2):
|
||||||
k.update_gradients_full(dL,X,X2)
|
k.update_gradients_full(dL,X,X2)
|
||||||
|
|
@ -180,8 +188,8 @@ class Hierarchical(CombinationKernel):
|
||||||
k._set_gradient(target)
|
k._set_gradient(target)
|
||||||
else:
|
else:
|
||||||
X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
|
X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
|
||||||
self.kerns[0].update_gradients_full(dL_dK, X, None)
|
kerns[0].update_gradients_full(dL_dK, X, None)
|
||||||
for k, slices_k in zip(self.kerns[1:], slices):
|
for k, slices_k in zip(kerns[1:], slices):
|
||||||
target = np.zeros(k.size)
|
target = np.zeros(k.size)
|
||||||
def collate_grads(dL, X, X2):
|
def collate_grads(dL, X, X2):
|
||||||
k.update_gradients_full(dL,X,X2)
|
k.update_gradients_full(dL,X,X2)
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,8 @@ class Kern(Parameterized):
|
||||||
# found in kernel_slice_operations
|
# found in kernel_slice_operations
|
||||||
__metaclass__ = KernCallsViaSlicerMeta
|
__metaclass__ = KernCallsViaSlicerMeta
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
_debug=False
|
|
||||||
_support_GPU=False
|
_support_GPU=False
|
||||||
def __init__(self, input_dim, active_dims, name, useGPU=False,*a, **kw):
|
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
|
||||||
"""
|
"""
|
||||||
The base class for a kernel: a positive definite function
|
The base class for a kernel: a positive definite function
|
||||||
which forms of a covariance function (kernel).
|
which forms of a covariance function (kernel).
|
||||||
|
|
@ -178,22 +177,6 @@ class Kern(Parameterized):
|
||||||
#else: kernels.append(other)
|
#else: kernels.append(other)
|
||||||
return Prod([self, other], name)
|
return Prod([self, other], name)
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
here just all the indices, rest can get recomputed
|
|
||||||
"""
|
|
||||||
return super(Kern, self)._getstate() + [
|
|
||||||
self.active_dims,
|
|
||||||
self.input_dim,
|
|
||||||
self._sliced_X]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
self._sliced_X = state.pop()
|
|
||||||
self.input_dim = state.pop()
|
|
||||||
self.active_dims = state.pop()
|
|
||||||
super(Kern, self)._setstate(state)
|
|
||||||
|
|
||||||
class CombinationKernel(Kern):
|
class CombinationKernel(Kern):
|
||||||
"""
|
"""
|
||||||
Abstract super class for combination kernels.
|
Abstract super class for combination kernels.
|
||||||
|
|
@ -211,9 +194,7 @@ class CombinationKernel(Kern):
|
||||||
:param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
|
:param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on
|
||||||
"""
|
"""
|
||||||
assert all([isinstance(k, Kern) for k in kernels])
|
assert all([isinstance(k, Kern) for k in kernels])
|
||||||
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
|
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
|
||||||
input_dim = active_dims.max()+1 + len(extra_dims)
|
|
||||||
active_dims = slice(active_dims.max()+1+len(extra_dims))
|
|
||||||
# initialize the kernel with the full input_dim
|
# initialize the kernel with the full input_dim
|
||||||
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
|
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
|
||||||
self.extra_dims = extra_dims
|
self.extra_dims = extra_dims
|
||||||
|
|
@ -223,6 +204,12 @@ class CombinationKernel(Kern):
|
||||||
def parts(self):
|
def parts(self):
|
||||||
return self._parameters_
|
return self._parameters_
|
||||||
|
|
||||||
|
def get_input_dim_active_dims(self, kernels, extra_dims = None):
|
||||||
|
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
|
||||||
|
input_dim = active_dims.max()+1 + (len(np.r_[extra_dims]) if extra_dims is not None else 0)
|
||||||
|
active_dims = slice(0, input_dim, 1)
|
||||||
|
return input_dim, active_dims
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def input_sensitivity(self):
|
||||||
in_sen = np.zeros((self.num_params, self.input_dim))
|
in_sen = np.zeros((self.num_params, self.input_dim))
|
||||||
for i, p in enumerate(self.parts):
|
for i, p in enumerate(self.parts):
|
||||||
|
|
|
||||||
|
|
@ -5,130 +5,135 @@ Created on 11 Mar 2014
|
||||||
'''
|
'''
|
||||||
from ...core.parameterization.parameterized import ParametersChangedMeta
|
from ...core.parameterization.parameterized import ParametersChangedMeta
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
|
def put_clean(dct, name, func):
|
||||||
|
if name in dct:
|
||||||
|
dct['_clean_{}'.format(name)] = dct[name]
|
||||||
|
dct[name] = func(dct[name])
|
||||||
|
|
||||||
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||||
def __call__(self, *args, **kw):
|
def __new__(cls, name, bases, dct):
|
||||||
instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
|
put_clean(dct, 'K', _slice_K)
|
||||||
instance.K = _slice_wrapper(instance, instance.K)
|
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
||||||
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
|
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
||||||
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
|
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||||
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
|
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
||||||
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
|
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||||
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
|
|
||||||
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
|
|
||||||
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
|
|
||||||
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
|
|
||||||
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
|
|
||||||
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
|
|
||||||
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
|
|
||||||
instance.parameters_changed()
|
|
||||||
return instance
|
|
||||||
|
|
||||||
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
|
put_clean(dct, 'psi0', _slice_psi)
|
||||||
"""
|
put_clean(dct, 'psi1', _slice_psi)
|
||||||
This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension.
|
put_clean(dct, 'psi2', _slice_psi)
|
||||||
The different switches are:
|
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
|
||||||
diag: if X2 exists
|
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
|
||||||
derivative: if first arg is dL_dK
|
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
|
||||||
psi_stat: if first 3 args are dL_dpsi0..2
|
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
|
||||||
psi_stat_Z: if first 2 args are dL_dpsi1..2
|
|
||||||
"""
|
class _Slice_wrap(object):
|
||||||
if derivative:
|
def __init__(self, k, X, X2=None):
|
||||||
if diag:
|
self.k = k
|
||||||
def x_slice_wrapper(dL_dKdiag, X):
|
self.shape = X.shape
|
||||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
if self.k._sliced_X == 0:
|
||||||
if ret_X_not_sliced:
|
assert X.shape[1] > max(np.r_[self.k.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.k.active_dims]))
|
||||||
ret = np.zeros(X.shape)
|
self.X = self.k._slice_X(X)
|
||||||
X = kern._slice_X(X) if not kern._sliced_X else X
|
self.X2 = self.k._slice_X(X2) if X2 is not None else X2
|
||||||
# if the return value is of shape X.shape, we need to make sure to return the right shape
|
self.ret = True
|
||||||
kern._sliced_X += 1
|
|
||||||
try:
|
|
||||||
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
|
|
||||||
else: ret = operation(dL_dKdiag, X)
|
|
||||||
except:
|
|
||||||
raise
|
|
||||||
finally:
|
|
||||||
kern._sliced_X -= 1
|
|
||||||
return ret
|
|
||||||
elif psi_stat:
|
|
||||||
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
|
||||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
|
||||||
if ret_X_not_sliced:
|
|
||||||
ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
|
|
||||||
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
|
|
||||||
kern._sliced_X += 1
|
|
||||||
# if the return value is of shape X.shape, we need to make sure to return the right shape
|
|
||||||
try:
|
|
||||||
if ret_X_not_sliced:
|
|
||||||
ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
|
|
||||||
r2 = ret[:2]
|
|
||||||
ret[0] = ret1
|
|
||||||
ret[1] = ret2
|
|
||||||
ret[0][:, kern.active_dims] = r2[0]
|
|
||||||
ret[1][:, kern.active_dims] = r2[1]
|
|
||||||
del r2
|
|
||||||
else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
|
||||||
except:
|
|
||||||
raise
|
|
||||||
finally:
|
|
||||||
kern._sliced_X -= 1
|
|
||||||
return ret
|
|
||||||
elif psi_stat_Z:
|
|
||||||
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
|
||||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
|
||||||
if ret_X_not_sliced: ret = np.zeros(Z.shape)
|
|
||||||
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
|
|
||||||
kern._sliced_X += 1
|
|
||||||
try:
|
|
||||||
if ret_X_not_sliced:
|
|
||||||
ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
|
||||||
else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
|
||||||
except:
|
|
||||||
raise
|
|
||||||
finally:
|
|
||||||
kern._sliced_X -= 1
|
|
||||||
return ret
|
|
||||||
else:
|
else:
|
||||||
def x_slice_wrapper(dL_dK, X, X2=None):
|
self.X = X
|
||||||
ret_X_not_sliced = ret_X and kern._sliced_X == 0
|
self.X2 = X2
|
||||||
if ret_X_not_sliced:
|
self.ret = False
|
||||||
ret = np.zeros(X.shape)
|
def __enter__(self):
|
||||||
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
|
self.k._sliced_X += 1
|
||||||
kern._sliced_X += 1
|
return self
|
||||||
try:
|
def __exit__(self, *a):
|
||||||
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
|
self.k._sliced_X -= 1
|
||||||
else: ret = operation(dL_dK, X, X2)
|
def handle_return_array(self, return_val):
|
||||||
except:
|
if self.ret:
|
||||||
raise
|
ret = np.zeros(self.shape)
|
||||||
finally:
|
ret[:, self.k.active_dims] = return_val
|
||||||
kern._sliced_X -= 1
|
return ret
|
||||||
return ret
|
return return_val
|
||||||
else:
|
|
||||||
if diag:
|
def _slice_K(f):
|
||||||
def x_slice_wrapper(X, *args, **kw):
|
@wraps(f)
|
||||||
X = kern._slice_X(X) if not kern._sliced_X else X
|
def wrap(self, X, X2 = None, *a, **kw):
|
||||||
kern._sliced_X += 1
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
try:
|
ret = f(self, s.X, s.X2, *a, **kw)
|
||||||
ret = operation(X, *args, **kw)
|
return ret
|
||||||
except:
|
return wrap
|
||||||
raise
|
|
||||||
finally:
|
def _slice_Kdiag(f):
|
||||||
kern._sliced_X -= 1
|
@wraps(f)
|
||||||
return ret
|
def wrap(self, X, *a, **kw):
|
||||||
else:
|
with _Slice_wrap(self, X, None) as s:
|
||||||
def x_slice_wrapper(X, X2=None, *args, **kw):
|
ret = f(self, s.X, *a, **kw)
|
||||||
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
|
return ret
|
||||||
kern._sliced_X += 1
|
return wrap
|
||||||
try:
|
|
||||||
ret = operation(X, X2, *args, **kw)
|
def _slice_update_gradients_full(f):
|
||||||
except: raise
|
@wraps(f)
|
||||||
finally:
|
def wrap(self, dL_dK, X, X2=None):
|
||||||
kern._sliced_X -= 1
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
return ret
|
ret = f(self, dL_dK, s.X, s.X2)
|
||||||
x_slice_wrapper._operation = operation
|
return ret
|
||||||
x_slice_wrapper.__name__ = ("slicer("+str(operation)
|
return wrap
|
||||||
+(","+str(bool(diag)) if diag else'')
|
|
||||||
+(','+str(bool(derivative)) if derivative else '')
|
def _slice_update_gradients_diag(f):
|
||||||
+')')
|
@wraps(f)
|
||||||
x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
|
def wrap(self, dL_dKdiag, X):
|
||||||
return x_slice_wrapper
|
with _Slice_wrap(self, X, None) as s:
|
||||||
|
ret = f(self, dL_dKdiag, s.X)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_gradients_X(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, dL_dK, X, X2=None):
|
||||||
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
|
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_gradients_X_diag(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, dL_dKdiag, X):
|
||||||
|
with _Slice_wrap(self, X, None) as s:
|
||||||
|
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_psi(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, Z, variational_posterior):
|
||||||
|
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||||
|
ret = f(self, s.X, s.X2)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_update_gradients_expectations(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
|
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||||
|
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_gradients_Z_expectations(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
|
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||||
|
ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_gradients_qX_expectations(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
|
with _Slice_wrap(self, variational_posterior, Z) as s:
|
||||||
|
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
|
||||||
|
r2 = ret[:2]
|
||||||
|
ret[0] = s.handle_return_array(r2[0])
|
||||||
|
ret[1] = s.handle_return_array(r2[1])
|
||||||
|
del r2
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
|
||||||
|
|
@ -313,3 +313,47 @@ class Linear(Kern):
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def input_sensitivity(self):
|
||||||
return np.ones(self.input_dim) * self.variances
|
return np.ones(self.input_dim) * self.variances
|
||||||
|
|
||||||
|
class LinearFull(Kern):
|
||||||
|
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
|
||||||
|
super(LinearFull, self).__init__(input_dim, active_dims, name)
|
||||||
|
if W is None:
|
||||||
|
W = np.ones((input_dim, rank))
|
||||||
|
if kappa is None:
|
||||||
|
kappa = np.ones(input_dim)
|
||||||
|
assert W.shape == (input_dim, rank)
|
||||||
|
assert kappa.shape == (input_dim,)
|
||||||
|
|
||||||
|
self.W = Param('W', W)
|
||||||
|
self.kappa = Param('kappa', kappa, Logexp())
|
||||||
|
self.add_parameters(self.W, self.kappa)
|
||||||
|
|
||||||
|
def K(self, X, X2=None):
|
||||||
|
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||||
|
return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2)
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None else X2)
|
||||||
|
self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W)
|
||||||
|
self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W)
|
||||||
|
|
||||||
|
def Kdiag(self, X):
|
||||||
|
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||||
|
return np.einsum('ij,jk,ik->i', X, P, X)
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
|
self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag)
|
||||||
|
self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag)
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
|
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||||
|
if X2 is None:
|
||||||
|
return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P)
|
||||||
|
else:
|
||||||
|
return np.einsum('ij,jk,kl->il', dL_dK, X2, P)
|
||||||
|
|
||||||
|
def gradients_X_diag(self, dL_dKdiag, X):
|
||||||
|
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||||
|
return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,6 @@ class Prod(CombinationKernel):
|
||||||
|
|
||||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
def K(self, X, X2=None, which_parts=None):
|
def K(self, X, X2=None, which_parts=None):
|
||||||
assert X.shape[1] == self.input_dim
|
|
||||||
if which_parts is None:
|
if which_parts is None:
|
||||||
which_parts = self.parts
|
which_parts = self.parts
|
||||||
elif not isinstance(which_parts, (list, tuple)):
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
|
@ -33,7 +32,6 @@ class Prod(CombinationKernel):
|
||||||
|
|
||||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
def Kdiag(self, X, which_parts=None):
|
def Kdiag(self, X, which_parts=None):
|
||||||
assert X.shape[1] == self.input_dim
|
|
||||||
if which_parts is None:
|
if which_parts is None:
|
||||||
which_parts = self.parts
|
which_parts = self.parts
|
||||||
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
|
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
|
||||||
|
|
@ -58,8 +56,6 @@ class Prod(CombinationKernel):
|
||||||
def gradients_X_diag(self, dL_dKdiag, X):
|
def gradients_X_diag(self, dL_dKdiag, X):
|
||||||
target = np.zeros(X.shape)
|
target = np.zeros(X.shape)
|
||||||
for k1,k2 in itertools.combinations(self.parts, 2):
|
for k1,k2 in itertools.combinations(self.parts, 2):
|
||||||
target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
|
target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X)
|
||||||
target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
|
target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -74,9 +74,6 @@ class RBF(Stationary):
|
||||||
# Spike-and-Slab GPLVM
|
# Spike-and-Slab GPLVM
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
# dL_dpsi0_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi0))
|
|
||||||
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
|
|
||||||
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
|
|
||||||
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -107,8 +104,6 @@ class RBF(Stationary):
|
||||||
|
|
||||||
#contributions from psi0:
|
#contributions from psi0:
|
||||||
self.variance.gradient = np.sum(dL_dpsi0)
|
self.variance.gradient = np.sum(dL_dpsi0)
|
||||||
if self._debug:
|
|
||||||
num_grad = self.lengthscale.gradient.copy()
|
|
||||||
self.lengthscale.gradient = 0.
|
self.lengthscale.gradient = 0.
|
||||||
|
|
||||||
#from psi1
|
#from psi1
|
||||||
|
|
@ -128,8 +123,6 @@ class RBF(Stationary):
|
||||||
else:
|
else:
|
||||||
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
|
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
|
||||||
|
|
||||||
if self._debug:
|
|
||||||
import ipdb;ipdb.set_trace()
|
|
||||||
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
|
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -139,8 +132,6 @@ class RBF(Stationary):
|
||||||
# Spike-and-Slab GPLVM
|
# Spike-and-Slab GPLVM
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
|
|
||||||
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
|
|
||||||
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||||
else:
|
else:
|
||||||
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||||
|
|
@ -177,8 +168,6 @@ class RBF(Stationary):
|
||||||
# Spike-and-Slab GPLVM
|
# Spike-and-Slab GPLVM
|
||||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
|
|
||||||
# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
|
|
||||||
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||||
else:
|
else:
|
||||||
ndata = variational_posterior.mean.shape[0]
|
ndata = variational_posterior.mean.shape[0]
|
||||||
|
|
|
||||||
|
|
@ -11,9 +11,9 @@ from kern import Kern
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from ...core.parameterization.transformations import Logexp
|
from ...core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
class Sympykern(Kern):
|
class Symbolic(Kern):
|
||||||
"""
|
"""
|
||||||
A kernel object, where all the hard work in done by sympy.
|
A kernel object, where all the hard work is done by sympy.
|
||||||
|
|
||||||
:param k: the covariance function
|
:param k: the covariance function
|
||||||
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
||||||
|
|
@ -26,10 +26,8 @@ class Sympykern(Kern):
|
||||||
- to handle multiple inputs, call them x_1, z_1, etc
|
- to handle multiple inputs, call them x_1, z_1, etc
|
||||||
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||||
"""
|
"""
|
||||||
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None):
|
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):
|
||||||
|
|
||||||
if name is None:
|
|
||||||
name='sympykern'
|
|
||||||
if k is None:
|
if k is None:
|
||||||
raise ValueError, "You must provide an argument for the covariance function."
|
raise ValueError, "You must provide an argument for the covariance function."
|
||||||
super(Sympykern, self).__init__(input_dim, active_dims, name)
|
super(Sympykern, self).__init__(input_dim, active_dims, name)
|
||||||
|
|
@ -60,7 +58,6 @@ class Sympykern(Kern):
|
||||||
# extract parameter names from the covariance
|
# extract parameter names from the covariance
|
||||||
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||||
|
|
||||||
|
|
||||||
# Look for parameters with index (subscripts), they are associated with different outputs.
|
# Look for parameters with index (subscripts), they are associated with different outputs.
|
||||||
if self.output_dim>1:
|
if self.output_dim>1:
|
||||||
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||||
|
|
@ -97,8 +94,8 @@ class Sympykern(Kern):
|
||||||
val = 1.0
|
val = 1.0
|
||||||
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
|
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
|
||||||
if param is not None:
|
if param is not None:
|
||||||
if param.has_key(theta):
|
if param.has_key(theta.name):
|
||||||
val = param[theta]
|
val = param[theta.name]
|
||||||
setattr(self, theta.name, Param(theta.name, val, None))
|
setattr(self, theta.name, Param(theta.name, val, None))
|
||||||
self.add_parameters(getattr(self, theta.name))
|
self.add_parameters(getattr(self, theta.name))
|
||||||
|
|
||||||
|
|
@ -117,6 +114,12 @@ class Sympykern(Kern):
|
||||||
self.arg_list += self._sp_theta_i + self._sp_theta_j
|
self.arg_list += self._sp_theta_i + self._sp_theta_j
|
||||||
self.diag_arg_list += self._sp_theta_i
|
self.diag_arg_list += self._sp_theta_i
|
||||||
|
|
||||||
|
# Check if there are additional linear operators on the covariance.
|
||||||
|
self._sp_operators = operators
|
||||||
|
# TODO: Deal with linear operators
|
||||||
|
#if self._sp_operators:
|
||||||
|
# for operator in self._sp_operators:
|
||||||
|
|
||||||
# psi_stats aren't yet implemented.
|
# psi_stats aren't yet implemented.
|
||||||
if False:
|
if False:
|
||||||
self.compute_psi_stats()
|
self.compute_psi_stats()
|
||||||
|
|
@ -254,3 +257,176 @@ class Sympykern(Kern):
|
||||||
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
|
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
|
||||||
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
|
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
|
||||||
|
|
||||||
|
if False:
|
||||||
|
class Symcombine(CombinationKernel):
|
||||||
|
"""
|
||||||
|
Combine list of given sympy covariances together with the provided operations.
|
||||||
|
"""
|
||||||
|
def __init__(self, subkerns, operations, name='sympy_combine'):
|
||||||
|
super(Symcombine, self).__init__(subkerns, name)
|
||||||
|
for subkern, operation in zip(subkerns, operations):
|
||||||
|
self._sp_k += self._k_double_operate(subkern._sp_k, operation)
|
||||||
|
|
||||||
|
#def _double_operate(self, k, operation):
|
||||||
|
|
||||||
|
|
||||||
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
|
def K(self, X, X2=None, which_parts=None):
|
||||||
|
"""
|
||||||
|
Combine covariances with a linear operator.
|
||||||
|
"""
|
||||||
|
assert X.shape[1] == self.input_dim
|
||||||
|
if which_parts is None:
|
||||||
|
which_parts = self.parts
|
||||||
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
# if only one part is given
|
||||||
|
which_parts = [which_parts]
|
||||||
|
return reduce(np.add, (p.K(X, X2) for p in which_parts))
|
||||||
|
|
||||||
|
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||||
|
def Kdiag(self, X, which_parts=None):
|
||||||
|
assert X.shape[1] == self.input_dim
|
||||||
|
if which_parts is None:
|
||||||
|
which_parts = self.parts
|
||||||
|
elif not isinstance(which_parts, (list, tuple)):
|
||||||
|
# if only one part is given
|
||||||
|
which_parts = [which_parts]
|
||||||
|
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dK, X):
|
||||||
|
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
|
"""Compute the gradient of the objective function with respect to X.
|
||||||
|
|
||||||
|
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
||||||
|
:type dL_dK: np.ndarray (num_samples x num_inducing)
|
||||||
|
:param X: Observed data inputs
|
||||||
|
:type X: np.ndarray (num_samples x input_dim)
|
||||||
|
:param X2: Observed data inputs (optional, defaults to X)
|
||||||
|
:type X2: np.ndarray (num_inducing x input_dim)"""
|
||||||
|
|
||||||
|
target = np.zeros(X.shape)
|
||||||
|
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
|
||||||
|
return target
|
||||||
|
|
||||||
|
def gradients_X_diag(self, dL_dKdiag, X):
|
||||||
|
target = np.zeros(X.shape)
|
||||||
|
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
|
||||||
|
return target
|
||||||
|
|
||||||
|
def psi0(self, Z, variational_posterior):
|
||||||
|
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
|
||||||
|
|
||||||
|
def psi1(self, Z, variational_posterior):
|
||||||
|
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
|
||||||
|
|
||||||
|
def psi2(self, Z, variational_posterior):
|
||||||
|
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
|
||||||
|
#return psi2
|
||||||
|
# compute the "cross" terms
|
||||||
|
from static import White, Bias
|
||||||
|
from rbf import RBF
|
||||||
|
#from rbf_inv import RBFInv
|
||||||
|
from linear import Linear
|
||||||
|
#ffrom fixed import Fixed
|
||||||
|
|
||||||
|
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||||
|
# i1, i2 = p1.active_dims, p2.active_dims
|
||||||
|
# white doesn;t combine with anything
|
||||||
|
if isinstance(p1, White) or isinstance(p2, White):
|
||||||
|
pass
|
||||||
|
# rbf X bias
|
||||||
|
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
|
||||||
|
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
|
||||||
|
tmp = p2.psi1(Z, variational_posterior)
|
||||||
|
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
|
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
|
||||||
|
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
|
||||||
|
tmp = p1.psi1(Z, variational_posterior)
|
||||||
|
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
|
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
|
||||||
|
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
|
||||||
|
tmp1 = p1.psi1(Z, variational_posterior)
|
||||||
|
tmp2 = p2.psi1(Z, variational_posterior)
|
||||||
|
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
|
||||||
|
else:
|
||||||
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
return psi2
|
||||||
|
|
||||||
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
|
from static import White, Bias
|
||||||
|
for p1 in self.parts:
|
||||||
|
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
|
||||||
|
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||||
|
for p2 in self.parts:
|
||||||
|
if p2 is p1:
|
||||||
|
continue
|
||||||
|
if isinstance(p2, White):
|
||||||
|
continue
|
||||||
|
elif isinstance(p2, Bias):
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||||
|
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||||
|
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||||
|
|
||||||
|
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
|
from static import White, Bias
|
||||||
|
target = np.zeros(Z.shape)
|
||||||
|
for p1 in self.parts:
|
||||||
|
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||||
|
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||||
|
for p2 in self.parts:
|
||||||
|
if p2 is p1:
|
||||||
|
continue
|
||||||
|
if isinstance(p2, White):
|
||||||
|
continue
|
||||||
|
elif isinstance(p2, Bias):
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||||
|
else:
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||||
|
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||||
|
return target
|
||||||
|
|
||||||
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
|
from static import White, Bias
|
||||||
|
target_mu = np.zeros(variational_posterior.shape)
|
||||||
|
target_S = np.zeros(variational_posterior.shape)
|
||||||
|
for p1 in self._parameters_:
|
||||||
|
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||||
|
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||||
|
for p2 in self._parameters_:
|
||||||
|
if p2 is p1:
|
||||||
|
continue
|
||||||
|
if isinstance(p2, White):
|
||||||
|
continue
|
||||||
|
elif isinstance(p2, Bias):
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||||
|
else:
|
||||||
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||||
|
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||||
|
target_mu += a
|
||||||
|
target_S += b
|
||||||
|
return target_mu, target_S
|
||||||
|
|
||||||
|
def _getstate(self):
|
||||||
|
"""
|
||||||
|
Get the current state of the class,
|
||||||
|
here just all the indices, rest can get recomputed
|
||||||
|
"""
|
||||||
|
return super(Add, self)._getstate()
|
||||||
|
|
||||||
|
def _setstate(self, state):
|
||||||
|
super(Add, self)._setstate(state)
|
||||||
|
|
||||||
|
def add(self, other, name='sum'):
|
||||||
|
if isinstance(other, Add):
|
||||||
|
other_params = other._parameters_.copy()
|
||||||
|
for p in other_params:
|
||||||
|
other.remove_parameter(p)
|
||||||
|
self.add_parameters(*other_params)
|
||||||
|
else: self.add_parameter(other)
|
||||||
|
return self
|
||||||
BIN
GPy/likelihoods/.DS_Store
vendored
BIN
GPy/likelihoods/.DS_Store
vendored
Binary file not shown.
|
|
@ -6,3 +6,17 @@ from poisson import Poisson
|
||||||
from student_t import StudentT
|
from student_t import StudentT
|
||||||
from likelihood import Likelihood
|
from likelihood import Likelihood
|
||||||
from mixed_noise import MixedNoise
|
from mixed_noise import MixedNoise
|
||||||
|
# TODO need to fix this in a config file.
|
||||||
|
try:
|
||||||
|
import sympy as sym
|
||||||
|
sympy_available=True
|
||||||
|
except ImportError:
|
||||||
|
sympy_available=False
|
||||||
|
if sympy_available:
|
||||||
|
# These are likelihoods that rely on symbolic.
|
||||||
|
from symbolic import Symbolic
|
||||||
|
#from sstudent_t import SstudentT
|
||||||
|
from negative_binomial import Negative_binomial
|
||||||
|
#from skew_normal import Skew_normal
|
||||||
|
#from skew_exponential import Skew_exponential
|
||||||
|
#from null_category import Null_category
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ class Bernoulli(Likelihood):
|
||||||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||||
|
|
||||||
.. Note::
|
.. Note::
|
||||||
Y is expected to take values in {-1, 1} TODO: {0, 1}??
|
Y takes values in either {-1, 1} or {0, 1}.
|
||||||
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
|
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
|
||||||
|
|
||||||
.. See also::
|
.. See also::
|
||||||
|
|
@ -54,10 +54,10 @@ class Bernoulli(Likelihood):
|
||||||
"""
|
"""
|
||||||
if Y_i == 1:
|
if Y_i == 1:
|
||||||
sign = 1.
|
sign = 1.
|
||||||
elif Y_i == 0:
|
elif Y_i == 0 or Y_i == -1:
|
||||||
sign = -1
|
sign = -1
|
||||||
else:
|
else:
|
||||||
raise ValueError("bad value for Bernouilli observation (0, 1)")
|
raise ValueError("bad value for Bernoulli observation (0, 1)")
|
||||||
if isinstance(self.gp_link, link_functions.Probit):
|
if isinstance(self.gp_link, link_functions.Probit):
|
||||||
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
|
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
|
||||||
Z_hat = std_norm_cdf(z)
|
Z_hat = std_norm_cdf(z)
|
||||||
|
|
@ -95,15 +95,15 @@ class Bernoulli(Likelihood):
|
||||||
else:
|
else:
|
||||||
return np.nan
|
return np.nan
|
||||||
|
|
||||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Likelihood function given link(f)
|
Likelihood function given inverse link of f.
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata not used in bernoulli
|
:param Y_metadata: Y_metadata not used in bernoulli
|
||||||
|
|
@ -113,102 +113,106 @@ class Bernoulli(Likelihood):
|
||||||
.. Note:
|
.. Note:
|
||||||
Each y_i must be in {0, 1}
|
Each y_i must be in {0, 1}
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
#objective = (link_f**y) * ((1.-link_f)**(1.-y))
|
#objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
|
||||||
objective = np.where(y, link_f, 1.-link_f)
|
objective = np.where(y, inv_link_f, 1.-inv_link_f)
|
||||||
return np.exp(np.sum(np.log(objective)))
|
return np.exp(np.sum(np.log(objective)))
|
||||||
|
|
||||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Log Likelihood function given link(f)
|
Log Likelihood function given inverse link of f.
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
|
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata not used in bernoulli
|
:param Y_metadata: Y_metadata not used in bernoulli
|
||||||
:returns: log likelihood evaluated at points link(f)
|
:returns: log likelihood evaluated at points inverse link of f.
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
#objective = y*np.log(link_f) + (1.-y)*np.log(link_f)
|
#objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
|
||||||
state = np.seterr(divide='ignore')
|
state = np.seterr(divide='ignore')
|
||||||
objective = np.where(y==1, np.log(link_f), np.log(1-link_f))
|
# TODO check y \in {0, 1} or {-1, 1}
|
||||||
|
objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f))
|
||||||
np.seterr(**state)
|
np.seterr(**state)
|
||||||
return np.sum(objective)
|
return np.sum(objective)
|
||||||
|
|
||||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Gradient of the pdf at y, given link(f) w.r.t link(f)
|
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
|
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata not used in bernoulli
|
:param Y_metadata: Y_metadata not used in bernoulli
|
||||||
:returns: gradient of log likelihood evaluated at points link(f)
|
:returns: gradient of log likelihood evaluated at points inverse link of f.
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
#grad = (y/link_f) - (1.-y)/(1-link_f)
|
#grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
|
||||||
state = np.seterr(divide='ignore')
|
state = np.seterr(divide='ignore')
|
||||||
grad = np.where(y, 1./link_f, -1./(1-link_f))
|
# TODO check y \in {0, 1} or {-1, 1}
|
||||||
|
grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
|
||||||
np.seterr(**state)
|
np.seterr(**state)
|
||||||
return grad
|
return grad
|
||||||
|
|
||||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j
|
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
|
||||||
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
|
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
|
||||||
|
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
|
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables inverse link of f.
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata not used in bernoulli
|
:param Y_metadata: Y_metadata not used in bernoulli
|
||||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
|
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
|
|
||||||
.. Note::
|
.. Note::
|
||||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
#d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2)
|
#d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
|
||||||
state = np.seterr(divide='ignore')
|
state = np.seterr(divide='ignore')
|
||||||
d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f))
|
# TODO check y \in {0, 1} or {-1, 1}
|
||||||
|
d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
|
||||||
np.seterr(**state)
|
np.seterr(**state)
|
||||||
return d2logpdf_dlink2
|
return d2logpdf_dlink2
|
||||||
|
|
||||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
|
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables passed through inverse link of f.
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata not used in bernoulli
|
:param Y_metadata: Y_metadata not used in bernoulli
|
||||||
:returns: third derivative of log likelihood evaluated at points link(f)
|
:returns: third derivative of log likelihood evaluated at points inverse_link(f)
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
#d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3))
|
#d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
|
||||||
state = np.seterr(divide='ignore')
|
state = np.seterr(divide='ignore')
|
||||||
d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3))
|
# TODO check y \in {0, 1} or {-1, 1}
|
||||||
|
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
|
||||||
np.seterr(**state)
|
np.seterr(**state)
|
||||||
return d3logpdf_dlink3
|
return d3logpdf_dlink3
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,20 +16,20 @@ class Likelihood(Parameterized):
|
||||||
Likelihood base class, used to defing p(y|f).
|
Likelihood base class, used to defing p(y|f).
|
||||||
|
|
||||||
All instances use _inverse_ link functions, which can be swapped out. It is
|
All instances use _inverse_ link functions, which can be swapped out. It is
|
||||||
expected that inherriting classes define a default inverse link function
|
expected that inheriting classes define a default inverse link function
|
||||||
|
|
||||||
To use this class, inherrit and define missing functionality.
|
To use this class, inherit and define missing functionality.
|
||||||
|
|
||||||
Inherriting classes *must* implement:
|
Inheriting classes *must* implement:
|
||||||
pdf_link : a bound method which turns the output of the link function into the pdf
|
pdf_link : a bound method which turns the output of the link function into the pdf
|
||||||
logpdf_link : the logarithm of the above
|
logpdf_link : the logarithm of the above
|
||||||
|
|
||||||
To enable use with EP, inherriting classes *must* define:
|
To enable use with EP, inheriting classes *must* define:
|
||||||
TODO: a suitable derivative function for any parameters of the class
|
TODO: a suitable derivative function for any parameters of the class
|
||||||
It is also desirable to define:
|
It is also desirable to define:
|
||||||
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
|
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
|
||||||
|
|
||||||
To enable use with Laplace approximation, inherriting classes *must* define:
|
To enable use with Laplace approximation, inheriting classes *must* define:
|
||||||
Some derivative functions *AS TODO*
|
Some derivative functions *AS TODO*
|
||||||
|
|
||||||
For exact Gaussian inference, define *JH TODO*
|
For exact Gaussian inference, define *JH TODO*
|
||||||
|
|
@ -159,7 +159,7 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Numerical approximation to the predictive variance: V(Y_star)
|
Approximation to the predictive variance: V(Y_star)
|
||||||
|
|
||||||
The following variance decomposition is used:
|
The following variance decomposition is used:
|
||||||
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||||
|
|
@ -208,28 +208,28 @@ class Likelihood(Parameterized):
|
||||||
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
|
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
|
||||||
return exp_var + var_exp
|
return exp_var + var_exp
|
||||||
|
|
||||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
|
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
|
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
|
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def pdf(self, f, y, Y_metadata=None):
|
def pdf(self, f, y, Y_metadata=None):
|
||||||
|
|
@ -247,8 +247,8 @@ class Likelihood(Parameterized):
|
||||||
:returns: likelihood evaluated for this point
|
:returns: likelihood evaluated for this point
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
return self.pdf_link(link_f, y, Y_metadata=Y_metadata)
|
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
|
|
||||||
def logpdf(self, f, y, Y_metadata=None):
|
def logpdf(self, f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -265,8 +265,8 @@ class Likelihood(Parameterized):
|
||||||
:returns: log likelihood evaluated for this point
|
:returns: log likelihood evaluated for this point
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)
|
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
|
|
||||||
def dlogpdf_df(self, f, y, Y_metadata=None):
|
def dlogpdf_df(self, f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -284,8 +284,8 @@ class Likelihood(Parameterized):
|
||||||
:returns: derivative of log likelihood evaluated for this point
|
:returns: derivative of log likelihood evaluated for this point
|
||||||
:rtype: 1xN array
|
:rtype: 1xN array
|
||||||
"""
|
"""
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
dlink_df = self.gp_link.dtransf_df(f)
|
dlink_df = self.gp_link.dtransf_df(f)
|
||||||
return chain_1(dlogpdf_dlink, dlink_df)
|
return chain_1(dlogpdf_dlink, dlink_df)
|
||||||
|
|
||||||
|
|
@ -305,10 +305,10 @@ class Likelihood(Parameterized):
|
||||||
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
||||||
:rtype: 1xN array
|
:rtype: 1xN array
|
||||||
"""
|
"""
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
|
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
dlink_df = self.gp_link.dtransf_df(f)
|
dlink_df = self.gp_link.dtransf_df(f)
|
||||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||||
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||||
|
|
||||||
|
|
@ -328,12 +328,12 @@ class Likelihood(Parameterized):
|
||||||
:returns: third derivative of log likelihood evaluated for this point
|
:returns: third derivative of log likelihood evaluated for this point
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
|
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
dlink_df = self.gp_link.dtransf_df(f)
|
dlink_df = self.gp_link.dtransf_df(f)
|
||||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
|
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
|
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||||
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||||
|
|
||||||
|
|
@ -342,10 +342,10 @@ class Likelihood(Parameterized):
|
||||||
TODO: Doc strings
|
TODO: Doc strings
|
||||||
"""
|
"""
|
||||||
if self.size > 0:
|
if self.size > 0:
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
|
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
else:
|
else:
|
||||||
#Is no parameters so return an empty array for its derivatives
|
# There are no parameters so return an empty array for derivatives
|
||||||
return np.zeros([1, 0])
|
return np.zeros([1, 0])
|
||||||
|
|
||||||
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
|
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
|
||||||
|
|
@ -353,12 +353,12 @@ class Likelihood(Parameterized):
|
||||||
TODO: Doc strings
|
TODO: Doc strings
|
||||||
"""
|
"""
|
||||||
if self.size > 0:
|
if self.size > 0:
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
dlink_df = self.gp_link.dtransf_df(f)
|
dlink_df = self.gp_link.dtransf_df(f)
|
||||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
|
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||||
else:
|
else:
|
||||||
#Is no parameters so return an empty array for its derivatives
|
# There are no parameters so return an empty array for derivatives
|
||||||
return np.zeros([f.shape[0], 0])
|
return np.zeros([f.shape[0], 0])
|
||||||
|
|
||||||
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
|
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
|
||||||
|
|
@ -366,14 +366,14 @@ class Likelihood(Parameterized):
|
||||||
TODO: Doc strings
|
TODO: Doc strings
|
||||||
"""
|
"""
|
||||||
if self.size > 0:
|
if self.size > 0:
|
||||||
link_f = self.gp_link.transf(f)
|
inv_link_f = self.gp_link.transf(f)
|
||||||
dlink_df = self.gp_link.dtransf_df(f)
|
dlink_df = self.gp_link.dtransf_df(f)
|
||||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
|
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
|
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||||
else:
|
else:
|
||||||
#Is no parameters so return an empty array for its derivatives
|
# There are no parameters so return an empty array for derivatives
|
||||||
return np.zeros([f.shape[0], 0])
|
return np.zeros([f.shape[0], 0])
|
||||||
|
|
||||||
def _laplace_gradients(self, f, y, Y_metadata=None):
|
def _laplace_gradients(self, f, y, Y_metadata=None):
|
||||||
|
|
@ -383,12 +383,9 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||||
# ensure we have gradients for every parameter we want to optimize
|
# ensure we have gradients for every parameter we want to optimize
|
||||||
try:
|
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
|
||||||
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
|
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
|
||||||
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
|
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
|
||||||
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
|
|
||||||
except Exception as e:
|
|
||||||
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
|
||||||
|
|
||||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||||
|
|
||||||
|
|
@ -411,7 +408,10 @@ class Likelihood(Parameterized):
|
||||||
#compute the quantiles by sampling!!!
|
#compute the quantiles by sampling!!!
|
||||||
N_samp = 1000
|
N_samp = 1000
|
||||||
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
|
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
|
||||||
|
#ss_f = s.flatten()
|
||||||
|
#ss_y = self.samples(ss_f, Y_metadata)
|
||||||
ss_y = self.samples(s, Y_metadata)
|
ss_y = self.samples(s, Y_metadata)
|
||||||
|
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
|
||||||
|
|
||||||
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
|
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -71,6 +71,7 @@ class Probit(GPTransformation):
|
||||||
|
|
||||||
g(f) = \\Phi^{-1} (mu)
|
g(f) = \\Phi^{-1} (mu)
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def transf(self,f):
|
def transf(self,f):
|
||||||
return std_norm_cdf(f)
|
return std_norm_cdf(f)
|
||||||
|
|
|
||||||
46
GPy/likelihoods/negative_binomial.py
Normal file
46
GPy/likelihoods/negative_binomial.py
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
import sympy as sym
|
||||||
|
sympy_available=True
|
||||||
|
from sympy.utilities.lambdify import lambdify
|
||||||
|
from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian
|
||||||
|
except ImportError:
|
||||||
|
sympy_available=False
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||||
|
import link_functions
|
||||||
|
from symbolic import Symbolic
|
||||||
|
from scipy import stats
|
||||||
|
|
||||||
|
if sympy_available:
|
||||||
|
class Negative_binomial(Symbolic):
|
||||||
|
"""
|
||||||
|
Negative binomial
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Y takes non zero integer values..
|
||||||
|
link function should have a positive domain, e.g. log (default).
|
||||||
|
|
||||||
|
.. See also::
|
||||||
|
symbolic.py, for the parent class
|
||||||
|
"""
|
||||||
|
def __init__(self, gp_link=None):
|
||||||
|
if gp_link is None:
|
||||||
|
gp_link = link_functions.Log()
|
||||||
|
|
||||||
|
dispersion = sym.Symbol('dispersion', positive=True, real=True)
|
||||||
|
y = sym.Symbol('y', nonnegative=True, integer=True)
|
||||||
|
f = sym.Symbol('f', positive=True, real=True)
|
||||||
|
log_pdf=dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f)
|
||||||
|
super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial')
|
||||||
|
|
||||||
|
# TODO: Check this.
|
||||||
|
self.log_concave = False
|
||||||
|
|
||||||
|
|
@ -26,8 +26,8 @@ class StudentT(Likelihood):
|
||||||
gp_link = link_functions.Identity()
|
gp_link = link_functions.Identity()
|
||||||
|
|
||||||
super(StudentT, self).__init__(gp_link, name='Student_T')
|
super(StudentT, self).__init__(gp_link, name='Student_T')
|
||||||
|
# sigma2 is not a noise parameter, it is a squared scale.
|
||||||
self.sigma2 = Param('t_noise', float(sigma2), Logexp())
|
self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
|
||||||
self.v = Param('deg_free', float(deg_free))
|
self.v = Param('deg_free', float(deg_free))
|
||||||
self.add_parameter(self.sigma2)
|
self.add_parameter(self.sigma2)
|
||||||
self.add_parameter(self.v)
|
self.add_parameter(self.v)
|
||||||
|
|
@ -46,23 +46,23 @@ class StudentT(Likelihood):
|
||||||
self.sigma2.gradient = grads[0]
|
self.sigma2.gradient = grads[0]
|
||||||
self.v.gradient = grads[1]
|
self.v.gradient = grads[1]
|
||||||
|
|
||||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Likelihood function given link(f)
|
Likelihood function given link(f)
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
|
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables link(f)
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: likelihood evaluated for this point
|
:returns: likelihood evaluated for this point
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
#Careful gamma(big_number) is infinity!
|
#Careful gamma(big_number) is infinity!
|
||||||
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
||||||
/ (np.sqrt(self.v * np.pi * self.sigma2)))
|
/ (np.sqrt(self.v * np.pi * self.sigma2)))
|
||||||
|
|
@ -70,15 +70,15 @@ class StudentT(Likelihood):
|
||||||
)
|
)
|
||||||
return np.prod(objective)
|
return np.prod(objective)
|
||||||
|
|
||||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Log Likelihood Function given link(f)
|
Log Likelihood Function given link(f)
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||||
|
|
||||||
:param link_f: latent variables (link(f))
|
:param inv_link_f: latent variables (link(f))
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
|
|
@ -86,11 +86,11 @@ class StudentT(Likelihood):
|
||||||
:rtype: float
|
:rtype: float
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
#FIXME:
|
#FIXME:
|
||||||
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
|
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||||
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly
|
#But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
|
||||||
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||||
objective = (+ gammaln((self.v + 1) * 0.5)
|
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||||
- gammaln(self.v * 0.5)
|
- gammaln(self.v * 0.5)
|
||||||
|
|
@ -99,15 +99,15 @@ class StudentT(Likelihood):
|
||||||
)
|
)
|
||||||
return np.sum(objective)
|
return np.sum(objective)
|
||||||
|
|
||||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||||
|
|
||||||
:param link_f: latent variables (f)
|
:param inv_link_f: latent variables (f)
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
|
|
@ -115,12 +115,12 @@ class StudentT(Likelihood):
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||||
return grad
|
return grad
|
||||||
|
|
||||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Hessian at y, given link(f), w.r.t link(f)
|
Hessian at y, given link(f), w.r.t link(f)
|
||||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||||
|
|
@ -129,8 +129,8 @@ class StudentT(Likelihood):
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables inv_link(f)
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
|
|
@ -141,90 +141,90 @@ class StudentT(Likelihood):
|
||||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||||
return hess
|
return hess
|
||||||
|
|
||||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
|
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables link(f)
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: third derivative of likelihood evaluated at points f
|
:returns: third derivative of likelihood evaluated at points f
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||||
((e**2 + self.sigma2*self.v)**3)
|
((e**2 + self.sigma2*self.v)**3)
|
||||||
)
|
)
|
||||||
return d3lik_dlink3
|
return d3lik_dlink3
|
||||||
|
|
||||||
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
|
def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables link(f)
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||||
:rtype: float
|
:rtype: float
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||||
return np.sum(dlogpdf_dvar)
|
return np.sum(dlogpdf_dvar)
|
||||||
|
|
||||||
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
|
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||||
|
|
||||||
:param link_f: latent variables link_f
|
:param inv_link_f: latent variables inv_link_f
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||||
return dlogpdf_dlink_dvar
|
return dlogpdf_dlink_dvar
|
||||||
|
|
||||||
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
|
def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
||||||
|
|
||||||
:param link_f: latent variables link(f)
|
:param inv_link_f: latent variables link(f)
|
||||||
:type link_f: Nx1 array
|
:type inv_link_f: Nx1 array
|
||||||
:param y: data
|
:param y: data
|
||||||
:type y: Nx1 array
|
:type y: Nx1 array
|
||||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||||
:rtype: Nx1 array
|
:rtype: Nx1 array
|
||||||
"""
|
"""
|
||||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
e = y - link_f
|
e = y - inv_link_f
|
||||||
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||||
/ ((self.sigma2*self.v + (e**2))**3)
|
/ ((self.sigma2*self.v + (e**2))**3)
|
||||||
)
|
)
|
||||||
|
|
@ -246,11 +246,12 @@ class StudentT(Likelihood):
|
||||||
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||||
|
|
||||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||||
return self.gp_link.transf(mu) # only true in link is monotoci, which it is.
|
# The comment here confuses mean and median.
|
||||||
|
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
|
||||||
|
|
||||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||||
if self.deg_free <2.:
|
if self.deg_free<=2.:
|
||||||
return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom
|
return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
|
||||||
else:
|
else:
|
||||||
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
|
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
|
||||||
|
|
||||||
|
|
|
||||||
298
GPy/likelihoods/symbolic.py
Normal file
298
GPy/likelihoods/symbolic.py
Normal file
|
|
@ -0,0 +1,298 @@
|
||||||
|
# Copyright (c) 2014 GPy Authors
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import sympy as sym
|
||||||
|
sympy_available=True
|
||||||
|
from sympy.utilities.lambdify import lambdify
|
||||||
|
except ImportError:
|
||||||
|
sympy_available=False
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import link_functions
|
||||||
|
from scipy import stats, integrate
|
||||||
|
from scipy.special import gammaln, gamma, erf, polygamma
|
||||||
|
from GPy.util.functions import cum_gaussian, ln_cum_gaussian
|
||||||
|
from likelihood import Likelihood
|
||||||
|
from ..core.parameterization import Param
|
||||||
|
|
||||||
|
|
||||||
|
if sympy_available:
|
||||||
|
class Symbolic(Likelihood):
|
||||||
|
"""
|
||||||
|
Symbolic likelihood.
|
||||||
|
|
||||||
|
Likelihood where the form of the likelihood is provided by a sympy expression.
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, param=None, func_modules=[]):
|
||||||
|
|
||||||
|
if gp_link is None:
|
||||||
|
gp_link = link_functions.Identity()
|
||||||
|
|
||||||
|
if log_pdf is None:
|
||||||
|
raise ValueError, "You must provide an argument for the log pdf."
|
||||||
|
|
||||||
|
self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy']
|
||||||
|
|
||||||
|
super(Symbolic, self).__init__(gp_link, name=name)
|
||||||
|
self.missing_data = False
|
||||||
|
self._sym_log_pdf = log_pdf
|
||||||
|
if missing_log_pdf:
|
||||||
|
self.missing_data = True
|
||||||
|
self._sym_missing_log_pdf = missing_log_pdf
|
||||||
|
|
||||||
|
# pull the variable names out of the symbolic pdf
|
||||||
|
sym_vars = [e for e in self._sym_log_pdf.atoms() if e.is_Symbol]
|
||||||
|
self._sym_f = [e for e in sym_vars if e.name=='f']
|
||||||
|
if not self._sym_f:
|
||||||
|
raise ValueError('No variable f in log pdf.')
|
||||||
|
self._sym_y = [e for e in sym_vars if e.name=='y']
|
||||||
|
if not self._sym_y:
|
||||||
|
raise ValueError('No variable y in log pdf.')
|
||||||
|
self._sym_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name)
|
||||||
|
|
||||||
|
theta_names = [theta.name for theta in self._sym_theta]
|
||||||
|
if self.missing_data:
|
||||||
|
# pull the variable names out of missing data
|
||||||
|
sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol]
|
||||||
|
sym_f = [e for e in sym_vars if e.name=='f']
|
||||||
|
if not sym_f:
|
||||||
|
raise ValueError('No variable f in missing log pdf.')
|
||||||
|
sym_y = [e for e in sym_vars if e.name=='y']
|
||||||
|
if sym_y:
|
||||||
|
raise ValueError('Data is present in missing data portion of likelihood.')
|
||||||
|
# additional missing data parameters
|
||||||
|
missing_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='missing' or e.name in theta_names)],key=lambda e:e.name)
|
||||||
|
self._sym_theta += missing_theta
|
||||||
|
self._sym_theta = sorted(self._sym_theta, key=lambda e:e.name)
|
||||||
|
|
||||||
|
# These are all the arguments need to compute likelihoods.
|
||||||
|
self.arg_list = self._sym_y + self._sym_f + self._sym_theta
|
||||||
|
|
||||||
|
# these are arguments for computing derivatives.
|
||||||
|
derivative_arguments = self._sym_f + self._sym_theta
|
||||||
|
|
||||||
|
# Do symbolic work to compute derivatives.
|
||||||
|
self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments}
|
||||||
|
self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
||||||
|
self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
||||||
|
|
||||||
|
if self.missing_data:
|
||||||
|
# Do symbolic work to compute derivatives.
|
||||||
|
self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments}
|
||||||
|
self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
||||||
|
self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments}
|
||||||
|
|
||||||
|
|
||||||
|
# Add parameters to the model.
|
||||||
|
for theta in self._sym_theta:
|
||||||
|
val = 1.0
|
||||||
|
# TODO: need to decide how to handle user passing values for the se parameter vectors.
|
||||||
|
if param is not None:
|
||||||
|
if param.has_key(theta.name):
|
||||||
|
val = param[theta.name]
|
||||||
|
setattr(self, theta.name, Param(theta.name, val, None))
|
||||||
|
self.add_parameters(getattr(self, theta.name))
|
||||||
|
|
||||||
|
|
||||||
|
# Is there some way to check whether the pdf is log
|
||||||
|
# concave? For the moment, need user to specify.
|
||||||
|
self.log_concave = log_concave
|
||||||
|
|
||||||
|
# initialise code arguments
|
||||||
|
self._arguments = {}
|
||||||
|
|
||||||
|
# generate the code for the pdf and derivatives
|
||||||
|
self._gen_code()
|
||||||
|
|
||||||
|
def _gen_code(self):
|
||||||
|
"""Generate the code from the symbolic parts that will be used for likleihod computation."""
|
||||||
|
# TODO: Check here whether theano is available and set up
|
||||||
|
# functions accordingly.
|
||||||
|
self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules)
|
||||||
|
|
||||||
|
# compute code for derivatives (for implicit likelihood terms
|
||||||
|
# we need up to 3rd derivatives)
|
||||||
|
setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()})
|
||||||
|
setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()})
|
||||||
|
setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()})
|
||||||
|
|
||||||
|
if self.missing_data:
|
||||||
|
setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()})
|
||||||
|
setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()})
|
||||||
|
setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()})
|
||||||
|
|
||||||
|
# TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta
|
||||||
|
|
||||||
|
def parameters_changed(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def update_gradients(self, grads):
|
||||||
|
"""
|
||||||
|
Pull out the gradients, be careful as the order must match the order
|
||||||
|
in which the parameters are added
|
||||||
|
"""
|
||||||
|
# The way the Laplace approximation is run requires the
|
||||||
|
# covariance function to compute the true gradient (because it
|
||||||
|
# is dependent on the mode). This means we actually compute
|
||||||
|
# the gradient outside this object. This function would
|
||||||
|
# normally ask the object to update its gradients internally,
|
||||||
|
# but here it provides them externally, because they are
|
||||||
|
# computed in the inference code. TODO: Thought: How does this
|
||||||
|
# effect EP? Shouldn't this be done by a separate
|
||||||
|
# Laplace-approximation specific call?
|
||||||
|
for grad, theta in zip(grads, self._sym_theta):
|
||||||
|
parameter = getattr(self, theta.name)
|
||||||
|
setattr(parameter, 'gradient', grad)
|
||||||
|
|
||||||
|
def _arguments_update(self, f, y):
|
||||||
|
"""Set up argument lists for the derivatives."""
|
||||||
|
# If we do make use of Theano, then at this point we would
|
||||||
|
# need to do a lot of precomputation to ensure that the
|
||||||
|
# likelihoods and gradients are computed together, then check
|
||||||
|
# for parameter changes before updating.
|
||||||
|
for i, fvar in enumerate(self._sym_f):
|
||||||
|
self._arguments[fvar.name] = f
|
||||||
|
for i, yvar in enumerate(self._sym_y):
|
||||||
|
self._arguments[yvar.name] = y
|
||||||
|
for theta in self._sym_theta:
|
||||||
|
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
|
||||||
|
|
||||||
|
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Likelihood function given inverse link of f.
|
||||||
|
|
||||||
|
:param inv_link_f: inverse link of latent variables.
|
||||||
|
:type inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
"""
|
||||||
|
return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata=None))
|
||||||
|
|
||||||
|
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Log Likelihood Function given inverse link of latent variables.
|
||||||
|
|
||||||
|
:param inv_inv_link_f: latent variables (inverse link of f)
|
||||||
|
:type inv_inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata
|
||||||
|
:returns: likelihood evaluated for this point
|
||||||
|
:rtype: float
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
|
self._arguments_update(inv_link_f, y)
|
||||||
|
if self.missing_data:
|
||||||
|
ll = np.where(np.isnan(y), self._missing_log_pdf_function(**self._missing_arguments), self._log_pdf_function(**self._arguments))
|
||||||
|
else:
|
||||||
|
ll = np.where(np.isnan(y), 0., self._log_pdf_function(**self._arguments))
|
||||||
|
return np.sum(ll)
|
||||||
|
|
||||||
|
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Gradient of log likelihood with respect to the inverse link function.
|
||||||
|
|
||||||
|
:param inv_inv_link_f: latent variables (inverse link of f)
|
||||||
|
:type inv_inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata
|
||||||
|
:returns: gradient of likelihood with respect to each point.
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
|
self._arguments_update(inv_link_f, y)
|
||||||
|
if self.missing_data:
|
||||||
|
return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._missing_argments), self._first_derivative_code['f'](**self._argments))
|
||||||
|
else:
|
||||||
|
return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments))
|
||||||
|
|
||||||
|
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
"""
|
||||||
|
Hessian of log likelihood given inverse link of latent variables with respect to that inverse link.
|
||||||
|
i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j).
|
||||||
|
|
||||||
|
|
||||||
|
:param inv_link_f: inverse link of the latent variables.
|
||||||
|
:type inv_link_f: Nx1 array
|
||||||
|
:param y: data
|
||||||
|
:type y: Nx1 array
|
||||||
|
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||||
|
:returns: Diagonal of Hessian matrix (second derivative of likelihood evaluated at points f)
|
||||||
|
:rtype: Nx1 array
|
||||||
|
|
||||||
|
.. Note::
|
||||||
|
Returns diagonal of Hessian, since every where else it is
|
||||||
|
0, as the likelihood factorizes over cases (the
|
||||||
|
distribution for y_i depends only on link(f_i) not on
|
||||||
|
link(f_(j!=i))
|
||||||
|
"""
|
||||||
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
|
self._arguments_update(inv_link_f, y)
|
||||||
|
if self.missing_data:
|
||||||
|
return np.where(np.isnan(y), self._missing_second_derivative_code['f'](**self._missing_argments), self._second_derivative_code['f'](**self._argments))
|
||||||
|
else:
|
||||||
|
return np.where(np.isnan(y), 0., self._second_derivative_code['f'](**self._arguments))
|
||||||
|
|
||||||
|
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
|
self._arguments_update(inv_link_f, y)
|
||||||
|
if self.missing_data:
|
||||||
|
return np.where(np.isnan(y), self._missing_third_derivative_code['f'](**self._missing_argments), self._third_derivative_code['f'](**self._argments))
|
||||||
|
else:
|
||||||
|
return np.where(np.isnan(y), 0., self._third_derivative_code['f'](**self._arguments))
|
||||||
|
|
||||||
|
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
|
self._arguments_update(inv_link_f, y)
|
||||||
|
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||||
|
for i, theta in enumerate(self._sym_theta):
|
||||||
|
if self.missing_data:
|
||||||
|
g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments))
|
||||||
|
else:
|
||||||
|
g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments))
|
||||||
|
return g.sum(0)
|
||||||
|
|
||||||
|
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
|
self._arguments_update(inv_link_f, y)
|
||||||
|
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||||
|
for i, theta in enumerate(self._sym_theta):
|
||||||
|
if self.missing_data:
|
||||||
|
g[:, i:i+1] = np.where(np.isnan(y), self._missing_second_derivative_code[theta.name](**self._arguments), self._second_derivative_code[theta.name](**self._arguments))
|
||||||
|
else:
|
||||||
|
g[:, i:i+1] = np.where(np.isnan(y), 0., self._second_derivative_code[theta.name](**self._arguments))
|
||||||
|
return g
|
||||||
|
|
||||||
|
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||||
|
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||||
|
self._arguments_update(inv_link_f, y)
|
||||||
|
g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta)))
|
||||||
|
for i, theta in enumerate(self._sym_theta):
|
||||||
|
if self.missing_data:
|
||||||
|
g[:, i:i+1] = np.where(np.isnan(y), self._missing_third_derivative_code[theta.name](**self._arguments), self._third_derivative_code[theta.name](**self._arguments))
|
||||||
|
else:
|
||||||
|
g[:, i:i+1] = np.where(np.isnan(y), 0., self._third_derivative_code[theta.name](**self._arguments))
|
||||||
|
return g
|
||||||
|
|
||||||
|
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def conditional_mean(self, gp):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def conditional_variance(self, gp):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def samples(self, gp, Y_metadata=None):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
@ -27,7 +27,10 @@ class BayesianGPLVM(SparseGP):
|
||||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
|
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
|
||||||
if X == None:
|
if X == None:
|
||||||
from ..util.initialization import initialize_latent
|
from ..util.initialization import initialize_latent
|
||||||
X = initialize_latent(init, input_dim, Y)
|
X, fracs = initialize_latent(init, input_dim, Y)
|
||||||
|
else:
|
||||||
|
fracs = np.ones(input_dim)
|
||||||
|
|
||||||
self.init = init
|
self.init = init
|
||||||
|
|
||||||
if X_variance is None:
|
if X_variance is None:
|
||||||
|
|
@ -39,7 +42,7 @@ class BayesianGPLVM(SparseGP):
|
||||||
assert Z.shape[1] == X.shape[1]
|
assert Z.shape[1] == X.shape[1]
|
||||||
|
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.RBF(input_dim) # + kern.white(input_dim)
|
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
||||||
|
|
||||||
if likelihood is None:
|
if likelihood is None:
|
||||||
likelihood = Gaussian()
|
likelihood = Gaussian()
|
||||||
|
|
@ -48,21 +51,17 @@ class BayesianGPLVM(SparseGP):
|
||||||
self.variational_prior = NormalPrior()
|
self.variational_prior = NormalPrior()
|
||||||
X = NormalPosterior(X, X_variance)
|
X = NormalPosterior(X, X_variance)
|
||||||
|
|
||||||
|
if inference_method is None:
|
||||||
|
if np.any(np.isnan(Y)):
|
||||||
|
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
|
||||||
|
inference_method = VarDTCMissingData()
|
||||||
|
else:
|
||||||
|
from ..inference.latent_function_inference.var_dtc import VarDTC
|
||||||
|
inference_method = VarDTC()
|
||||||
|
|
||||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||||
self.add_parameter(self.X, index=0)
|
self.add_parameter(self.X, index=0)
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
"""
|
|
||||||
Get the current state of the class,
|
|
||||||
here just all the indices, rest can get recomputed
|
|
||||||
"""
|
|
||||||
return SparseGP._getstate(self) + [self.init]
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
self._const_jitter = None
|
|
||||||
self.init = state.pop()
|
|
||||||
SparseGP._setstate(self, state)
|
|
||||||
|
|
||||||
def set_X_gradients(self, X, X_grad):
|
def set_X_gradients(self, X, X_grad):
|
||||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
"""Set the gradients of the posterior distribution of X in its specific form."""
|
||||||
X.mean.gradient, X.variance.gradient = X_grad
|
X.mean.gradient, X.variance.gradient = X_grad
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,10 @@
|
||||||
# Copyright (c) 2013, the GPy Authors (see AUTHORS.txt)
|
# Copyright (c) 2013, the GPy Authors (see AUTHORS.txt)
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from ..core import GP
|
from ..core import GP
|
||||||
from .. import likelihoods
|
from .. import likelihoods
|
||||||
from .. import kern
|
from .. import kern
|
||||||
|
from ..inference.latent_function_inference.expectation_propagation import EP
|
||||||
|
|
||||||
class GPClassification(GP):
|
class GPClassification(GP):
|
||||||
"""
|
"""
|
||||||
|
|
@ -27,4 +27,4 @@ class GPClassification(GP):
|
||||||
|
|
||||||
likelihood = likelihoods.Bernoulli()
|
likelihood = likelihoods.Bernoulli()
|
||||||
|
|
||||||
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, name='gp_classification')
|
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification')
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,3 @@ class GPRegression(GP):
|
||||||
|
|
||||||
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
|
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return GP._getstate(self)
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
return GP._setstate(self, state)
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from ..util.linalg import PCA
|
|
||||||
from ..core import GP, Param
|
from ..core import GP, Param
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
from .. import util
|
from .. import util
|
||||||
|
|
@ -29,9 +28,11 @@ class GPLVM(GP):
|
||||||
"""
|
"""
|
||||||
if X is None:
|
if X is None:
|
||||||
from ..util.initialization import initialize_latent
|
from ..util.initialization import initialize_latent
|
||||||
X = initialize_latent(init, input_dim, Y)
|
X, fracs = initialize_latent(init, input_dim, Y)
|
||||||
|
else:
|
||||||
|
fracs = np.ones(input_dim)
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
||||||
|
|
||||||
likelihood = Gaussian()
|
likelihood = Gaussian()
|
||||||
|
|
||||||
|
|
@ -43,12 +44,6 @@ class GPLVM(GP):
|
||||||
super(GPLVM, self).parameters_changed()
|
super(GPLVM, self).parameters_changed()
|
||||||
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
|
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return GP._getstate(self)
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
GP._setstate(self, state)
|
|
||||||
|
|
||||||
def jacobian(self,X):
|
def jacobian(self,X):
|
||||||
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||||
for i in range(self.output_dim):
|
for i in range(self.output_dim):
|
||||||
|
|
|
||||||
|
|
@ -6,12 +6,12 @@ import itertools
|
||||||
import pylab
|
import pylab
|
||||||
|
|
||||||
from ..core import Model
|
from ..core import Model
|
||||||
from ..util.linalg import PCA
|
|
||||||
from ..kern import Kern
|
from ..kern import Kern
|
||||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
||||||
from ..core.parameterization import Param, Parameterized
|
from ..core.parameterization import Param, Parameterized
|
||||||
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
|
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
|
from GPy.util.initialization import initialize_latent
|
||||||
|
|
||||||
class MRD(Model):
|
class MRD(Model):
|
||||||
"""
|
"""
|
||||||
|
|
@ -51,27 +51,31 @@ class MRD(Model):
|
||||||
inference_method=None, likelihood=None, name='mrd', Ynames=None):
|
inference_method=None, likelihood=None, name='mrd', Ynames=None):
|
||||||
super(MRD, self).__init__(name)
|
super(MRD, self).__init__(name)
|
||||||
|
|
||||||
# sort out the kernels
|
|
||||||
if kernel is None:
|
|
||||||
from ..kern import RBF
|
|
||||||
self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))]
|
|
||||||
elif isinstance(kernel, Kern):
|
|
||||||
self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))]
|
|
||||||
else:
|
|
||||||
assert len(kernel) == len(Ylist), "need one kernel per output"
|
|
||||||
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
|
||||||
self.kern = kernel
|
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
self.num_inducing = num_inducing
|
self.num_inducing = num_inducing
|
||||||
|
|
||||||
self.Ylist = Ylist
|
self.Ylist = Ylist
|
||||||
self._in_init_ = True
|
self._in_init_ = True
|
||||||
X = self._init_X(initx, Ylist)
|
X, fracs = self._init_X(initx, Ylist)
|
||||||
self.Z = Param('inducing inputs', self._init_Z(initz, X))
|
self.Z = Param('inducing inputs', self._init_Z(initz, X))
|
||||||
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
||||||
|
|
||||||
|
# sort out the kernels
|
||||||
|
if kernel is None:
|
||||||
|
from ..kern import RBF
|
||||||
|
self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))]
|
||||||
|
elif isinstance(kernel, Kern):
|
||||||
|
self.kern = []
|
||||||
|
for i in range(len(Ylist)):
|
||||||
|
k = kernel.copy()
|
||||||
|
self.kern.append(k)
|
||||||
|
else:
|
||||||
|
assert len(kernel) == len(Ylist), "need one kernel per output"
|
||||||
|
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
||||||
|
self.kern = kernel
|
||||||
|
|
||||||
if X_variance is None:
|
if X_variance is None:
|
||||||
X_variance = np.random.uniform(0, .2, X.shape)
|
X_variance = np.random.uniform(0.1, 0.2, X.shape)
|
||||||
|
|
||||||
self.variational_prior = NormalPrior()
|
self.variational_prior = NormalPrior()
|
||||||
self.X = NormalPosterior(X, X_variance)
|
self.X = NormalPosterior(X, X_variance)
|
||||||
|
|
@ -107,9 +111,8 @@ class MRD(Model):
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
self._log_marginal_likelihood = 0
|
self._log_marginal_likelihood = 0
|
||||||
self.posteriors = []
|
self.posteriors = []
|
||||||
self.Z.gradient = 0.
|
self.Z.gradient[:] = 0.
|
||||||
self.X.mean.gradient = 0.
|
self.X.gradient[:] = 0.
|
||||||
self.X.variance.gradient = 0.
|
|
||||||
|
|
||||||
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
|
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
|
||||||
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
|
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
|
||||||
|
|
@ -147,14 +150,22 @@ class MRD(Model):
|
||||||
if Ylist is None:
|
if Ylist is None:
|
||||||
Ylist = self.Ylist
|
Ylist = self.Ylist
|
||||||
if init in "PCA_concat":
|
if init in "PCA_concat":
|
||||||
X = PCA(np.hstack(Ylist), self.input_dim)[0]
|
X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
|
||||||
|
fracs = [fracs]*self.input_dim
|
||||||
elif init in "PCA_single":
|
elif init in "PCA_single":
|
||||||
X = np.zeros((Ylist[0].shape[0], self.input_dim))
|
X = np.zeros((Ylist[0].shape[0], self.input_dim))
|
||||||
|
fracs = []
|
||||||
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
|
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
|
||||||
X[:, qs] = PCA(Y, len(qs))[0]
|
x,frcs = initialize_latent('PCA', len(qs), Y)
|
||||||
|
X[:, qs] = x
|
||||||
|
fracs.append(frcs)
|
||||||
else: # init == 'random':
|
else: # init == 'random':
|
||||||
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
|
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
|
||||||
return X
|
fracs = X.var(0)
|
||||||
|
fracs = [fracs]*self.input_dim
|
||||||
|
X -= X.mean()
|
||||||
|
X /= X.std()
|
||||||
|
return X, fracs
|
||||||
|
|
||||||
def _init_Z(self, init="permute", X=None):
|
def _init_Z(self, init="permute", X=None):
|
||||||
if X is None:
|
if X is None:
|
||||||
|
|
|
||||||
|
|
@ -46,11 +46,3 @@ class SparseGPClassification(SparseGP):
|
||||||
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
|
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
|
||||||
self.ensure_default_constraints()
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return SparseGP._getstate(self)
|
|
||||||
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
return SparseGP._setstate(self, state)
|
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
|
||||||
|
|
@ -51,14 +51,6 @@ class SparseGPRegression(SparseGP):
|
||||||
|
|
||||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
|
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return SparseGP._getstate(self)
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
return SparseGP._setstate(self, state)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SparseGPRegressionUncertainInput(SparseGP):
|
class SparseGPRegressionUncertainInput(SparseGP):
|
||||||
"""
|
"""
|
||||||
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)
|
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)
|
||||||
|
|
|
||||||
|
|
@ -28,14 +28,6 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
|
||||||
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
|
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
|
||||||
self.ensure_default_constraints()
|
self.ensure_default_constraints()
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return SparseGPRegression._getstate(self)
|
|
||||||
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
return SparseGPRegression._setstate(self, state)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
||||||
+ SparseGPRegression._get_param_names(self))
|
+ SparseGPRegression._get_param_names(self))
|
||||||
|
|
|
||||||
|
|
@ -43,10 +43,3 @@ class SVIGPRegression(SVIGP):
|
||||||
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
|
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
|
||||||
self.load_batch()
|
self.load_batch()
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return GPBase._getstate(self)
|
|
||||||
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
return GPBase._setstate(self, state)
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,14 +30,6 @@ class WarpedGP(GP):
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
self._set_params(self._get_params())
|
self._set_params(self._get_params())
|
||||||
|
|
||||||
def _getstate(self):
|
|
||||||
return GP._getstate(self)
|
|
||||||
|
|
||||||
|
|
||||||
def _setstate(self, state):
|
|
||||||
return GP._setstate(self, state)
|
|
||||||
|
|
||||||
|
|
||||||
def _scale_data(self, Y):
|
def _scale_data(self, Y):
|
||||||
self._Ymax = Y.max()
|
self._Ymax = Y.max()
|
||||||
self._Ymin = Y.min()
|
self._Ymin = Y.min()
|
||||||
|
|
|
||||||
|
|
@ -53,8 +53,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
which_data_rows = slice(None)
|
which_data_rows = slice(None)
|
||||||
if which_data_ycols == 'all':
|
if which_data_ycols == 'all':
|
||||||
which_data_ycols = np.arange(model.output_dim)
|
which_data_ycols = np.arange(model.output_dim)
|
||||||
if len(which_data_ycols)==0:
|
#if len(which_data_ycols)==0:
|
||||||
raise ValueError('No data selected for plotting')
|
#raise ValueError('No data selected for plotting')
|
||||||
if ax is None:
|
if ax is None:
|
||||||
fig = pb.figure(num=fignum)
|
fig = pb.figure(num=fignum)
|
||||||
ax = fig.add_subplot(111)
|
ax = fig.add_subplot(111)
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,8 @@ class Test(unittest.TestCase):
|
||||||
self.assertListEqual(self.param_index[one].tolist(), [3])
|
self.assertListEqual(self.param_index[one].tolist(), [3])
|
||||||
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
|
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
|
||||||
|
|
||||||
|
self.assertListEqual(self.view.remove('not in there', [2,3,4]).tolist(), [])
|
||||||
|
|
||||||
def test_shift_left(self):
|
def test_shift_left(self):
|
||||||
self.view.shift_left(0, 2)
|
self.view.shift_left(0, 2)
|
||||||
self.assertListEqual(self.param_index[three].tolist(), [2,5])
|
self.assertListEqual(self.param_index[three].tolist(), [2,5])
|
||||||
|
|
@ -82,6 +84,10 @@ class Test(unittest.TestCase):
|
||||||
self.assertEqual(self.param_index.size, 6)
|
self.assertEqual(self.param_index.size, 6)
|
||||||
self.assertEqual(self.view.size, 5)
|
self.assertEqual(self.view.size, 5)
|
||||||
|
|
||||||
|
def test_print(self):
|
||||||
|
print self.param_index
|
||||||
|
print self.view
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
#import sys;sys.argv = ['', 'Test.test_index_view']
|
#import sys;sys.argv = ['', 'Test.test_index_view']
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
@ -5,9 +5,11 @@ import unittest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import GPy
|
import GPy
|
||||||
import sys
|
import sys
|
||||||
|
from GPy.core.parameterization.param import Param
|
||||||
|
|
||||||
verbose = 0
|
verbose = 0
|
||||||
|
|
||||||
|
np.random.seed(50)
|
||||||
|
|
||||||
|
|
||||||
class Kern_check_model(GPy.core.Model):
|
class Kern_check_model(GPy.core.Model):
|
||||||
|
|
@ -29,7 +31,7 @@ class Kern_check_model(GPy.core.Model):
|
||||||
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
||||||
|
|
||||||
self.kernel = kernel
|
self.kernel = kernel
|
||||||
self.X = GPy.core.parameterization.Param('X',X)
|
self.X = X
|
||||||
self.X2 = X2
|
self.X2 = X2
|
||||||
self.dL_dK = dL_dK
|
self.dL_dK = dL_dK
|
||||||
|
|
||||||
|
|
@ -76,10 +78,11 @@ class Kern_check_dK_dX(Kern_check_model):
|
||||||
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
||||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||||
|
self.X = Param('X',X)
|
||||||
self.add_parameter(self.X)
|
self.add_parameter(self.X)
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
self.X.gradient = self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
|
self.X.gradient[:] = self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
|
||||||
|
|
||||||
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
||||||
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
||||||
|
|
@ -90,7 +93,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
||||||
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
|
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
|
self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -126,6 +129,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
if not result:
|
if not result:
|
||||||
print("Positive definite check failed for " + kern.name + " covariance function.")
|
print("Positive definite check failed for " + kern.name + " covariance function.")
|
||||||
pass_checks = False
|
pass_checks = False
|
||||||
|
assert(result)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|
@ -137,6 +141,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||||
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
|
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
|
||||||
pass_checks = False
|
pass_checks = False
|
||||||
|
assert(result)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|
@ -148,6 +153,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||||
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
|
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
|
||||||
pass_checks = False
|
pass_checks = False
|
||||||
|
assert(result)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|
@ -164,6 +170,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||||
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
|
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
|
||||||
pass_checks = False
|
pass_checks = False
|
||||||
|
assert(result)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|
@ -182,6 +189,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
if not result:
|
if not result:
|
||||||
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||||
testmodel.checkgrad(verbose=True)
|
testmodel.checkgrad(verbose=True)
|
||||||
|
import ipdb;ipdb.set_trace()
|
||||||
|
assert(result)
|
||||||
pass_checks = False
|
pass_checks = False
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -201,6 +210,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
if not result:
|
if not result:
|
||||||
print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||||
testmodel.checkgrad(verbose=True)
|
testmodel.checkgrad(verbose=True)
|
||||||
|
assert(result)
|
||||||
pass_checks = False
|
pass_checks = False
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -218,6 +228,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
|
||||||
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
|
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
|
||||||
pass_checks = False
|
pass_checks = False
|
||||||
|
assert(result)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return pass_checks
|
return pass_checks
|
||||||
|
|
@ -226,7 +237,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
|
||||||
|
|
||||||
class KernelGradientTestsContinuous(unittest.TestCase):
|
class KernelGradientTestsContinuous(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.N, self.D = 100, 5
|
self.N, self.D = 10, 5
|
||||||
self.X = np.random.randn(self.N,self.D)
|
self.X = np.random.randn(self.N,self.D)
|
||||||
self.X2 = np.random.randn(self.N+10,self.D)
|
self.X2 = np.random.randn(self.N+10,self.D)
|
||||||
|
|
||||||
|
|
@ -243,6 +254,17 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
||||||
k.randomize()
|
k.randomize()
|
||||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||||
|
|
||||||
|
def test_Prod2(self):
|
||||||
|
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||||
|
k.randomize()
|
||||||
|
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||||
|
|
||||||
|
def test_Prod3(self):
|
||||||
|
k = GPy.kern.Matern32(2, active_dims=[2,3]) * (GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D))
|
||||||
|
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||||
|
k.randomize()
|
||||||
|
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||||
|
|
||||||
def test_Add(self):
|
def test_Add(self):
|
||||||
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||||
k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||||
|
|
@ -276,6 +298,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
||||||
k.randomize()
|
k.randomize()
|
||||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||||
|
|
||||||
|
def test_LinearFull(self):
|
||||||
|
k = GPy.kern.LinearFull(self.D, self.D-1)
|
||||||
|
k.randomize()
|
||||||
|
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||||
|
|
||||||
#TODO: turn off grad checkingwrt X for indexed kernels like coregionalize
|
#TODO: turn off grad checkingwrt X for indexed kernels like coregionalize
|
||||||
# class KernelGradientTestsContinuous1D(unittest.TestCase):
|
# class KernelGradientTestsContinuous1D(unittest.TestCase):
|
||||||
# def setUp(self):
|
# def setUp(self):
|
||||||
|
|
@ -356,25 +383,25 @@ class KernelTestsNonContinuous(unittest.TestCase):
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print "Running unit tests, please be (very) patient..."
|
print "Running unit tests, please be (very) patient..."
|
||||||
#unittest.main()
|
unittest.main()
|
||||||
np.random.seed(0)
|
# np.random.seed(0)
|
||||||
N0 = 3
|
# N0 = 3
|
||||||
N1 = 9
|
# N1 = 9
|
||||||
N2 = 4
|
# N2 = 4
|
||||||
N = N0+N1+N2
|
# N = N0+N1+N2
|
||||||
D = 3
|
# D = 3
|
||||||
X = np.random.randn(N, D+1)
|
# X = np.random.randn(N, D+1)
|
||||||
indices = np.random.random_integers(0, 2, size=N)
|
# indices = np.random.random_integers(0, 2, size=N)
|
||||||
X[indices==0, -1] = 0
|
# X[indices==0, -1] = 0
|
||||||
X[indices==1, -1] = 1
|
# X[indices==1, -1] = 1
|
||||||
X[indices==2, -1] = 2
|
# X[indices==2, -1] = 2
|
||||||
#X = X[X[:, -1].argsort(), :]
|
# #X = X[X[:, -1].argsort(), :]
|
||||||
X2 = np.random.randn((N0+N1)*2, D+1)
|
# X2 = np.random.randn((N0+N1)*2, D+1)
|
||||||
X2[:(N0*2), -1] = 0
|
# X2[:(N0*2), -1] = 0
|
||||||
X2[(N0*2):, -1] = 1
|
# X2[(N0*2):, -1] = 1
|
||||||
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
|
# k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
|
||||||
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
|
# kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
|
||||||
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
||||||
k = GPy.kern.RBF(D)
|
# k = GPy.kern.RBF(D)
|
||||||
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
|
# kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
|
||||||
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,7 @@ class TestNoiseModels(object):
|
||||||
self.f = None
|
self.f = None
|
||||||
self.X = None
|
self.X = None
|
||||||
|
|
||||||
def test_noise_models(self):
|
def test_scale2_models(self):
|
||||||
self.setUp()
|
self.setUp()
|
||||||
|
|
||||||
####################################################
|
####################################################
|
||||||
|
|
@ -150,64 +150,64 @@ class TestNoiseModels(object):
|
||||||
noise_models = {"Student_t_default": {
|
noise_models = {"Student_t_default": {
|
||||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||||
"grad_params": {
|
"grad_params": {
|
||||||
"names": [".*t_noise"],
|
"names": [".*t_scale2"],
|
||||||
"vals": [self.var],
|
"vals": [self.var],
|
||||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||||
#"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
|
#"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
|
||||||
},
|
},
|
||||||
"laplace": True
|
"laplace": True
|
||||||
},
|
},
|
||||||
"Student_t_1_var": {
|
"Student_t_1_var": {
|
||||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||||
"grad_params": {
|
"grad_params": {
|
||||||
"names": [".*t_noise"],
|
"names": [".*t_scale2"],
|
||||||
"vals": [1.0],
|
"vals": [1.0],
|
||||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||||
},
|
},
|
||||||
"laplace": True
|
"laplace": True
|
||||||
},
|
},
|
||||||
"Student_t_small_deg_free": {
|
"Student_t_small_deg_free": {
|
||||||
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
|
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
|
||||||
"grad_params": {
|
"grad_params": {
|
||||||
"names": [".*t_noise"],
|
"names": [".*t_scale2"],
|
||||||
"vals": [self.var],
|
"vals": [self.var],
|
||||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||||
},
|
},
|
||||||
"laplace": True
|
"laplace": True
|
||||||
},
|
},
|
||||||
"Student_t_small_var": {
|
"Student_t_small_var": {
|
||||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||||
"grad_params": {
|
"grad_params": {
|
||||||
"names": [".*t_noise"],
|
"names": [".*t_scale2"],
|
||||||
"vals": [0.001],
|
"vals": [0.001],
|
||||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||||
},
|
},
|
||||||
"laplace": True
|
"laplace": True
|
||||||
},
|
},
|
||||||
"Student_t_large_var": {
|
"Student_t_large_var": {
|
||||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||||
"grad_params": {
|
"grad_params": {
|
||||||
"names": [".*t_noise"],
|
"names": [".*t_scale2"],
|
||||||
"vals": [10.0],
|
"vals": [10.0],
|
||||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||||
},
|
},
|
||||||
"laplace": True
|
"laplace": True
|
||||||
},
|
},
|
||||||
"Student_t_approx_gauss": {
|
"Student_t_approx_gauss": {
|
||||||
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
|
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
|
||||||
"grad_params": {
|
"grad_params": {
|
||||||
"names": [".*t_noise"],
|
"names": [".*t_scale2"],
|
||||||
"vals": [self.var],
|
"vals": [self.var],
|
||||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||||
},
|
},
|
||||||
"laplace": True
|
"laplace": True
|
||||||
},
|
},
|
||||||
"Student_t_log": {
|
"Student_t_log": {
|
||||||
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
|
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
|
||||||
"grad_params": {
|
"grad_params": {
|
||||||
"names": [".*t_noise"],
|
"names": [".*t_scale2"],
|
||||||
"vals": [self.var],
|
"vals": [self.var],
|
||||||
"constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)]
|
"constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)]
|
||||||
},
|
},
|
||||||
"laplace": True
|
"laplace": True
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -130,7 +130,14 @@ class MiscTests(unittest.TestCase):
|
||||||
m2.kern[:] = m.kern[''].values()
|
m2.kern[:] = m.kern[''].values()
|
||||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
||||||
|
|
||||||
class GradientTests(unittest.TestCase):
|
def test_model_optimize(self):
|
||||||
|
X = np.random.uniform(-3., 3., (20, 1))
|
||||||
|
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
|
||||||
|
m = GPy.models.GPRegression(X,Y)
|
||||||
|
m.optimize()
|
||||||
|
print m
|
||||||
|
|
||||||
|
class GradientTests(np.testing.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
######################################
|
######################################
|
||||||
# # 1 dimensional example
|
# # 1 dimensional example
|
||||||
|
|
|
||||||
|
|
@ -93,12 +93,12 @@ class Test(unittest.TestCase):
|
||||||
|
|
||||||
def test_set_params(self):
|
def test_set_params(self):
|
||||||
self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
|
self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
|
||||||
self.par._param_array_[:] = 1
|
self.par.param_array[:] = 1
|
||||||
self.par._trigger_params_changed()
|
self.par._trigger_params_changed()
|
||||||
self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
|
self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
|
||||||
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
|
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
|
||||||
|
|
||||||
self.par._param_array_[:] = 2
|
self.par.param_array[:] = 2
|
||||||
self.par._trigger_params_changed()
|
self.par._trigger_params_changed()
|
||||||
self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
|
self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
|
||||||
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
|
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import unittest
|
||||||
import GPy
|
import GPy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GPy.core.parameterization.parameter_core import HierarchyError
|
from GPy.core.parameterization.parameter_core import HierarchyError
|
||||||
from GPy.core.parameterization.array_core import ObsAr
|
from GPy.core.parameterization.observable_array import ObsAr
|
||||||
|
|
||||||
class ArrayCoreTest(unittest.TestCase):
|
class ArrayCoreTest(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
|
@ -34,6 +34,7 @@ class ParameterizedTest(unittest.TestCase):
|
||||||
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
|
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
|
||||||
|
|
||||||
self.test1 = GPy.core.Parameterized("test model")
|
self.test1 = GPy.core.Parameterized("test model")
|
||||||
|
self.test1.param = self.param
|
||||||
self.test1.kern = self.rbf+self.white
|
self.test1.kern = self.rbf+self.white
|
||||||
self.test1.add_parameter(self.test1.kern)
|
self.test1.add_parameter(self.test1.kern)
|
||||||
self.test1.add_parameter(self.param, 0)
|
self.test1.add_parameter(self.param, 0)
|
||||||
|
|
@ -58,6 +59,9 @@ class ParameterizedTest(unittest.TestCase):
|
||||||
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
|
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
|
||||||
self.test1.kern.rbf.fix()
|
self.test1.kern.rbf.fix()
|
||||||
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)
|
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)
|
||||||
|
self.test1.fix()
|
||||||
|
self.assertTrue(self.test1.is_fixed)
|
||||||
|
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*self.test1.size)
|
||||||
|
|
||||||
def test_remove_parameter(self):
|
def test_remove_parameter(self):
|
||||||
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
|
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
|
||||||
|
|
@ -86,9 +90,9 @@ class ParameterizedTest(unittest.TestCase):
|
||||||
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
|
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
|
||||||
|
|
||||||
def test_remove_parameter_param_array_grad_array(self):
|
def test_remove_parameter_param_array_grad_array(self):
|
||||||
val = self.test1.kern._param_array_.copy()
|
val = self.test1.kern.param_array.copy()
|
||||||
self.test1.kern.remove_parameter(self.white)
|
self.test1.kern.remove_parameter(self.white)
|
||||||
self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist())
|
self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist())
|
||||||
|
|
||||||
def test_add_parameter_already_in_hirarchy(self):
|
def test_add_parameter_already_in_hirarchy(self):
|
||||||
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
|
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
|
||||||
|
|
|
||||||
207
GPy/testing/pickle_tests.py
Normal file
207
GPy/testing/pickle_tests.py
Normal file
|
|
@ -0,0 +1,207 @@
|
||||||
|
'''
|
||||||
|
Created on 13 Mar 2014
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
import unittest, itertools
|
||||||
|
import cPickle as pickle
|
||||||
|
import numpy as np
|
||||||
|
from GPy.core.parameterization.index_operations import ParameterIndexOperations,\
|
||||||
|
ParameterIndexOperationsView
|
||||||
|
import tempfile
|
||||||
|
from GPy.core.parameterization.param import Param
|
||||||
|
from GPy.core.parameterization.observable_array import ObsAr
|
||||||
|
from GPy.core.parameterization.priors import Gaussian
|
||||||
|
from GPy.kern._src.rbf import RBF
|
||||||
|
from GPy.kern._src.linear import Linear
|
||||||
|
from GPy.kern._src.static import Bias, White
|
||||||
|
from GPy.examples.dimensionality_reduction import mrd_simulation,\
|
||||||
|
bgplvm_simulation
|
||||||
|
from GPy.examples.regression import toy_rbf_1d_50
|
||||||
|
from GPy.core.parameterization.variational import NormalPosterior
|
||||||
|
from GPy.models.gp_regression import GPRegression
|
||||||
|
|
||||||
|
class ListDictTestCase(unittest.TestCase):
|
||||||
|
def assertListDictEquals(self, d1, d2, msg=None):
|
||||||
|
for k,v in d1.iteritems():
|
||||||
|
self.assertListEqual(list(v), list(d2[k]), msg)
|
||||||
|
def assertArrayListEquals(self, l1, l2):
|
||||||
|
for a1, a2 in itertools.izip(l1,l2):
|
||||||
|
np.testing.assert_array_equal(a1, a2)
|
||||||
|
|
||||||
|
class Test(ListDictTestCase):
|
||||||
|
def test_parameter_index_operations(self):
|
||||||
|
pio = ParameterIndexOperations(dict(test1=np.array([4,3,1,6,4]), test2=np.r_[2:130]))
|
||||||
|
piov = ParameterIndexOperationsView(pio, 20, 250)
|
||||||
|
self.assertListDictEquals(dict(piov.items()), dict(piov.copy().iteritems()))
|
||||||
|
self.assertListDictEquals(dict(pio.iteritems()), dict(pio.copy().items()))
|
||||||
|
|
||||||
|
self.assertArrayListEquals(pio.copy().indices(), pio.indices())
|
||||||
|
self.assertArrayListEquals(piov.copy().indices(), piov.indices())
|
||||||
|
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
pickle.dump(pio, f)
|
||||||
|
f.seek(0)
|
||||||
|
pio2 = pickle.load(f)
|
||||||
|
self.assertListDictEquals(pio._properties, pio2._properties)
|
||||||
|
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
pickle.dump(piov, f)
|
||||||
|
f.seek(0)
|
||||||
|
pio2 = pickle.load(f)
|
||||||
|
self.assertListDictEquals(dict(piov.items()), dict(pio2.iteritems()))
|
||||||
|
|
||||||
|
def test_param(self):
|
||||||
|
param = Param('test', np.arange(4*2).reshape(4,2))
|
||||||
|
param[0].constrain_positive()
|
||||||
|
param[1].fix()
|
||||||
|
param[2].set_prior(Gaussian(0,1))
|
||||||
|
pcopy = param.copy()
|
||||||
|
self.assertListEqual(param.tolist(), pcopy.tolist())
|
||||||
|
self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n'))
|
||||||
|
self.assertIsNot(param, pcopy)
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
pickle.dump(param, f)
|
||||||
|
f.seek(0)
|
||||||
|
pcopy = pickle.load(f)
|
||||||
|
self.assertListEqual(param.tolist(), pcopy.tolist())
|
||||||
|
self.assertSequenceEqual(str(param), str(pcopy))
|
||||||
|
|
||||||
|
def test_observable_array(self):
|
||||||
|
obs = ObsAr(np.arange(4*2).reshape(4,2))
|
||||||
|
pcopy = obs.copy()
|
||||||
|
self.assertListEqual(obs.tolist(), pcopy.tolist())
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
pickle.dump(obs, f)
|
||||||
|
f.seek(0)
|
||||||
|
pcopy = pickle.load(f)
|
||||||
|
self.assertListEqual(obs.tolist(), pcopy.tolist())
|
||||||
|
self.assertSequenceEqual(str(obs), str(pcopy))
|
||||||
|
|
||||||
|
def test_parameterized(self):
|
||||||
|
par = RBF(1, active_dims=[1]) + Linear(2, active_dims=[0,2]) + Bias(3) + White(3)
|
||||||
|
par.gradient = 10
|
||||||
|
par.randomize()
|
||||||
|
pcopy = par.copy()
|
||||||
|
self.assertIsInstance(pcopy.constraints, ParameterIndexOperations)
|
||||||
|
self.assertIsInstance(pcopy.rbf.constraints, ParameterIndexOperationsView)
|
||||||
|
self.assertIs(pcopy.constraints, pcopy.rbf.constraints._param_index_ops)
|
||||||
|
self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
|
||||||
|
self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||||
|
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
par.pickle(f)
|
||||||
|
f.seek(0)
|
||||||
|
pcopy = pickle.load(f)
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
pcopy.gradient = 10
|
||||||
|
np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient)
|
||||||
|
np.testing.assert_allclose(pcopy.linear.full_gradient, 10)
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
|
||||||
|
def test_model(self):
|
||||||
|
par = toy_rbf_1d_50(optimize=0, plot=0)
|
||||||
|
pcopy = par.copy()
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||||
|
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||||
|
self.assertTrue(pcopy.checkgrad())
|
||||||
|
self.assert_(np.any(pcopy.gradient!=0.0))
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
par.pickle(f)
|
||||||
|
f.seek(0)
|
||||||
|
pcopy = pickle.load(f)
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assert_(pcopy.checkgrad())
|
||||||
|
|
||||||
|
def test_modelrecreation(self):
|
||||||
|
par = toy_rbf_1d_50(optimize=0, plot=0)
|
||||||
|
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||||
|
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||||
|
self.assertTrue(pcopy.checkgrad())
|
||||||
|
self.assert_(np.any(pcopy.gradient!=0.0))
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
par.pickle(f)
|
||||||
|
f.seek(0)
|
||||||
|
pcopy = pickle.load(f)
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assert_(pcopy.checkgrad())
|
||||||
|
|
||||||
|
def test_posterior(self):
|
||||||
|
X = np.random.randn(3,5)
|
||||||
|
Xv = np.random.rand(*X.shape)
|
||||||
|
par = NormalPosterior(X,Xv)
|
||||||
|
par.gradient = 10
|
||||||
|
pcopy = par.copy()
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||||
|
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
par.pickle(f)
|
||||||
|
f.seek(0)
|
||||||
|
pcopy = pickle.load(f)
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
pcopy.gradient = 10
|
||||||
|
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||||
|
np.testing.assert_allclose(pcopy.mean.full_gradient, 10)
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
|
||||||
|
def test_model_concat(self):
|
||||||
|
par = mrd_simulation(optimize=0, plot=0, plot_sim=0)
|
||||||
|
par.randomize()
|
||||||
|
pcopy = par.copy()
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||||
|
self.assertIsNot(par.full_gradient, pcopy.full_gradient)
|
||||||
|
self.assertTrue(pcopy.checkgrad())
|
||||||
|
self.assert_(np.any(pcopy.gradient!=0.0))
|
||||||
|
with tempfile.TemporaryFile('w+b') as f:
|
||||||
|
par.pickle(f)
|
||||||
|
f.seek(0)
|
||||||
|
pcopy = pickle.load(f)
|
||||||
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
|
||||||
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
self.assert_(pcopy.checkgrad())
|
||||||
|
|
||||||
|
def _callback(self, what, which):
|
||||||
|
what.count += 1
|
||||||
|
|
||||||
|
def test_add_observer(self):
|
||||||
|
par = toy_rbf_1d_50(optimize=0, plot=0)
|
||||||
|
par.name = "original"
|
||||||
|
par.count = 0
|
||||||
|
par.add_observer(self, self._callback, 1)
|
||||||
|
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
|
||||||
|
self.assertNotIn(par.observers[0], pcopy.observers)
|
||||||
|
pcopy = par.copy()
|
||||||
|
pcopy.name = "copy"
|
||||||
|
self.assertTrue(par.checkgrad())
|
||||||
|
self.assertTrue(pcopy.checkgrad())
|
||||||
|
self.assertTrue(pcopy.kern.checkgrad())
|
||||||
|
import ipdb;ipdb.set_trace()
|
||||||
|
self.assertIn(par.observers[0], pcopy.observers)
|
||||||
|
self.assertEqual(par.count, 3)
|
||||||
|
self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
#import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
|
||||||
|
unittest.main()
|
||||||
|
|
@ -48,7 +48,7 @@ class Cacher(object):
|
||||||
if k in kw and kw[k] is not None:
|
if k in kw and kw[k] is not None:
|
||||||
return self.operation(*args, **kw)
|
return self.operation(*args, **kw)
|
||||||
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
||||||
#return self.operation(*args)
|
# return self.operation(*args, **kw)
|
||||||
|
|
||||||
#if the result is cached, return the cached computation
|
#if the result is cached, return the cached computation
|
||||||
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
|
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
|
||||||
|
|
@ -98,11 +98,20 @@ class Cacher(object):
|
||||||
self.cached_outputs = []
|
self.cached_outputs = []
|
||||||
self.inputs_changed = []
|
self.inputs_changed = []
|
||||||
|
|
||||||
|
def __deepcopy__(self, memo=None):
|
||||||
|
return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs)
|
||||||
|
|
||||||
|
def __getstate__(self, memo=None):
|
||||||
|
raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
|
||||||
|
|
||||||
|
def __setstate__(self, memo=None):
|
||||||
|
raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def __name__(self):
|
def __name__(self):
|
||||||
return self.operation.__name__
|
return self.operation.__name__
|
||||||
|
|
||||||
from functools import partial
|
from functools import partial, update_wrapper
|
||||||
|
|
||||||
class Cacher_wrap(object):
|
class Cacher_wrap(object):
|
||||||
def __init__(self, f, limit, ignore_args, force_kwargs):
|
def __init__(self, f, limit, ignore_args, force_kwargs):
|
||||||
|
|
@ -110,6 +119,7 @@ class Cacher_wrap(object):
|
||||||
self.ignore_args = ignore_args
|
self.ignore_args = ignore_args
|
||||||
self.force_kwargs = force_kwargs
|
self.force_kwargs = force_kwargs
|
||||||
self.f = f
|
self.f = f
|
||||||
|
update_wrapper(self, self.f)
|
||||||
def __get__(self, obj, objtype=None):
|
def __get__(self, obj, objtype=None):
|
||||||
return partial(self, obj)
|
return partial(self, obj)
|
||||||
def __call__(self, *args, **kwargs):
|
def __call__(self, *args, **kwargs):
|
||||||
|
|
|
||||||
|
|
@ -331,7 +331,7 @@ def football_data(season='1314', data_set='football_data'):
|
||||||
|
|
||||||
# This will be for downloading google trends data.
|
# This will be for downloading google trends data.
|
||||||
def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
|
def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
|
||||||
"""Data downloaded from Google trends for given query terms."""
|
"""Data downloaded from Google trends for given query terms. Warning, if you use this function multiple times in a row you get blocked due to terms of service violations."""
|
||||||
# Inspired by this notebook:
|
# Inspired by this notebook:
|
||||||
# http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb
|
# http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb
|
||||||
|
|
||||||
|
|
|
||||||
18
GPy/util/functions.py
Normal file
18
GPy/util/functions.py
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
import numpy as np
|
||||||
|
from scipy.special import erf, erfcx
|
||||||
|
import sys
|
||||||
|
epsilon = sys.float_info.epsilon
|
||||||
|
lim_val = -np.log(epsilon)
|
||||||
|
|
||||||
|
def cum_gaussian(x):
|
||||||
|
g=0.5*(1+erf(x/np.sqrt(2)))
|
||||||
|
return np.where(g==0, epsilon, np.where(g==1, 1-epsilon, g))
|
||||||
|
|
||||||
|
def ln_cum_gaussian(x):
|
||||||
|
return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x)))
|
||||||
|
|
||||||
|
def clip_exp(x):
|
||||||
|
if any(x>=lim_val) or any(x<=-lim_val):
|
||||||
|
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))
|
||||||
|
else:
|
||||||
|
return np.exp(x)
|
||||||
|
|
@ -5,13 +5,15 @@ Created on 24 Feb 2014
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from linalg import PCA
|
from GPy.util.pca import pca
|
||||||
|
|
||||||
def initialize_latent(init, input_dim, Y):
|
def initialize_latent(init, input_dim, Y):
|
||||||
Xr = np.random.randn(Y.shape[0], input_dim)
|
Xr = np.random.randn(Y.shape[0], input_dim)
|
||||||
if init == 'PCA':
|
if init == 'PCA':
|
||||||
PC = PCA(Y, input_dim)[0]
|
p = pca(Y)
|
||||||
|
PC = p.project(Y, min(input_dim, Y.shape[1]))
|
||||||
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
||||||
else:
|
else:
|
||||||
pass
|
var = Xr.var(0)
|
||||||
return Xr
|
return Xr, var/var.max()
|
||||||
|
return Xr, p.fracs[:input_dim]
|
||||||
|
|
@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'):
|
||||||
tmp, _ = dtrtrs(L, X, lower=1, trans=0)
|
tmp, _ = dtrtrs(L, X, lower=1, trans=0)
|
||||||
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
|
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
|
||||||
|
|
||||||
def PCA(Y, input_dim):
|
|
||||||
"""
|
|
||||||
Principal component analysis: maximum likelihood solution by SVD
|
|
||||||
|
|
||||||
:param Y: NxD np.array of data
|
|
||||||
:param input_dim: int, dimension of projection
|
|
||||||
|
|
||||||
|
|
||||||
:rval X: - Nxinput_dim np.array of dimensionality reduced data
|
|
||||||
:rval W: - input_dimxD mapping from X to Y
|
|
||||||
|
|
||||||
"""
|
|
||||||
if not np.allclose(Y.mean(axis=0), 0.0):
|
|
||||||
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
|
|
||||||
|
|
||||||
# Y -= Y.mean(axis=0)
|
|
||||||
|
|
||||||
Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
|
|
||||||
[X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
|
|
||||||
v = X.std(axis=0)
|
|
||||||
X /= v;
|
|
||||||
W *= v;
|
|
||||||
return X, W.T
|
|
||||||
|
|
|
||||||
122
GPy/util/pca.py
Normal file
122
GPy/util/pca.py
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
'''
|
||||||
|
Created on 10 Sep 2012
|
||||||
|
|
||||||
|
@author: Max Zwiessele
|
||||||
|
@copyright: Max Zwiessele 2012
|
||||||
|
'''
|
||||||
|
import numpy
|
||||||
|
import pylab
|
||||||
|
import matplotlib
|
||||||
|
from numpy.linalg.linalg import LinAlgError
|
||||||
|
|
||||||
|
class pca(object):
|
||||||
|
"""
|
||||||
|
pca module with automatic primal/dual determination.
|
||||||
|
"""
|
||||||
|
def __init__(self, X):
|
||||||
|
self.mu = X.mean(0)
|
||||||
|
self.sigma = X.std(0)
|
||||||
|
|
||||||
|
X = self.center(X)
|
||||||
|
|
||||||
|
# self.X = input
|
||||||
|
if X.shape[0] >= X.shape[1]:
|
||||||
|
# print "N >= D: using primal"
|
||||||
|
self.eigvals, self.eigvectors = self._primal_eig(X)
|
||||||
|
else:
|
||||||
|
# print "N < D: using dual"
|
||||||
|
self.eigvals, self.eigvectors = self._dual_eig(X)
|
||||||
|
self.sort = numpy.argsort(self.eigvals)[::-1]
|
||||||
|
self.eigvals = self.eigvals[self.sort]
|
||||||
|
self.eigvectors = self.eigvectors[:, self.sort]
|
||||||
|
self.fracs = self.eigvals / self.eigvals.sum()
|
||||||
|
self.Q = self.eigvals.shape[0]
|
||||||
|
|
||||||
|
def center(self, X):
|
||||||
|
"""
|
||||||
|
Center `X` in pca space.
|
||||||
|
"""
|
||||||
|
X = X - self.mu
|
||||||
|
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
|
||||||
|
return X
|
||||||
|
|
||||||
|
def _primal_eig(self, X):
|
||||||
|
return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X))
|
||||||
|
|
||||||
|
def _dual_eig(self, X):
|
||||||
|
dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X))
|
||||||
|
relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:]
|
||||||
|
eigvals = dual_eigvals[relevant_dimensions]
|
||||||
|
eigvects = dual_eigvects[:, relevant_dimensions]
|
||||||
|
eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects)
|
||||||
|
eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects)))
|
||||||
|
return eigvals, eigvects
|
||||||
|
|
||||||
|
def project(self, X, Q=None):
|
||||||
|
"""
|
||||||
|
Project X into pca space, defined by the Q highest eigenvalues.
|
||||||
|
Y = X dot V
|
||||||
|
"""
|
||||||
|
if Q is None:
|
||||||
|
Q = self.Q
|
||||||
|
if Q > X.shape[1]:
|
||||||
|
raise IndexError("requested dimension larger then input dimension")
|
||||||
|
X = self.center(X)
|
||||||
|
return X.dot(self.eigvectors[:, :Q])
|
||||||
|
|
||||||
|
def plot_fracs(self, Q=None, ax=None, fignum=None):
|
||||||
|
"""
|
||||||
|
Plot fractions of Eigenvalues sorted in descending order.
|
||||||
|
"""
|
||||||
|
if ax is None:
|
||||||
|
fig = pylab.figure(fignum)
|
||||||
|
ax = fig.add_subplot(111)
|
||||||
|
if Q is None:
|
||||||
|
Q = self.Q
|
||||||
|
ticks = numpy.arange(Q)
|
||||||
|
bar = ax.bar(ticks - .4, self.fracs[:Q])
|
||||||
|
ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1))
|
||||||
|
ax.set_ylabel("Eigenvalue fraction")
|
||||||
|
ax.set_xlabel("PC")
|
||||||
|
ax.set_ylim(0, ax.get_ylim()[1])
|
||||||
|
ax.set_xlim(ticks.min() - .5, ticks.max() + .5)
|
||||||
|
try:
|
||||||
|
pylab.tight_layout()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return bar
|
||||||
|
|
||||||
|
def plot_2d(self, X, labels=None, s=20, marker='o',
|
||||||
|
dimensions=(0, 1), ax=None, colors=None,
|
||||||
|
fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable
|
||||||
|
** kwargs):
|
||||||
|
"""
|
||||||
|
Plot dimensions `dimensions` with given labels against each other in
|
||||||
|
PC space. Labels can be any sequence of labels of dimensions X.shape[0].
|
||||||
|
Labels can be drawn with a subsequent call to legend()
|
||||||
|
"""
|
||||||
|
if ax is None:
|
||||||
|
fig = pylab.figure(fignum)
|
||||||
|
ax = fig.add_subplot(111)
|
||||||
|
if labels is None:
|
||||||
|
labels = numpy.zeros(X.shape[0])
|
||||||
|
ulabels = []
|
||||||
|
for lab in labels:
|
||||||
|
if not lab in ulabels:
|
||||||
|
ulabels.append(lab)
|
||||||
|
nlabels = len(ulabels)
|
||||||
|
if colors is None:
|
||||||
|
colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
|
||||||
|
X_ = self.project(X, self.Q)[:,dimensions]
|
||||||
|
kwargs.update(dict(s=s))
|
||||||
|
plots = list()
|
||||||
|
for i, l in enumerate(ulabels):
|
||||||
|
kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
|
||||||
|
plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
|
||||||
|
ax.set_xlabel(r"PC$_1$")
|
||||||
|
ax.set_ylabel(r"PC$_2$")
|
||||||
|
try:
|
||||||
|
pylab.tight_layout()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return plots
|
||||||
|
|
@ -1,5 +1,55 @@
|
||||||
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign
|
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma,polygamma
|
||||||
|
|
||||||
|
class gammaln(Function):
|
||||||
|
nargs = 1
|
||||||
|
|
||||||
|
def fdiff(self, argindex=1):
|
||||||
|
x=self.args[0]
|
||||||
|
return polygamma(0, x)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
if x.is_Number:
|
||||||
|
return log(gamma(x))
|
||||||
|
|
||||||
|
|
||||||
|
class ln_cum_gaussian(Function):
|
||||||
|
nargs = 1
|
||||||
|
|
||||||
|
def fdiff(self, argindex=1):
|
||||||
|
x = self.args[0]
|
||||||
|
return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
if x.is_Number:
|
||||||
|
return log(cum_gaussian(x))
|
||||||
|
|
||||||
|
def _eval_is_real(self):
|
||||||
|
return self.args[0].is_real
|
||||||
|
|
||||||
|
class cum_gaussian(Function):
|
||||||
|
nargs = 1
|
||||||
|
def fdiff(self, argindex=1):
|
||||||
|
x = self.args[0]
|
||||||
|
return gaussian(x)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
if x.is_Number:
|
||||||
|
return 0.5*(1+erf(sqrt(2)/2*x))
|
||||||
|
|
||||||
|
def _eval_is_real(self):
|
||||||
|
return self.args[0].is_real
|
||||||
|
|
||||||
|
class gaussian(Function):
|
||||||
|
nargs = 1
|
||||||
|
@classmethod
|
||||||
|
def eval(cls, x):
|
||||||
|
return 1/sqrt(2*pi)*exp(-0.5*x*x)
|
||||||
|
|
||||||
|
def _eval_is_real(self):
|
||||||
|
return True
|
||||||
|
|
||||||
class ln_diff_erf(Function):
|
class ln_diff_erf(Function):
|
||||||
nargs = 2
|
nargs = 2
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue