merged last devel

beckdaniel 2016-04-11 11:26:46 +01:00
commit 45f692340a
170 changed files with 39094 additions and 4107 deletions


@ -9,20 +9,16 @@ omit = ./GPy/testing/*.py, travis_tests.py, setup.py, ./GPy/__version__.py
exclude_lines = exclude_lines =
# Have to re-enable the standard pragma # Have to re-enable the standard pragma
pragma: no cover pragma: no cover
verbose
# Don't complain about missing debug-only code: # Don't complain about missing debug-only code:
if self\.debug if self\.debug
# Don't complain if tests don't hit defensive assertion code: # Don't complain if tests don't hit defensive assertion code:
raise AssertionError raise
raise NotImplementedError except
raise NotImplemented
except NotImplementedError
except NotImplemented
except AssertionError
except ImportError
pass pass
Not implemented
# Don't complain if non-runnable code isn't run: # Don't complain if non-runnable code isn't run:
if 0: if 0:
@ -31,4 +27,4 @@ exclude_lines =
# Don't fail on python3 catch clauses: # Don't fail on python3 catch clauses:
python3 python3
ignore_errors = True ignore_errors = True


@ -30,6 +30,8 @@ install:
- source install_retry.sh - source install_retry.sh
- pip install codecov - pip install codecov
- pip install pypandoc - pip install pypandoc
- pip install git+git://github.com/BRML/climin.git
- pip install autograd
- python setup.py develop - python setup.py develop
script: script:
@ -56,8 +58,7 @@ deploy:
password: password:
secure: "vMEOlP7DQhFJ7hQAKtKC5hrJXFl5BkUt4nXdosWWiw//Kg8E+PPLg88XPI2gqIosir9wwgtbSBBbbwCxkM6uxRNMpoNR8Ixyv9fmSXp4rLl7bbBY768W7IRXKIBjpuEy2brQjoT+CwDDSzUkckHvuUjJDNRvUv8ab4P/qYO1LG4=" secure: "vMEOlP7DQhFJ7hQAKtKC5hrJXFl5BkUt4nXdosWWiw//Kg8E+PPLg88XPI2gqIosir9wwgtbSBBbbwCxkM6uxRNMpoNR8Ixyv9fmSXp4rLl7bbBY768W7IRXKIBjpuEy2brQjoT+CwDDSzUkckHvuUjJDNRvUv8ab4P/qYO1LG4="
on: on:
tags: false tags: true
branch: devel branch: deploy
server: https://testpypi.python.org/pypi
distributions: $DIST distributions: $DIST
skip_cleanup: true skip_cleanup: true


@ -1 +1 @@
[GPy Authors](https://github.com/SheffieldML/GPy/graphs/contributors) GPy Authors: https://github.com/SheffieldML/GPy/graphs/contributors


@ -28,16 +28,18 @@ from .core.parameterization import Param, Parameterized, ObsAr, transformations
from .__version__ import __version__ from .__version__ import __version__
from numpy.testing import Tester from numpy.testing import Tester
#@nottest
try: with warnings.catch_warnings():
#Get rid of nose dependency by only ignoring if you have nose installed warnings.simplefilter('ignore')
from nose.tools import nottest try:
@nottest #Get rid of nose dependency by only ignoring if you have nose installed
def tests(verbose=10): from nose.tools import nottest
Tester(testing).test(verbose=verbose) @nottest
except: def tests(verbose=10):
def tests(verbose=10): Tester(testing).test(verbose=verbose)
Tester(testing).test(verbose=verbose) except:
def tests(verbose=10):
Tester(testing).test(verbose=verbose)
def load(file_or_path): def load(file_or_path):
""" """


@ -1 +1 @@
__version__ = "0.9.7" __version__ = "1.0.7"


@ -43,4 +43,4 @@ def randomize(self, rand_gen=None, *args, **kwargs):
Model.randomize = randomize Model.randomize = randomize
Param.randomize = randomize Param.randomize = randomize
Parameterized.randomize = randomize Parameterized.randomize = randomize


@ -212,41 +212,18 @@ class GP(Model):
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*} = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
""" """
if kern is None: mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew, pred_var=self._predictive_variable, full_cov=full_cov)
kern = self.kern
Kx = kern.K(self._predictive_variable, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if len(mu.shape)==1:
mu = mu.reshape(-1,1)
if full_cov:
Kxx = kern.K(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
from ..util.linalg import mdot
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None: if self.mean_function is not None:
mu += self.mean_function.f(Xnew) mu += self.mean_function.f(Xnew)
return mu, var return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None): def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
""" """
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew. This includes the likelihood
variance added to the predicted underlying function (usually referred to as f).
In order to predict without adding in the likelihood give
`include_likelihood=False`, or refer to self.predict_noiseless().
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray (Nnew x self.input_dim) :type Xnew: np.ndarray (Nnew x self.input_dim)
@ -256,6 +233,8 @@ class GP(Model):
:param Y_metadata: metadata about the predicting point to pass to the likelihood :param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model :param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses. kern). this is useful for examining e.g. subprocesses.
:param bool include_likelihood: Whether or not to add likelihood noise to the predicted underlying latent function f.
:returns: (mean, var): :returns: (mean, var):
mean: posterior mean, a Numpy array, Nnew x self.input_dim mean: posterior mean, a Numpy array, Nnew x self.input_dim
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@ -270,11 +249,40 @@ class GP(Model):
if self.normalizer is not None: if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var) mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
# now push through likelihood if include_likelihood:
if likelihood is None: # now push through likelihood
likelihood = self.likelihood if likelihood is None:
mean, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata) likelihood = self.likelihood
return mean, var mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mu, var
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
"""
Convenience function to predict the underlying function of the GP (often
referred to as f) without adding the likelihood variance to the
predictive distribution.
This is most likely what you want to use for your predictions.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray (Nnew x self.input_dim)
:param full_cov: whether to return the full covariance matrix, or just
the diagonal
:type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses.
:returns: (mean, var):
mean: posterior mean, a Numpy array, Nnew x self.input_dim
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
"""
return self.predict(Xnew, full_cov, Y_metadata, kern, None, False)
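A minimal usage sketch of the predict / predict_noiseless split introduced above (the data, kernel and hyperparameter values below are made up for illustration; only the method names and the include_likelihood argument come from the code above):
import numpy as np
import GPy
X = np.random.uniform(0., 10., (50, 1))
Y = np.sin(X) + np.random.randn(50, 1) * 0.1
m = GPy.models.GPRegression(X, Y)  # Gaussian likelihood
m.optimize()
Xnew = np.linspace(0., 10., 5)[:, None]
mu_y, var_y = m.predict(Xnew)            # includes the likelihood variance by default
mu_f, var_f = m.predict_noiseless(Xnew)  # posterior on the latent f only
# The posterior means agree; the predictive variance differs by the noise variance.
assert np.allclose(mu_y, mu_f)
assert np.all(var_y >= var_f)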
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None): def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
""" """
@ -395,9 +403,9 @@ class GP(Model):
var_jac = compute_cov_inner(self.posterior.woodbury_inv) var_jac = compute_cov_inner(self.posterior.woodbury_inv)
return mean_jac, var_jac return mean_jac, var_jac
def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True): def predict_wishart_embedding(self, Xnew, kern=None, mean=True, covariance=True):
""" """
Predict the wishard embedding G of the GP. This is the density of the Predict the wishart embedding G of the GP. This is the density of the
input of the GP defined by the probabilistic function mapping f. input of the GP defined by the probabilistic function mapping f.
G = J_mean.T*J_mean + output_dim*J_cov. G = J_mean.T*J_mean + output_dim*J_cov.
@ -425,15 +433,26 @@ class GP(Model):
G += Sigma G += Sigma
return G return G
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True): def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True):
warnings.warn("Wrong naming, use predict_wishart_embedding instead. Will be removed in future versions!", DeprecationWarning)
return self.predict_wishart_embedding(Xnew, kern, mean, covariance)
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True, dimensions=None):
""" """
Predict the magnification factor as Predict the magnification factor as
sqrt(det(G)) sqrt(det(G))
for each point N in Xnew for each point N in Xnew.
:param bool mean: whether to include the mean of the wishart embedding.
:param bool covariance: whether to include the covariance of the wishart embedding.
:param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
""" """
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance) G = self.predict_wishart_embedding(Xnew, kern, mean, covariance)
if dimensions is None:
dimensions = self.get_most_significant_input_dimensions()[:2]
G = G[:, dimensions][:,:,dimensions]
from ..util.linalg import jitchol from ..util.linalg import jitchol
mag = np.empty(Xnew.shape[0]) mag = np.empty(Xnew.shape[0])
for n in range(Xnew.shape[0]): for n in range(Xnew.shape[0]):
@ -513,21 +532,23 @@ class GP(Model):
def get_most_significant_input_dimensions(self, which_indices=None): def get_most_significant_input_dimensions(self, which_indices=None):
return self.kern.get_most_significant_input_dimensions(which_indices) return self.kern.get_most_significant_input_dimensions(which_indices)
def optimize(self, optimizer=None, start=None, **kwargs): def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
""" """
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be: kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations :param max_iters: maximum number of function evaluations
:type max_f_eval: int :type max_iters: int
:messages: whether to display during optimisation :messages: whether to display during optimisation
:type messages: bool :type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'. :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
:type optimizer: string :type optimizer: string
:param bool ipython_notebook: whether to use ipython notebook widgets or not.
:param bool clear_after_finish: if in ipython notebook, we can clear the widgets after optimization.
""" """
self.inference_method.on_optimization_start() self.inference_method.on_optimization_start()
try: try:
super(GP, self).optimize(optimizer, start, **kwargs) super(GP, self).optimize(optimizer, start, messages, max_iters, ipython_notebook, clear_after_finish, **kwargs)
except KeyboardInterrupt: except KeyboardInterrupt:
print("KeyboardInterrupt caught, calling on_optimization_end() to round things up") print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
self.inference_method.on_optimization_end() self.inference_method.on_optimization_end()
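A short sketch of calling the new explicit optimize signature (the model m is assumed to exist; the argument values are illustrative):
m.optimize(optimizer='lbfgs', messages=True, max_iters=500)
# Outside a notebook, or to avoid notebook widgets, keep the progress output plain:
m.optimize(messages=True, ipython_notebook=False)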


@ -45,4 +45,4 @@ class Model(ParamzModel, Priorizable):
(including the MAP prior), so we return it here. If your model is not (including the MAP prior), so we return it here. If your model is not
probabilistic, just return your *negative* gradient here! probabilistic, just return your *negative* gradient here!
""" """
return -(self._log_likelihood_gradients() + self._log_prior_gradients()) return -(self._log_likelihood_gradients() + self._log_prior_gradients())


@ -3,7 +3,7 @@
from .param import Param from .param import Param
from .parameterized import Parameterized from .parameterized import Parameterized
from paramz import transformations from . import transformations
from paramz.core import lists_and_dicts, index_operations, observable_array, observable from paramz.core import lists_and_dicts, index_operations, observable_array, observable
from paramz import ties_and_remappings, ObsAr from paramz import ties_and_remappings, ObsAr


@ -7,4 +7,4 @@ from paramz.transformations import __fixed__
import logging, numpy as np import logging, numpy as np
class Param(Param, Priorizable): class Param(Param, Priorizable):
pass pass


@ -49,4 +49,4 @@ class Parameterized(Parameterized, Priorizable):
If you want to operate on all parameters use m[''] to wildcard select all paramters If you want to operate on all parameters use m[''] to wildcard select all paramters
and concatenate them. Printing m[''] will result in printing of all parameters in detail. and concatenate them. Printing m[''] will result in printing of all parameters in detail.
""" """
pass pass


@ -1,4 +1,5 @@
# Copyright (c) 2014, Max Zwiessele, James Hensman # Copyright (c) 2014, Max Zwiessele, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from paramz.transformations import * from paramz.transformations import *
from paramz.transformations import __fixed__


@ -44,7 +44,7 @@ class SparseGP(GP):
#pick a sensible inference method #pick a sensible inference method
if inference_method is None: if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian): if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC(limit=1) inference_method = var_dtc.VarDTC(limit=3)
else: else:
#inference_method = ?? #inference_method = ??
raise NotImplementedError("what to do what to do?") raise NotImplementedError("what to do what to do?")
@ -113,85 +113,3 @@ class SparseGP(GP):
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
self._Zgrad = self.Z.gradient.copy() self._Zgrad = self.Z.gradient.copy()
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
Make a prediction for the latent function values.
For deterministic (certain) inputs we give back a full_cov of shape NxN;
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is off,
we take only the diagonal elements across N.
For uncertain inputs, the SparseGP bound cannot predict the full covariance matrix full_cov for now.
The implementation of that will follow. However, for each dimension the
covariance changes, so if full_cov is False (standard), we return the variance
for each dimension [NxD].
"""
if kern is None: kern = self.kern
if not isinstance(Xnew, VariationalPosterior):
# Kx = kern.K(self._predictive_variable, Xnew)
# mu = np.dot(Kx.T, self.posterior.woodbury_vector)
# if full_cov:
# Kxx = kern.K(Xnew)
# if self.posterior.woodbury_inv.ndim == 2:
# var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
# elif self.posterior.woodbury_inv.ndim == 3:
# var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
# for i in range(var.shape[2]):
# var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
# var = var
# else:
# Kxx = kern.Kdiag(Xnew)
# if self.posterior.woodbury_inv.ndim == 2:
# var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
# elif self.posterior.woodbury_inv.ndim == 3:
# var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
# for i in range(var.shape[1]):
# var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
# var = var
# #add in the mean function
# if self.mean_function is not None:
# mu += self.mean_function.f(Xnew)
mu, var = super(SparseGP, self)._raw_predict(Xnew, full_cov, kern)
else:
psi0_star = kern.psi0(self._predictive_variable, Xnew)
psi1_star = kern.psi1(self._predictive_variable, Xnew)
psi2_star = kern.psi2n(self._predictive_variable, Xnew)
la = self.posterior.woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions?
N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
if full_cov:
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
di = np.diag_indices(la.shape[1])
else:
tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
if self.posterior.woodbury_inv.ndim==2:
var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.flat)[:,None]
else:
var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.reshape(-1,D))
assert np.all(var>=-1e-5), "The predicted variance goes negative!: "+str(var)
var = np.clip(var,1e-15,np.inf)
# for i in range(Xnew.shape[0]):
# _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
# psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
# tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
#
# var_ = mdot(la.T, tmp, la)
# p0 = psi0_star[i]
# t = np.atleast_3d(self.posterior.woodbury_inv)
# t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
#
# if full_cov:
# var_[di] += p0
# var_[di] += -t2
# var[i] = var_
# else:
# var[i] = np.diag(var_)+p0-t2
return mu, var


@ -0,0 +1,26 @@
import GPy
import numpy as np
import matplotlib.pyplot as plt
import GPy.models.state_space_model as SS_model
X = np.linspace(0, 10, 2000)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
kernel1 = GPy.kern.Matern32(X.shape[1])
m1 = GPy.models.GPRegression(X,Y, kernel1)
print(m1)
m1.optimize(optimizer='bfgs',messages=True)
print(m1)
kernel2 = GPy.kern.sde_Matern32(X.shape[1])
#m2 = SS_model.StateSpace(X,Y, kernel2)
m2 = GPy.models.StateSpace(X,Y, kernel2)
print(m2)
m2.optimize(optimizer='bfgs',messages=True)
print(m2)


@ -1,14 +1,13 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from .posterior import Posterior from .posterior import PosteriorExact as Posterior
from ...util.linalg import pdinv, dpotrs, tdot from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag from ...util import diag
import numpy as np import numpy as np
from . import LatentFunctionInference from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class ExactGaussianInference(LatentFunctionInference): class ExactGaussianInference(LatentFunctionInference):
""" """
An object for inference when the likelihood is Gaussian. An object for inference when the likelihood is Gaussian.


@ -2,7 +2,8 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot
from GPy.core.parameterization.variational import VariationalPosterior
class Posterior(object): class Posterior(object):
""" """
@ -187,3 +188,85 @@ class Posterior(object):
if self._K_chol is None: if self._K_chol is None:
self._K_chol = jitchol(self._K) self._K_chol = jitchol(self._K)
return self._K_chol return self._K_chol
def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
woodbury_vector = self.woodbury_vector
woodbury_inv = self.woodbury_inv
if not isinstance(Xnew, VariationalPosterior):
Kx = kern.K(pred_var, Xnew)
mu = np.dot(Kx.T, woodbury_vector)
if len(mu.shape)==1:
mu = mu.reshape(-1,1)
if full_cov:
Kxx = kern.K(Xnew)
if woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(woodbury_inv, Kx))
elif woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],Kxx.shape[1],woodbury_inv.shape[2]))
from ...util.linalg import mdot
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(Xnew)
if woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
else:
psi0_star = kern.psi0(pred_var, Xnew)
psi1_star = kern.psi1(pred_var, Xnew)
psi2_star = kern.psi2n(pred_var, Xnew)
la = woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions?
N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
if full_cov:
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
di = np.diag_indices(la.shape[1])
else:
tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
if woodbury_inv.ndim==2:
var += -psi2_star.reshape(N,-1).dot(woodbury_inv.flat)[:,None]
else:
var += -psi2_star.reshape(N,-1).dot(woodbury_inv.reshape(-1,D))
var = np.clip(var,1e-15,np.inf)
return mu, var
class PosteriorExact(Posterior):
def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
Kx = kern.K(pred_var, Xnew)
mu = np.dot(Kx.T, self.woodbury_vector)
if len(mu.shape)==1:
mu = mu.reshape(-1,1)
if full_cov:
Kxx = kern.K(Xnew)
if self._woodbury_chol.ndim == 2:
tmp = dtrtrs(self._woodbury_chol, Kx)[0]
var = Kxx - tdot(tmp.T)
elif self._woodbury_chol.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],Kxx.shape[1],self._woodbury_chol.shape[2]))
for i in range(var.shape[2]):
tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0]
var[:, :, i] = (Kxx - tdot(tmp.T))
var = var
else:
Kxx = kern.Kdiag(Xnew)
if self._woodbury_chol.ndim == 2:
tmp = dtrtrs(self._woodbury_chol, Kx)[0]
var = (Kxx - np.square(tmp).sum(0))[:,None]
elif self._woodbury_chol.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],self._woodbury_chol.shape[2]))
for i in range(var.shape[1]):
tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0]
var[:, i] = (Kxx - np.square(tmp).sum(0))
var = var
return mu, var
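A hedged numpy/scipy sketch of why the Cholesky-based route in PosteriorExact (dtrtrs + tdot) yields the same predictive variance as the explicit woodbury_inv product used by the generic Posterior; all names below are local stand-ins, not GPy internals:
import numpy as np
from scipy.linalg import cholesky, solve_triangular
rng = np.random.RandomState(0)
A = rng.randn(6, 6)
W = np.dot(A, A.T) + 6 * np.eye(6)   # stand-in for the (SPD) woodbury matrix
W_chol = cholesky(W, lower=True)     # stand-in for self._woodbury_chol
Kx = rng.randn(6, 4)
Kxx = np.eye(4)
# Route 1: explicit inverse, as in Posterior._raw_predict
var_inv = Kxx - np.dot(Kx.T, np.dot(np.linalg.inv(W), Kx))
# Route 2: triangular solve against the Cholesky factor, as in PosteriorExact
tmp = solve_triangular(W_chol, Kx, lower=True)
var_chol = Kxx - np.dot(tmp.T, tmp)
assert np.allclose(var_inv, var_chol)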


@ -21,7 +21,7 @@ class VarDTC_minibatch(LatentFunctionInference):
""" """
const_jitter = 1e-8 const_jitter = 1e-8
def __init__(self, batchsize=None, limit=1, mpi_comm=None): def __init__(self, batchsize=None, limit=3, mpi_comm=None):
self.batchsize = batchsize self.batchsize = batchsize
self.mpi_comm = mpi_comm self.mpi_comm = mpi_comm


@ -37,16 +37,14 @@ class Metropolis_Hastings(object):
def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400): def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
current = self.model.optimizer_array current = self.model.optimizer_array
fcurrent = self.model.log_likelihood() + self.model.log_prior() + \ fcurrent = self.model.log_likelihood() + self.model.log_prior()
self.model._log_det_jacobian()
accepted = np.zeros(Ntotal,dtype=np.bool) accepted = np.zeros(Ntotal,dtype=np.bool)
for it in range(Ntotal): for it in range(Ntotal):
print("sample %d of %d\r"%(it,Ntotal),end="\t") print("sample %d of %d\r"%(it+1,Ntotal),end="")
sys.stdout.flush() sys.stdout.flush()
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale) prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
self.model.optimizer_array = prop self.model.optimizer_array = prop
fprop = self.model.log_likelihood() + self.model.log_prior() + \ fprop = self.model.log_likelihood() + self.model.log_prior()
self.model._log_det_jacobian()
if fprop>fcurrent:#sample accepted, going 'uphill' if fprop>fcurrent:#sample accepted, going 'uphill'
accepted[it] = True accepted[it] = True


@ -1,5 +1,8 @@
from paramz.optimization import stochastics, Optimizer from paramz.optimization import Optimizer
from . import stochastics
from paramz.optimization import * from paramz.optimization import *
import sys import sys
sys.modules['GPy.inference.optimization.stochastics'] = stochastics sys.modules['GPy.inference.optimization.stochastics'] = stochastics
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer


@ -0,0 +1,119 @@
#===============================================================================
# Copyright (c) 2015, Max Zwiessele
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of paramax nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================
class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.
'''
def __init__(self, model):
"""
Initialize this stochastic container using the given model
"""
def do_stochastics(self):
"""
Update the internal state to the next batch of the stochastic
descent algorithm.
"""
pass
def reset(self):
"""
Reset the state of this stochastics generator.
"""
class SparseGPMissing(StochasticStorage):
def __init__(self, model, batchsize=1):
"""
Here we want to loop over all dimensions every time.
Thus, we just make sure the loop goes over self.d every time.
We try to group dimensions with the same missing-data pattern into batches,
which speeds up calculations significantly.
"""
import numpy as np
self.Y = model.Y_normalized
bdict = {}
#For N > 1000 array2string default crops
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y)[:, d]
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage):
"""
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1, missing_data=True):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.missing_data = missing_data
self.reset()
self.do_stochastics()
def do_stochastics(self):
import numpy as np
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
else:
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
if self.missing_data:
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
else:
self.d = [[self.d, None]]
def reset(self):
self.current_dim = -1
self.d = None


@ -10,7 +10,7 @@ from .src.add import Add
from .src.prod import Prod from .src.prod import Prod
from .src.rbf import RBF from .src.rbf import RBF
from .src.linear import Linear, LinearFull from .src.linear import Linear, LinearFull
from .src.static import Bias, White, Fixed from .src.static import Bias, White, Fixed, WhiteHeteroscedastic
from .src.brownian import Brownian from .src.brownian import Brownian
from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from .src.mlp import MLP from .src.mlp import MLP
@ -28,4 +28,12 @@ from .src.trunclinear import TruncLinear,TruncLinear_inf
from .src.splitKern import SplitKern,DEtime from .src.splitKern import SplitKern,DEtime
from .src.splitKern import DEtime as DiffGenomeKern from .src.splitKern import DEtime as DiffGenomeKern
from .src.spline import Spline from .src.spline import Spline
from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
from .src.sde_matern import sde_Matern32
from .src.sde_matern import sde_Matern52
from .src.sde_linear import sde_Linear
from .src.sde_standard_periodic import sde_StdPeriodic
from .src.sde_static import sde_White, sde_Bias
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
from .src.sde_brownian import sde_Brownian


@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Brownian motion kernel:
.. math::
k(x,y) = \sigma^2 min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # the variance parameter of the Brownian motion kernel
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)


@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
import numpy as np
class sde_Linear(Linear):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
"""
Modify the init method, because one extra parameter is required. X - points
on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

GPy/kern/_src/sde_matern.py Normal file (135 lines)

@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .stationary import Matern32
from .stationary import Matern52
import numpy as np
class sde_Matern32(Matern32):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Matern52(Matern52):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
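A minimal sketch of inspecting the state-space form returned by the new sde() methods, using sde_Matern32 (the hyperparameter values are illustrative; the expected shapes follow from the code above):
import numpy as np
import GPy
k = GPy.kern.sde_Matern32(1, variance=1.5, lengthscale=0.7)
F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 = k.sde()
# Matern 3/2 has a two-dimensional state, and the derivative stacks carry
# one slice per kernel parameter (variance, lengthscale).
assert F.shape == (2, 2) and H.shape == (1, 2) and dF.shape == (2, 2, 2)
# The stationary covariance reproduces the kernel variance in the observed state.
assert np.isclose(Pinf[0, 0], 1.5)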


@ -0,0 +1,178 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Standard Periodic kernel:
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right]
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.wavelengths.gradient = gradients[1]
self.lengthscales.gradient = gradients[2]
def sde(self):
"""
Return the state space representation of the covariance.
! Note: one must constrain the lengthscale not to drop below 0.25;
below this the Bessel functions of the first kind grow very large.
! Note: one must also keep the wavelength from becoming very small, because then
the gradients wrt the wavelength become unstable.
However, this might depend on the data. For a test example with
300 data points the lower limit is 0.15.
"""
# Params to use: (in that order)
#self.variance
#self.wavelengths
#self.lengthscales
N = 7 # approximation order
w0 = 2*np.pi/self.wavelengths # frequency
lengthscales = 2*self.lengthscales
[q2,dq2l] = seriescoeff(N,lengthscales,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
# Derivatives self.wavelengths
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.wavelengths );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
# Derivatives self.lengthscales
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
"""
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
Reference is:
[1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
covariance functions and state space models. In Proceedings of the
Seventeenth International Conference on Artificial Intelligence and
Statistics (AISTATS 2014). JMLR: W&CP, volume 33.
Note! Only the infinite approximation (through Bessel function)
is currently implemented.
Input:
----------------
m: int
Degree of approximation. Default 6.
lengthScale: float
Length scale parameter in the kernel
magnSigma2:float
Multiplier in front of the kernel.
Output:
-----------------
coeffs: array(m+1)
Covariance series coefficients
coeffs_dl: array(m+1)
Derivatives of the coefficients with respect to lengthscale.
"""
if true_covariance:
bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
M,J = np.meshgrid(range(0,m+1),range(0,m+1))
coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
(lengthScale**(-2))**J *magnSigma2
coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)
coeffs = np.sum(coeffs,0)
else:
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
(-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )
# The first element
coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl
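A hedged sanity check of the series approximation implemented by seriescoeff: at tau = 0 the truncated expansion k(0) = sum_j q_j^2 should approximately recover the kernel variance magnSigma2 (the import path below is an assumption; adjust it to wherever this module lives in the package):
import numpy as np
from GPy.kern._src.sde_standard_periodic import seriescoeff  # path assumed
coeffs, coeffs_dl = seriescoeff(m=6, lengthScale=1.0, magnSigma2=2.0)
assert np.isclose(coeffs.sum(), 2.0, atol=1e-3)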

GPy/kern/_src/sde_static.py Normal file (101 lines)

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
from .static import Bias
import numpy as np
class sde_White(White):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
White kernel:
.. math::
k(x,y) = \alpha*\delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Bias kernel:
.. math::
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)


@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance several stationary covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .rbf import RBF
from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Radial Basis Function kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10# approximation order ( number of terms in exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
F = np.diag(np.ones((N-1,)),1)
F[-1,:] = -aa[-1:0:-1]
L= np.zeros((N,1))
L[N-1,0] = 1
H = np.zeros((1,N))
H[0,0] = 1
# Infinite covariance:
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
dPinf_variance = Pinf/self.variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
P0 = Pinf.copy()
dP0 = dPinf.copy()
# Benefits of this are not very sound. Helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Exponential(Exponential):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Exponential kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale)
F = np.array(((-1.0/lengthscale,),))
L = np.array(((1.0,),))
Qc = np.array( ((2.0*variance/lengthscale,),) )
H = np.array(((1.0,),))
Pinf = np.array(((variance,),))
P0 = Pinf.copy()
dF = np.zeros((1,1,2));
dQc = np.zeros((1,1,2));
dPinf = np.zeros((1,1,2));
dF[:,:,0] = 0.0
dF[:,:,1] = 1.0/lengthscale**2
dQc[:,:,0] = 2.0/lengthscale
dQc[:,:,1] = -2.0*variance/lengthscale**2
dPinf[:,:,0] = 1.0
dPinf[:,:,1] = 0.0
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_RatQuad(RatQuad):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Rational Quadratic kernel:
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
assert False, 'Not Implemented'
# Params to use:
# self.lengthscale
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)


@ -162,4 +162,4 @@ class ODE_t(Kern):
self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK) self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK)
self.ubias.gradient = np.sum(dkdubias * dL_dK) self.ubias.gradient = np.sum(dkdubias * dL_dK)


@ -1 +1 @@
from . import psi_comp from . import psi_comp


@ -19,8 +19,8 @@ class Add(CombinationKernel):
if isinstance(kern, Add): if isinstance(kern, Add):
del subkerns[i] del subkerns[i]
for part in kern.parts[::-1]: for part in kern.parts[::-1]:
kern.unlink_parameter(part) #kern.unlink_parameter(part)
subkerns.insert(i, part) subkerns.insert(i, part.copy())
super(Add, self).__init__(subkerns, name) super(Add, self).__init__(subkerns, name)
self._exact_psicomp = self._check_exact_psicomp() self._exact_psicomp = self._check_exact_psicomp()
@ -37,7 +37,7 @@ class Add(CombinationKernel):
else: else:
return False return False
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None): def K(self, X, X2=None, which_parts=None):
""" """
Add all kernels together. Add all kernels together.
@ -51,7 +51,7 @@ class Add(CombinationKernel):
which_parts = [which_parts] which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts)) return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None): def Kdiag(self, X, which_parts=None):
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
@ -98,17 +98,17 @@ class Add(CombinationKernel):
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts] [target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
return target return target
@Cache_this(limit=1, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi0(self,Z,variational_posterior) if not self._exact_psicomp: return Kern.psi0(self,Z,variational_posterior)
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts)) return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=1, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi1(self,Z,variational_posterior) if not self._exact_psicomp: return Kern.psi1(self,Z,variational_posterior)
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts)) return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=1, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi2(self,Z,variational_posterior) if not self._exact_psicomp: return Kern.psi2(self,Z,variational_posterior)
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts)) psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
@ -144,7 +144,7 @@ class Add(CombinationKernel):
raise NotImplementedError("psi2 cannot be computed for this kernel") raise NotImplementedError("psi2 cannot be computed for this kernel")
return psi2 return psi2
@Cache_this(limit=1, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def psi2n(self, Z, variational_posterior): def psi2n(self, Z, variational_posterior):
if not self._exact_psicomp: return Kern.psi2n(self, Z, variational_posterior) if not self._exact_psicomp: return Kern.psi2n(self, Z, variational_posterior)
psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts)) psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))
@ -241,16 +241,20 @@ class Add(CombinationKernel):
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))] [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
return target_grads return target_grads
def add(self, other): #def add(self, other):
if isinstance(other, Add): # parts = self.parts
other_params = other.parameters[:] # if 0:#isinstance(other, Add):
for p in other_params: # #other_params = other.parameters[:]
other.unlink_parameter(p) # for p in other.parts[:]:
self.link_parameters(*other_params) # other.unlink_parameter(p)
else: # parts.extend(other.parts)
self.link_parameter(other) # #self.link_parameters(*other_params)
self.input_dim, self._all_dims_active = self.get_input_dim_active_dims(self.parts) #
return self # else:
# #self.link_parameter(other)
# parts.append(other)
# #self.input_dim, self._all_dims_active = self.get_input_dim_active_dims(parts)
# return Add([p for p in parts], self.name)
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
if summarize: if summarize:
@ -259,4 +263,94 @@ class Add(CombinationKernel):
i_s[k._all_dims_active] += k.input_sensitivity(summarize) i_s[k._all_dims_active] += k.input_sensitivity(summarize)
return i_s return i_s
else: else:
return super(Add, self).input_sensitivity(summarize) return super(Add, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
part_start_param_index = 0
for p in self.parts:
if not p.is_fixed:
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
Support adding kernels for sde representation
"""
import scipy.linalg as la
F = None
L = None
Qc = None
H = None
Pinf = None
P0 = None
dF = None
dQc = None
dPinf = None
dP0 = None
n = 0
nq = 0
nd = 0
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
F = la.block_diag(F,Ft) if (F is not None) else Ft
L = la.block_diag(L,Lt) if (L is not None) else Lt
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
H = np.hstack((H,Ht)) if (H is not None) else Ht
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
if dF is not None:
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
'constant', constant_values=0)
dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
else:
dF = dFt
if dQc is not None:
dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
'constant', constant_values=0)
dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
else:
dQc = dQct
if dPinf is not None:
dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
'constant', constant_values=0)
dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
else:
dPinf = dPinft
if dP0 is not None:
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
'constant', constant_values=0)
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
else:
dP0 = dP0t
n += Ft.shape[0]
nq += Qct.shape[0]
nd += dFt.shape[2]
assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
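For orientation, the block-diagonal stacking performed above can be reproduced in a few lines of plain numpy/scipy. This is an illustrative standalone sketch; the helper ou_ss and all numeric values are invented for the example and are not part of the kernel code.

import numpy as np
import scipy.linalg as la

def ou_ss(variance, lengthscale):
    # state-space form of a scalar Ornstein-Uhlenbeck (Exponential) kernel
    F = np.array([[-1.0 / lengthscale]])
    L = np.array([[1.0]])
    Qc = np.array([[2.0 * variance / lengthscale]])
    H = np.array([[1.0]])
    Pinf = np.array([[variance]])
    return F, L, Qc, H, Pinf

F1, L1, Qc1, H1, P1 = ou_ss(1.0, 0.5)
F2, L2, Qc2, H2, P2 = ou_ss(2.0, 3.0)

F = la.block_diag(F1, F2)      # dynamics of the summed kernel stack block-diagonally
L = la.block_diag(L1, L2)
Qc = la.block_diag(Qc1, Qc2)
Pinf = la.block_diag(P1, P2)
H = np.hstack((H1, H2))        # the observation sums the two latent processes

# the variance of the summed kernel at lag zero is the sum of the parts' variances
assert np.isclose(H.dot(Pinf).dot(H.T)[0, 0], 1.0 + 2.0)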

View file

@ -64,7 +64,7 @@ class EQ_ODE2(Kern):
self.W = Param('W', W) self.W = Param('W', W)
self.link_parameters(self.lengthscale, self.C, self.B, self.W) self.link_parameters(self.lengthscale, self.C, self.B, self.W)
@Cache_this(limit=2) @Cache_this(limit=3)
def K(self, X, X2=None): def K(self, X, X2=None):
#This way is not working, indexes are lost after using k._slice_X #This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int) #index = np.asarray(X, dtype=np.int)

View file

@ -56,14 +56,18 @@ class IndependentOutputs(CombinationKernel):
self.single_kern = False self.single_kern = False
self.kern = kernels self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name) super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim # The combination kernel ALWAYS puts the extra dimension last.
# Thus, the index dimension of this kernel is always the last dimension
# after slicing. This is why the index_dim is just the last column:
self.index_dim = -1
def K(self,X ,X2=None): def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None: if X2 is None:
target = np.zeros((X.shape[0], X.shape[0])) target = np.zeros((X.shape[0], X.shape[0]))
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)] #[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
[[target.__setitem__((s,ss), kern.K(X[s,:]) if s==ss else kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
@ -103,13 +107,10 @@ class IndependentOutputs(CombinationKernel):
target = np.zeros(X.shape) target = np.zeros(X.shape)
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None: if X2 is None:
# TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
# print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
values = np.unique(X[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values] slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) for kern, s in zip(kerns, slices):
for kern, s in zip(kerns, slices)] target[s] += kern.gradients_X(dL_dK[s, :][:, s],X[s], None)
#slices = index_to_slices(X[:,self.index_dim]) #slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(kerns, slices)] # for s in slices_i] for kern, slices_i in zip(kerns, slices)]
@ -121,8 +122,8 @@ class IndependentOutputs(CombinationKernel):
values = np.unique(X[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values] slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values] slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])) for kern, s, s2 in zip(kerns, slices, slices2):
for kern, s, s2 in zip(kerns, slices, slices2)] target[s] += kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])
# TODO: make work with index_to_slices # TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim]) #slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim]) #slices2 = index_to_slices(X2[:,self.index_dim])
@ -133,7 +134,9 @@ class IndependentOutputs(CombinationKernel):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape) target = np.zeros(X.shape)
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] for kern, slices_i in zip(kerns, slices):
for s in slices_i:
target[s] += kern.gradients_X_diag(dL_dKdiag[s],X[s])
return target return target
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):

View file

@ -49,10 +49,11 @@ class Kern(Parameterized):
if active_dims is None: if active_dims is None:
active_dims = np.arange(input_dim) active_dims = np.arange(input_dim)
self.active_dims = active_dims self.active_dims = np.asarray(active_dims, np.int_)
self._all_dims_active = np.atleast_1d(active_dims).astype(int)
assert self._all_dims_active.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, _all_dims_active={}".format(self.input_dim, self._all_dims_active.size, self._all_dims_active) self._all_dims_active = np.atleast_1d(self.active_dims).astype(int)
assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}".format(self.input_dim, self._all_dims_active.size)
self._sliced_X = 0 self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU self.useGPU = self._support_GPU and useGPU
@ -68,9 +69,12 @@ class Kern(Parameterized):
def _effective_input_dim(self): def _effective_input_dim(self):
return np.size(self._all_dims_active) return np.size(self._all_dims_active)
@Cache_this(limit=20) @Cache_this(limit=3)
def _slice_X(self, X): def _slice_X(self, X):
return X[:, self._all_dims_active] try:
return X[:, self._all_dims_active].astype('float')
except:
return X[:, self._all_dims_active]
def K(self, X, X2): def K(self, X, X2):
""" """
@ -296,12 +300,11 @@ class Kern(Parameterized):
return Prod([self, other], name) return Prod([self, other], name)
def _check_input_dim(self, X): def _check_input_dim(self, X):
assert X.shape[1] == self.input_dim, "{} did not specify _all_dims_active and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim) assert X.shape[1] == self.input_dim, "{} did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)
def _check_active_dims(self, X): def _check_active_dims(self, X):
assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape) assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
class CombinationKernel(Kern): class CombinationKernel(Kern):
""" """
Abstract super class for combination kernels. Abstract super class for combination kernels.
@ -319,10 +322,18 @@ class CombinationKernel(Kern):
:param array-like extra_dims: if needed extra dimensions for the combination kernel to work on :param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
""" """
assert all([isinstance(k, Kern) for k in kernels]) assert all([isinstance(k, Kern) for k in kernels])
extra_dims = np.array(extra_dims, dtype=int) extra_dims = np.asarray(extra_dims, dtype=int)
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), extra_dims)
input_dim = active_dims.size
# initialize the kernel with the full input_dim # initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name) super(CombinationKernel, self).__init__(input_dim, active_dims, name)
effective_input_dim = reduce(max, (k._all_dims_active.max() for k in kernels)) + 1
self._all_dims_active = np.array(np.concatenate((np.arange(effective_input_dim), extra_dims if extra_dims is not None else [])), dtype=int)
self.extra_dims = extra_dims self.extra_dims = extra_dims
self.link_parameters(*kernels) self.link_parameters(*kernels)
@ -330,16 +341,8 @@ class CombinationKernel(Kern):
def parts(self): def parts(self):
return self.parameters return self.parameters
def get_input_dim_active_dims(self, kernels, extra_dims = None): def _set_all_dims_ative(self):
self.active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) self._all_dims_active = np.atleast_1d(self.active_dims).astype(int)
#_all_dims_active = np.array(np.concatenate((_all_dims_active, extra_dims if extra_dims is not None else [])), dtype=int)
input_dim = reduce(max, (k._all_dims_active.max() for k in kernels)) + 1
if extra_dims is not None:
input_dim += extra_dims.size
_all_dims_active = np.arange(input_dim)
return input_dim, _all_dims_active
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
""" """

View file

@ -51,7 +51,7 @@ class Linear(Kern):
self.link_parameter(self.variances) self.link_parameter(self.variances)
self.psicomp = PSICOMP_Linear() self.psicomp = PSICOMP_Linear()
@Cache_this(limit=2) @Cache_this(limit=3)
def K(self, X, X2=None): def K(self, X, X2=None):
if self.ARD: if self.ARD:
if X2 is None: if X2 is None:
@ -62,7 +62,7 @@ class Linear(Kern):
else: else:
return self._dot_product(X, X2) * self.variances return self._dot_product(X, X2) * self.variances
@Cache_this(limit=1, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=(0,))
def _dot_product(self, X, X2=None): def _dot_product(self, X, X2=None):
if X2 is None: if X2 is None:
return tdot(X) return tdot(X)

View file

@ -45,7 +45,7 @@ class MLP(Kern):
self.link_parameters(self.variance, self.weight_variance, self.bias_variance) self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
@Cache_this(limit=20, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
if X2 is None: if X2 is None:
X_denom = np.sqrt(self._comp_prod(X)+1.) X_denom = np.sqrt(self._comp_prod(X)+1.)
@ -57,7 +57,7 @@ class MLP(Kern):
XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:] XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:]
return self.variance*four_over_tau*np.arcsin(XTX) return self.variance*four_over_tau*np.arcsin(XTX)
@Cache_this(limit=20, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def Kdiag(self, X): def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix for X.""" """Compute the diagonal of the covariance matrix for X."""
X_prod = self._comp_prod(X) X_prod = self._comp_prod(X)
@ -88,14 +88,14 @@ class MLP(Kern):
"""Gradient of diagonal of covariance with respect to X""" """Gradient of diagonal of covariance with respect to X"""
return self._comp_grads_diag(dL_dKdiag, X)[3] return self._comp_grads_diag(dL_dKdiag, X)[3]
@Cache_this(limit=50, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _comp_prod(self, X, X2=None): def _comp_prod(self, X, X2=None):
if X2 is None: if X2 is None:
return (np.square(X)*self.weight_variance).sum(axis=1)+self.bias_variance return (np.square(X)*self.weight_variance).sum(axis=1)+self.bias_variance
else: else:
return (X*self.weight_variance).dot(X2.T)+self.bias_variance return (X*self.weight_variance).dot(X2.T)+self.bias_variance
@Cache_this(limit=20, ignore_args=(1,)) @Cache_this(limit=3, ignore_args=(1,))
def _comp_grads(self, dL_dK, X, X2=None): def _comp_grads(self, dL_dK, X, X2=None):
var,w,b = self.variance, self.weight_variance, self.bias_variance var,w,b = self.variance, self.weight_variance, self.bias_variance
K = self.K(X, X2) K = self.K(X, X2)
@ -130,7 +130,7 @@ class MLP(Kern):
dX2 = common.T.dot(X)*w-((common*XTX).sum(axis=0)/(X2_prod+1.))[:,None]*X2*w dX2 = common.T.dot(X)*w-((common*XTX).sum(axis=0)/(X2_prod+1.))[:,None]*X2*w
return dvar, dw, db, dX, dX2 return dvar, dw, db, dX, dX2
@Cache_this(limit=20, ignore_args=(1,)) @Cache_this(limit=3, ignore_args=(1,))
def _comp_grads_diag(self, dL_dKdiag, X): def _comp_grads_diag(self, dL_dKdiag, X):
var,w,b = self.variance, self.weight_variance, self.bias_variance var,w,b = self.variance, self.weight_variance, self.bias_variance
K = self.Kdiag(X) K = self.Kdiag(X)

View file

@ -27,7 +27,7 @@ class Poly(Kern):
_, _, B = self._AB(X, X2) _, _, B = self._AB(X, X2)
return B * self.variance return B * self.variance
@Cache_this(limit=2) @Cache_this(limit=3)
def _AB(self, X, X2=None): def _AB(self, X, X2=None):
if X2 is None: if X2 is None:
dot_prod = np.dot(X, X.T) dot_prod = np.dot(X, X.T)

View file

@ -39,7 +39,7 @@ class Prod(CombinationKernel):
kernels.insert(i, part) kernels.insert(i, part)
super(Prod, self).__init__(kernels, name) super(Prod, self).__init__(kernels, name)
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None): def K(self, X, X2=None, which_parts=None):
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
@ -48,7 +48,7 @@ class Prod(CombinationKernel):
which_parts = [which_parts] which_parts = [which_parts]
return reduce(np.multiply, (p.K(X, X2) for p in which_parts)) return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=3, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None): def Kdiag(self, X, which_parts=None):
if which_parts is None: if which_parts is None:
which_parts = self.parts which_parts = self.parts
@ -105,3 +105,114 @@ class Prod(CombinationKernel):
return i_s return i_s
else: else:
return super(Prod, self).input_sensitivity(summarize) return super(Prod, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
part_start_param_index = 0
for p in self.parts:
if not p.is_fixed:
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
"""
F = np.array((0,), ndmin=2)
L = np.array((1,), ndmin=2)
Qc = np.array((1,), ndmin=2)
H = np.array((1,), ndmin=2)
Pinf = np.array((1,), ndmin=2)
P0 = np.array((1,), ndmin=2)
dF = None
dQc = None
dPinf = None
dP0 = None
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
# check derivative dimensions ->
number_of_parameters = len(p.param_array)
assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
# check derivative dimensions <-
# exception for periodic kernel
if (p.name == 'std_periodic'):
Qct = P_inft
dQct = dP_inft
dF = dkron(F,dF,Ft,dFt,'sum')
dQc = dkron(Qc,dQc,Qct,dQct,'prod')
dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
L = np.kron(L,Lt)
Qc = np.kron(Qc,Qct)
Pinf = np.kron(Pinf,P_inft)
P0 = np.kron(P0,P_inft)
H = np.kron(H,Ht)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
def dkron(A,dA,B,dB, operation='prod'):
"""
Function computes the derivative of Kronecker product A*B
(or Kronecker sum A+B).
Input:
-----------------------
A: 2D matrix
Some matrix
dA: 3D (or 2D matrix)
Derivatives of A
B: 2D matrix
Some matrix
dB: 3D (or 2D matrix)
Derivatives of B
operation: str 'prod' or 'sum'
Which operation is considered. If the operation is 'sum' it is assumed
that A and B are square matrices.
Output:
dC: 3D matrix
Derivative of Kronecker product A*B (or Kronecker sum A+B)
"""
if dA is None:
dA_param_num = 0
dA = np.zeros((A.shape[0], A.shape[1],1))
else:
dA_param_num = dA.shape[2]
if dB is None:
dB_param_num = 0
dB = np.zeros((B.shape[0], B.shape[1],1))
else:
dB_param_num = dB.shape[2]
# Space allocation for derivative matrix
dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num))
for k in range(dA_param_num):
if operation == 'prod':
dC[:,:,k] = np.kron(dA[:,:,k],B);
else:
dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] ))
for k in range(dB_param_num):
if operation == 'prod':
dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k])
else:
dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k])
return dC
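A quick standalone sanity check of dkron, assuming the function defined above is in scope; when B carries no parameters the product rule reduces to dC/dtheta = kron(dA/dtheta, B).

import numpy as np

A = np.array([[1.0, 2.0], [3.0, 4.0]])
dA = np.zeros((2, 2, 1))
dA[:, :, 0] = np.eye(2)        # pretend dA/dtheta = I for a single parameter theta
B = np.array([[0.5, 0.0], [1.0, 2.0]])

dC = dkron(A, dA, B, None, operation='prod')   # dB=None: B has no parameters
assert dC.shape == (4, 4, 1)
assert np.allclose(dC[:, :, 0], np.kron(dA[:, :, 0], B))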

View file

@ -21,7 +21,7 @@ from .gaussherm import PSICOMP_GH
from . import rbf_psi_comp, linear_psi_comp, ssrbf_psi_comp, sslinear_psi_comp from . import rbf_psi_comp, linear_psi_comp, ssrbf_psi_comp, sslinear_psi_comp
class PSICOMP_RBF(PSICOMP): class PSICOMP_RBF(PSICOMP):
@Cache_this(limit=10, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False): def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
variance, lengthscale = kern.variance, kern.lengthscale variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
@ -31,7 +31,7 @@ class PSICOMP_RBF(PSICOMP):
else: else:
raise ValueError("unknown distribution received for psi-statistics") raise ValueError("unknown distribution received for psi-statistics")
@Cache_this(limit=10, ignore_args=(0,2,3,4)) @Cache_this(limit=3, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
@ -43,7 +43,7 @@ class PSICOMP_RBF(PSICOMP):
class PSICOMP_Linear(PSICOMP): class PSICOMP_Linear(PSICOMP):
@Cache_this(limit=10, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False): def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
variances = kern.variances variances = kern.variances
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
@ -53,7 +53,7 @@ class PSICOMP_Linear(PSICOMP):
else: else:
raise ValueError("unknown distribution received for psi-statistics") raise ValueError("unknown distribution received for psi-statistics")
@Cache_this(limit=10, ignore_args=(0,2,3,4)) @Cache_this(limit=3, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variances = kern.variances variances = kern.variances
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):

View file

@ -27,7 +27,7 @@ class PSICOMP_GH(PSICOMP):
def _setup_observers(self): def _setup_observers(self):
pass pass
@Cache_this(limit=10, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=(0,))
def comp_K(self, Z, qX): def comp_K(self, Z, qX):
if self.Xs is None or self.Xs.shape != qX.mean.shape: if self.Xs is None or self.Xs.shape != qX.mean.shape:
from paramz import ObsAr from paramz import ObsAr
@ -38,7 +38,7 @@ class PSICOMP_GH(PSICOMP):
self.Xs[i] = self.locs[i]*S_sq+mu self.Xs[i] = self.locs[i]*S_sq+mu
return self.Xs return self.Xs
@Cache_this(limit=10, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, qX, return_psi2_n=False): def psicomputations(self, kern, Z, qX, return_psi2_n=False):
mu, S = qX.mean.values, qX.variance.values mu, S = qX.mean.values, qX.variance.values
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
@ -62,7 +62,7 @@ class PSICOMP_GH(PSICOMP):
psi2 += self.weights[i]* tdot(Kfu.T) psi2 += self.weights[i]* tdot(Kfu.T)
return psi0, psi1, psi2 return psi0, psi1, psi2
@Cache_this(limit=10, ignore_args=(0, 2,3,4)) @Cache_this(limit=3, ignore_args=(0, 2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
mu, S = qX.mean.values, qX.variance.values mu, S = qX.mean.values, qX.variance.values
if self.cache_K: Xs = self.comp_K(Z, qX) if self.cache_K: Xs = self.comp_K(Z, qX)

View file

@ -132,5 +132,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=5) _psi1computations = Cacher(__psi1computations, limit=3)
_psi2computations = Cacher(__psi2computations, limit=5) _psi2computations = Cacher(__psi2computations, limit=3)

View file

@ -5,7 +5,6 @@ The module for psi-statistics for RBF kernel
import numpy as np import numpy as np
from paramz.caching import Cache_this from paramz.caching import Cache_this
from . import PSICOMP_RBF from . import PSICOMP_RBF
from ....util import gpu_init
gpu_code = """ gpu_code = """
// define THREADNUM // define THREADNUM
@ -238,8 +237,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
self.fall_back = PSICOMP_RBF() self.fall_back = PSICOMP_RBF()
from pycuda.compiler import SourceModule from pycuda.compiler import SourceModule
from ....util.gpu_init import initGPU import GPy.util.gpu_init
initGPU()
self.GPU_direct = GPU_direct self.GPU_direct = GPU_direct
self.gpuCache = None self.gpuCache = None
@ -326,7 +324,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
except: except:
return self.fall_back.psicomputations(kern, Z, variational_posterior, return_psi2_n) return self.fall_back.psicomputations(kern, Z, variational_posterior, return_psi2_n)
@Cache_this(limit=10, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=(0,))
def _psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False): def _psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
""" """
Z - MxQ Z - MxQ
@ -371,7 +369,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
except: except:
return self.fall_back.psiDerivativecomputations(kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) return self.fall_back.psiDerivativecomputations(kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
@Cache_this(limit=10, ignore_args=(0,2,3,4)) @Cache_this(limit=3, ignore_args=(0,2,3,4))
def _psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def _psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# resolve the requirement of dL_dpsi2 to be symmetric # resolve the requirement of dL_dpsi2 to be symmetric
if len(dL_dpsi2.shape)==2: dL_dpsi2 = (dL_dpsi2+dL_dpsi2.T)/2 if len(dL_dpsi2.shape)==2: dL_dpsi2 = (dL_dpsi2+dL_dpsi2.T)/2

View file

@ -88,7 +88,7 @@ try:
return psi0,psi1,psi2,psi2n return psi0,psi1,psi2,psi2n
from GPy.util.caching import Cacher from GPy.util.caching import Cacher
psicomputations = Cacher(_psicomputations, limit=1) psicomputations = Cacher(_psicomputations, limit=3)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)

View file

@ -7,7 +7,6 @@ import numpy as np
from paramz.caching import Cache_this from paramz.caching import Cache_this
from . import PSICOMP_RBF from . import PSICOMP_RBF
gpu_code = """ gpu_code = """
// define THREADNUM // define THREADNUM
@ -287,8 +286,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False): def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
from pycuda.compiler import SourceModule from pycuda.compiler import SourceModule
from ....util.gpu_init import initGPU import GPy.util.gpu_init
initGPU()
self.GPU_direct = GPU_direct self.GPU_direct = GPU_direct
self.gpuCache = None self.gpuCache = None
@ -375,7 +373,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def get_dimensions(self, Z, variational_posterior): def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1] return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False): def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
""" """
Z - MxQ Z - MxQ
@ -409,7 +407,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
else: else:
return psi0, psi1_gpu.get(), psi2_gpu.get() return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,2,3,4)) @Cache_this(limit=3, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale variance, lengthscale = kern.variance, kern.lengthscale
from ....util.linalg_gpu import sum_axis from ....util.linalg_gpu import sum_axis

View file

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Brownian motion kernel:
.. math::
k(x,y) = \sigma^2 min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # this is the initial variance in Bayesian linear regression
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
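As a standalone numeric check (plain numpy, illustrative values only) that these matrices encode Brownian motion when P0 = 0: integrating the covariance ODE dP/dt = F P + P F' + L Qc L' gives Var[f(t)] = variance * t at the observed component.

import numpy as np

variance = 2.0
F = np.array([[0.0, 1.0], [0.0, 0.0]])
L = np.array([[1.0], [0.0]])
Qc = np.array([[variance]])
H = np.array([[1.0, 0.0]])

P = np.zeros((2, 2))           # P0 = 0: the process starts pinned at zero
dt, T = 1e-3, 1.0
for _ in range(int(round(T / dt))):
    # Euler step of the Lyapunov ODE for the state covariance
    P = P + dt * (F.dot(P) + P.dot(F.T) + L.dot(Qc).dot(L.T))

assert np.isclose(H.dot(P).dot(H.T)[0, 0], variance * T, rtol=1e-2)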

View file

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
import numpy as np
class sde_Linear(Linear):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
"""
Modify the init method, because one extra parameter is required. X - points
on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
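A standalone check (plain numpy, illustrative values only) that this noise-free model, started at time t0 with covariance P0, reproduces the linear kernel: with the transition matrix Phi(tau) = expm(F*tau) one gets H Phi(t-t0) P0 Phi(s-t0)' H' = variance * t * s.

import numpy as np

variance, t0 = 0.5, 1.0
H = np.array([[1.0, 0.0]])
P0 = variance * np.array([[t0**2, t0], [t0, 1.0]])

def Phi(tau):
    # closed-form expm(F * tau) for F = [[0, 1], [0, 0]]
    return np.array([[1.0, tau], [0.0, 1.0]])

for t, s in [(1.5, 2.0), (3.0, 1.0), (t0, t0)]:
    k_ss = H.dot(Phi(t - t0)).dot(P0).dot(Phi(s - t0).T).dot(H.T)[0, 0]
    assert np.isclose(k_ss, variance * t * s)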

137
GPy/kern/src/sde_matern.py Normal file
View file

@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .stationary import Matern32
from .stationary import Matern52
import numpy as np
class sde_Matern32(Matern32):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Matern52(Matern52):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
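Standalone check (plain numpy, illustrative values only) that the Matern-3/2 Pinf above is the stationary state covariance, i.e. it solves the Lyapunov equation F P + P F' + L Qc L' = 0.

import numpy as np

variance, lengthscale = 1.5, 0.7
lam = np.sqrt(3.0) / lengthscale
F = np.array([[0.0, 1.0], [-lam**2, -2.0 * lam]])
L = np.array([[0.0], [1.0]])
Qc = np.array([[12.0 * np.sqrt(3.0) / lengthscale**3 * variance]])
Pinf = np.array([[variance, 0.0], [0.0, 3.0 * variance / lengthscale**2]])

residual = F.dot(Pinf) + Pinf.dot(F.T) + L.dot(Qc).dot(L.T)
assert np.allclose(residual, 0.0)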

View file

@ -0,0 +1,180 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Standard Periodic kernel:
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i))}{l_i} \right)^2 \right]
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.period.gradient = gradients[1]
self.lengthscale.gradient = gradients[2]
def sde(self):
"""
Return the state space representation of the covariance.
! Note: one must constrain the lengthscale not to drop below 0.25;
below that the Bessel functions of the first kind grow very large.
! Note: one must also keep the period from becoming very small, because
then the gradients with respect to the period become unstable.
This may depend on the data; for a test example with
300 data points the lower limit is 0.15.
"""
# Params to use: (in that order)
#self.variance
#self.period
#self.lengthscale
N = 7 # approximation order
w0 = 2*np.pi/self.period # frequency
lengthscale = 2*self.lengthscale
[q2,dq2l] = seriescoeff(N,lengthscale,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
# Derivatives self.period
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.period );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
# Derivatives self.lengthscales
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
"""
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
Reference is:
[1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
covariance functions and state space models. In Proceedings of the
Seventeenth International Conference on Artificial Intelligence and
Statistics (AISTATS 2014). JMLR: W&CP, volume 33.
Note! Only the infinite approximation (through Bessel function)
is currently implemented.
Input:
----------------
m: int
Degree of approximation. Default 6.
lengthScale: float
Length scale parameter in the kernel
magnSigma2: float
Multiplier in front of the kernel.
Output:
-----------------
coeffs: array(m+1)
Covariance series coefficients
coeffs_dl: array(m+1)
Derivatives of the coefficients with respect to lengthscale.
"""
if true_covariance:
bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
M,J = np.meshgrid(range(0,m+1),range(0,m+1))
coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
(lengthScale**(-2))**J *magnSigma2
coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)
coeffs = np.sum(coeffs,0)
else:
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
(-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )
# The first element
coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl
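Standalone check (plain numpy/scipy, illustrative values only) of the series approximation: computing the coefficients as in the default branch above, with the lengthscale doubled as sde() does, the truncated cosine series reproduces the standard periodic covariance sigma^2 exp(-0.5 sin^2(pi tau / T) / l^2) to high accuracy.

import numpy as np
from scipy import special

variance, lengthscale, period = 1.0, 1.0, 2.0
N = 7
ell = 2.0 * lengthscale                      # sde() doubles the lengthscale before computing the coefficients
coeffs = 2.0 * variance * np.exp(-ell**-2) * special.iv(np.arange(N + 1), ell**-2)
coeffs[0] *= 0.5                             # same halving of the first coefficient as above

w0 = 2.0 * np.pi / period
tau = np.linspace(0.0, period, 50)
series = sum(coeffs[j] * np.cos(j * w0 * tau) for j in range(N + 1))
exact = variance * np.exp(-0.5 * np.sin(np.pi * tau / period)**2 / lengthscale**2)
assert np.allclose(series, exact, atol=1e-6)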

103
GPy/kern/src/sde_static.py Normal file
View file

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
from .static import Bias
import numpy as np
class sde_White(White):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
White kernel:
.. math::
k(x,y) = \alpha*\delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Bias kernel:
.. math::
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -0,0 +1,192 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance several stationary covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .rbf import RBF
from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Radial Basis Function kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10 # approximation order (number of terms in the exponential series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
F = np.diag(np.ones((N-1,)),1)
F[-1,:] = -aa[-1:0:-1]
L= np.zeros((N,1))
L[N-1,0] = 1
H = np.zeros((1,N))
H[0,0] = 1
# Infinite covariance:
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
dPinf_variance = Pinf/self.variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
P0 = Pinf.copy()
dP0 = dPinf.copy()
# The benefits of this balancing are not clearly established; it helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
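The construction above can be checked with a standalone sketch (plain numpy/scipy, illustrative values; it mirrors the code but is not the class itself): build the even polynomial, keep the stable roots, form the companion-form dynamics, solve the Lyapunov equation for Pinf, and confirm that the reproduced marginal variance H Pinf H' is close to sigma^2.

import numpy as np
from math import factorial
from scipy.linalg import solve_continuous_lyapunov

variance, lengthscale, N = 1.0, 1.0, 10
fn = factorial(N)
kappa = 1.0 / (2.0 * lengthscale**2)
Qc = variance * np.sqrt(np.pi / kappa) * fn * (4 * kappa)**N

pp = np.zeros(2 * N + 1)                   # polynomial coefficients, highest power first
for n in range(N + 1):
    pp[2 * (N - n)] = fn * (4.0 * kappa)**(N - n) / factorial(n) * (-1)**n
roots = np.roots(pp)
stable = roots[np.real(roots) < 0]         # keep the stable half of the spectrum
aa = np.poly1d(stable, r=True).coeffs.real

F = np.diag(np.ones(N - 1), 1)             # companion-form dynamics
F[-1, :] = -aa[-1:0:-1]
L = np.zeros((N, 1)); L[-1, 0] = 1.0
H = np.zeros((1, N)); H[0, 0] = 1.0

Pinf = solve_continuous_lyapunov(F, -Qc * L.dot(L.T))
assert np.isclose(H.dot(Pinf).dot(H.T)[0, 0], variance, rtol=1e-2)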
class sde_Exponential(Exponential):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Exponential kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale)
F = np.array(((-1.0/lengthscale,),))
L = np.array(((1.0,),))
Qc = np.array( ((2.0*variance/lengthscale,),) )
H = np.array(((1.0,),))
Pinf = np.array(((variance,),))
P0 = Pinf.copy()
dF = np.zeros((1,1,2));
dQc = np.zeros((1,1,2));
dPinf = np.zeros((1,1,2));
dF[:,:,0] = 0.0
dF[:,:,1] = 1.0/lengthscale**2
dQc[:,:,0] = 2.0/lengthscale
dQc[:,:,1] = -2.0*variance/lengthscale**2
dPinf[:,:,0] = 1.0
dPinf[:,:,1] = 0.0
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
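Standalone check (plain numpy/scipy, illustrative values only): the scalar model above has autocovariance H expm(F*tau) Pinf H' = variance * exp(-tau / lengthscale), so the state-space form reproduces the Exponential (Ornstein-Uhlenbeck) covariance exactly.

import numpy as np
from scipy.linalg import expm

variance, lengthscale = 0.8, 1.3
F = np.array([[-1.0 / lengthscale]])
H = np.array([[1.0]])
Pinf = np.array([[variance]])

for tau in (0.0, 0.5, 2.0):
    k_ss = H.dot(expm(F * tau)).dot(Pinf).dot(H.T)[0, 0]
    assert np.isclose(k_ss, variance * np.exp(-tau / lengthscale))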
class sde_RatQuad(RatQuad):
"""
Class provides extra functionality to transfer this covariance function into
SDE form.
Rational Quadratic kernel:
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
assert False, 'Not Implemented'
# Params to use:
# self.lengthscale
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

View file

@ -1,6 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
""" """
The standard periodic kernel which mentioned in: The standard periodic kernel which mentioned in:
@ -9,7 +8,7 @@ The standard periodic kernel which mentioned in:
The MIT Press, 2005. The MIT Press, 2005.
[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor, [2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
Neural Networks and Machine Learning, pages 133-165. Springer, 1998. Neural Networks and Machine Learning, pages 133-165. Springer, 1998.
""" """
@ -25,56 +24,56 @@ class StdPeriodic(Kern):
.. math:: .. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim} k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] } \left( \frac{\sin(\frac{\pi}{T_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variance: the variance :math:`\theta_1` in the formula above :param variance: the variance :math:`\theta_1` in the formula above
:type variance: float :type variance: float
:param wavelength: the vector of wavelengths :math:`\lambda_i`. If None then 1.0 is assumed. :param period: the vector of periods :math:`\T_i`. If None then 1.0 is assumed.
:type wavelength: array or list of the appropriate size (or float if there is only one wavelength parameter) :type period: array or list of the appropriate size (or float if there is only one period parameter)
:param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed. :param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD1: Auto Relevance Determination with respect to wavelength. :param ARD1: Auto Relevance Determination with respect to period.
If equal to "False" one single wavelength parameter :math:`\lambda_i` for If equal to "False" one single period parameter :math:`\T_i` for
each dimension is assumed, otherwise there is one lengthscale each dimension is assumed, otherwise there is one lengthscale
parameter per dimension. parameter per dimension.
:type ARD1: Boolean :type ARD1: Boolean
:param ARD2: Auto Relevance Determination with respect to lengthscale. :param ARD2: Auto Relevance Determination with respect to lengthscale.
If equal to "False" one single wavelength parameter :math:`l_i` for If equal to "False" one single lengthscale parameter :math:`l_i` for
each dimension is assumed, otherwise there is one lengthscale each dimension is assumed, otherwise there is one lengthscale
parameter per dimension. parameter per dimension.
:type ARD2: Boolean :type ARD2: Boolean
:param active_dims: indices of dimensions which are used in the computation of the kernel :param active_dims: indices of dimensions which are used in the computation of the kernel
:type wavelength: array or list of the appropriate size :type active_dims: array or list of the appropriate size
:param name: Name of the kernel for output :param name: Name of the kernel for output
:type String :type String
:param useGPU: whether or not to use GPU :param useGPU: whether or not to use GPU
:type Boolean :type Boolean
""" """
def __init__(self, input_dim, variance=1., wavelength=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False): def __init__(self, input_dim, variance=1., period=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU) super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
self.input_dim = input_dim self.input_dim = input_dim
self.ARD1 = ARD1 # correspond to wavelengths self.ARD1 = ARD1 # correspond to periods
self.ARD2 = ARD2 # correspond to lengthscales self.ARD2 = ARD2 # correspond to lengthscales
self.name = name self.name = name
if self.ARD1 == False: if self.ARD1 == False:
if wavelength is not None: if period is not None:
wavelength = np.asarray(wavelength) period = np.asarray(period)
assert wavelength.size == 1, "Only one wavelength needed for non-ARD kernel" assert period.size == 1, "Only one period needed for non-ARD kernel"
else: else:
wavelength = np.ones(1) period = np.ones(1)
else: else:
if wavelength is not None: if period is not None:
wavelength = np.asarray(wavelength) period = np.asarray(period)
assert wavelength.size == input_dim, "bad number of wavelengths" assert period.size == input_dim, "bad number of periods"
else: else:
wavelength = np.ones(input_dim) period = np.ones(input_dim)
if self.ARD2 == False: if self.ARD2 == False:
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
@ -87,33 +86,33 @@ class StdPeriodic(Kern):
assert lengthscale.size == input_dim, "bad number of lengthscales" assert lengthscale.size == input_dim, "bad number of lengthscales"
else: else:
lengthscale = np.ones(input_dim) lengthscale = np.ones(input_dim)
self.variance = Param('variance', variance, Logexp()) self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1, "Variance size must be one" assert self.variance.size==1, "Variance size must be one"
self.wavelengths = Param('wavelengths', wavelength, Logexp()) self.period = Param('period', period, Logexp())
self.lengthscales = Param('lengthscales', lengthscale, Logexp()) self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.link_parameters(self.variance, self.wavelengths, self.lengthscales) self.link_parameters(self.variance, self.period, self.lengthscale)
def parameters_changed(self): def parameters_changed(self):
""" """
This functions deals as a callback for each optimization iteration. This functions deals as a callback for each optimization iteration.
If one optimization step was successfull and the parameters If one optimization step was successfull and the parameters
this callback function will be called to be able to update any this callback function will be called to be able to update any
precomputations for the kernel. precomputations for the kernel.
""" """
pass pass
def K(self, X, X2=None): def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2.""" """Compute the covariance matrix between X and X2."""
if X2 is None: if X2 is None:
X2 = X X2 = X
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.period
exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscales ), axis = -1 ) ) exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscale ), axis = -1 ) )
return self.variance * exp_dist return self.variance * exp_dist
@ -125,42 +124,47 @@ class StdPeriodic(Kern):
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: if X2 is None:
X2 = X X2 = X
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.period
sin_base = np.sin( base ) sin_base = np.sin( base )
exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscales ), axis = -1 ) ) exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscale ), axis = -1 ) )
dwl = self.variance * (1.0/np.square(self.lengthscales)) * sin_base*np.cos(base) * (base / self.wavelengths) dwl = self.variance * (1.0/np.square(self.lengthscale)) * sin_base*np.cos(base) * (base / self.period)
dl = self.variance * np.square( sin_base) / np.power( self.lengthscales, 3) dl = self.variance * np.square( sin_base) / np.power( self.lengthscale, 3)
self.variance.gradient = np.sum(exp_dist * dL_dK) self.variance.gradient = np.sum(exp_dist * dL_dK)
#target[0] += np.sum( exp_dist * dL_dK) #target[0] += np.sum( exp_dist * dL_dK)
if self.ARD1: # different wavelengths if self.ARD1: # different periods
self.wavelengths.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0) self.period.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
else: # same wavelengths else: # same period
self.wavelengths.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK) self.period.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
if self.ARD2: # different lengthscales if self.ARD2: # different lengthscales
self.lengthscales.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0) self.lengthscale.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
else: # same lengthscales else: # same lengthscales
self.lengthscales.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK) self.lengthscale.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
self.variance.gradient = np.sum(dL_dKdiag) self.variance.gradient = np.sum(dL_dKdiag)
self.wavelengths.gradient = 0 self.period.gradient = 0
self.lengthscales.gradient = 0 self.lengthscale.gradient = 0
# def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
# """derivative of the covariance matrix with respect to X.""" K = self.K(X, X2)
# if X2 is None:
# raise NotImplemented("Periodic kernel: dK_dX not implemented") dL_dK = dL_dK+dL_dK.T
# X2 = X
# def gradients_X_diag(self, dL_dKdiag, X): dX = -np.pi*((dL_dK*K)[:,:,None]*np.sin(2*np.pi/self.period*(X[:,None,:] - X2[None,:,:]))/(2.*np.square(self.lengthscale)*self.period)).sum(1)
# return dX
# raise NotImplemented("Periodic kernel: dKdiag_dX not implemented")
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
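With the parameters now named period and lengthscale, a minimal usage sketch of the standard periodic kernel (constructor keyword names assumed from this diff, not guaranteed elsewhere):
import numpy as np
import GPy
# build a 1D standard periodic kernel; keyword names follow the renaming in this diff
k = GPy.kern.StdPeriodic(input_dim=1, variance=1.0, period=2.0, lengthscale=0.5)
X = np.linspace(0, 4, 5)[:, None]
print(k.K(X))                  # covariance repeats with period 2
print(k.input_sensitivity())   # variance / lengthscale**2 per input dimension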

View file

@ -81,6 +81,52 @@ class White(Static):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() self.variance.gradient = dL_dpsi0.sum()
class WhiteHeteroscedastic(Static):
def __init__(self, input_dim, num_data, variance=1., active_dims=None, name='white_hetero'):
"""
A heteroscedastic White kernel (nugget/noise).
It defines one variance (nugget) per input sample.
Prediction excludes any noise learnt by this Kernel, so be careful using this kernel.
You can plot the errors learnt by this kernel with something like:
plt.errorbar(m.X, m.Y, yerr=2*np.sqrt(m.kern.white.variance))
"""
super(Static, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', np.ones(num_data) * variance, Logexp())
self.link_parameters(self.variance)
def Kdiag(self, X):
if X.shape[0] == self.variance.shape[0]:
# If the input has the same number of samples as
# the number of variances, we return the variances
return self.variance
return 0.
def K(self, X, X2=None):
if X2 is None and X.shape[0] == self.variance.shape[0]:
return np.eye(X.shape[0]) * self.variance
else:
return 0.
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def psi2n(self, Z, variational_posterior):
return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
self.variance.gradient = np.diagonal(dL_dK)
else:
self.variance.gradient = 0.
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0
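A minimal usage sketch for the new heteroscedastic white kernel, assuming it is exposed as GPy.kern.WhiteHeteroscedastic and added to a smooth kernel; the attribute name white_hetero below is just the kernel's default name:
import numpy as np
import GPy
N = 50
X = np.linspace(0, 10, N)[:, None]
Y = np.sin(X) + np.random.randn(N, 1) * np.linspace(0.05, 0.5, N)[:, None]
# smooth component plus one noise (nugget) variance per training sample
kern = GPy.kern.RBF(1) + GPy.kern.WhiteHeteroscedastic(input_dim=1, num_data=N)
m = GPy.models.GPRegression(X, Y, kern)
m.optimize()
print(m.kern.white_hetero.variance)   # per-sample noise, usable as error bars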
class Bias(Static): class Bias(Static):
def __init__(self, input_dim, variance=1., active_dims=None, name='bias'): def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
super(Bias, self).__init__(input_dim, variance, active_dims, name) super(Bias, self).__init__(input_dim, variance, active_dims, name)

View file

@ -81,11 +81,11 @@ class Stationary(Kern):
def dK_dr(self, r): def dK_dr(self, r):
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
@Cache_this(limit=20, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK2_drdr(self, r): def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method") raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
@Cache_this(limit=5, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
""" """
Kernel function applied on inputs X and X2. Kernel function applied on inputs X and X2.
@ -99,6 +99,9 @@ class Stationary(Kern):
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def dK_dr_via_X(self, X, X2): def dK_dr_via_X(self, X, X2):
"""
compute dK/dr evaluated at the scaled distance between X and X2
"""
#a convenience function, so we can cache dK_dr #a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2)) return self.dK_dr(self._scaled_dist(X, X2))
@ -312,11 +315,23 @@ class Exponential(Stationary):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r): def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r) return self.variance * np.exp(-r)
def dK_dr(self, r): def dK_dr(self, r):
return -0.5*self.K_of_r(r) return -self.K_of_r(r)
# def sde(self):
# """
# Return the state space representation of the covariance.
# """
# F = np.array([[-1/self.lengthscale]])
# L = np.array([[1]])
# Qc = np.array([[2*self.variance/self.lengthscale]])
# H = np.array([[1]])
# Pinf = np.array([[self.variance]])
# # TODO: return the derivatives as well
#
# return (F, L, Qc, H, Pinf)
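The commented-out block is the usual Ornstein-Uhlenbeck state space form of the exponential kernel; as a quick standalone check (assumed toy values), its stationary covariance satisfies the Lyapunov equation F*Pinf + Pinf*F^T + L*Qc*L^T = 0:
import numpy as np
lengthscale, variance = 0.7, 1.3
F = np.array([[-1.0 / lengthscale]])
L = np.array([[1.0]])
Qc = np.array([[2.0 * variance / lengthscale]])
Pinf = np.array([[variance]])
print(np.allclose(F.dot(Pinf) + Pinf.dot(F.T) + L.dot(Qc).dot(L.T), 0.0))   # True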
@ -385,6 +400,41 @@ class Matern32(Stationary):
F1lower = np.array([f(lower) for f in F1])[:, None] F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]])
Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]])
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]])
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
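A quick sanity check of the sde() representation added above: the returned stationary covariance Pinf should satisfy the Lyapunov equation F*Pinf + Pinf*F^T + L*Qc*L^T = 0 (a sketch, not part of the library):
import numpy as np
import GPy
k = GPy.kern.Matern32(1, variance=2.0, lengthscale=0.5)
F, L, Qc, H, Pinf, dF, dQc, dPinf = k.sde()
residual = F.dot(Pinf) + Pinf.dot(F.T) + L.dot(Qc).dot(L.T)
print(np.allclose(residual, 0.0))   # True if the state space form is consistent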
class Matern52(Stationary): class Matern52(Stationary):
""" """
@ -486,18 +536,21 @@ class RatQuad(Stationary):
self.link_parameters(self.power) self.link_parameters(self.power)
def K_of_r(self, r): def K_of_r(self, r):
r2 = np.power(r, 2.) r2 = np.square(r)
return self.variance*np.power(1. + r2/2., -self.power) # return self.variance*np.power(1. + r2/2., -self.power)
return self.variance*np.exp(-self.power*np.log1p(r2/2.))
def dK_dr(self, r): def dK_dr(self, r):
r2 = np.power(r, 2.) r2 = np.square(r)
return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.) # return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.)
return -self.variance*self.power*r*np.exp(-(self.power+1)*np.log1p(r2/2.))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
super(RatQuad, self).update_gradients_full(dL_dK, X, X2) super(RatQuad, self).update_gradients_full(dL_dK, X, X2)
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
r2 = np.power(r, 2.) r2 = np.square(r)
dK_dpow = -self.variance * np.power(2., self.power) * np.power(r2 + 2., -self.power) * np.log(0.5*(r2+2.)) # dK_dpow = -self.variance * np.power(2., self.power) * np.power(r2 + 2., -self.power) * np.log(0.5*(r2+2.))
dK_dpow = -self.variance * np.exp(self.power*(np.log(2.)-np.log1p(r2+1)))*np.log1p(r2/2.)
grad = np.sum(dL_dK*dK_dpow) grad = np.sum(dL_dK*dK_dpow)
self.power.gradient = grad self.power.gradient = grad
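The log1p form used above is algebraically identical to the original power form, just better conditioned; a standalone numerical check with assumed values:
import numpy as np
variance, power = 1.5, 2.3
r2 = np.square(np.linspace(0.0, 5.0, 11))
direct = variance * np.power(1. + r2 / 2., -power)
stable = variance * np.exp(-power * np.log1p(r2 / 2.))
print(np.allclose(direct, stable))   # True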

File diff suppressed because it is too large.

View file

@ -54,12 +54,12 @@ class TruncLinear(Kern):
self.add_parameter(self.variances) self.add_parameter(self.variances)
self.add_parameter(self.delta) self.add_parameter(self.delta)
@Cache_this(limit=2) @Cache_this(limit=3)
def K(self, X, X2=None): def K(self, X, X2=None):
XX = self.variances*self._product(X, X2) XX = self.variances*self._product(X, X2)
return XX.sum(axis=-1) return XX.sum(axis=-1)
@Cache_this(limit=2) @Cache_this(limit=3)
def _product(self, X, X2=None): def _product(self, X, X2=None):
if X2 is None: if X2 is None:
X2 = X X2 = X
@ -149,12 +149,12 @@ class TruncLinear_inf(Kern):
self.add_parameter(self.variances) self.add_parameter(self.variances)
# @Cache_this(limit=2) # @Cache_this(limit=3)
def K(self, X, X2=None): def K(self, X, X2=None):
tmp = self._product(X, X2) tmp = self._product(X, X2)
return (self.variances*tmp).sum(axis=-1) return (self.variances*tmp).sum(axis=-1)
# @Cache_this(limit=2) # @Cache_this(limit=3)
def _product(self, X, X2=None): def _product(self, X, X2=None):
if X2 is None: if X2 is None:
X2 = X X2 = X

View file

@ -27,9 +27,6 @@ class Binomial(Likelihood):
super(Binomial, self).__init__(gp_link, 'Binomial') super(Binomial, self).__init__(gp_link, 'Binomial')
def conditional_mean(self, gp, Y_metadata):
return self.gp_link(gp)*Y_metadata['trials']
def pdf_link(self, inv_link_f, y, Y_metadata): def pdf_link(self, inv_link_f, y, Y_metadata):
""" """
Likelihood function given inverse link of f. Likelihood function given inverse link of f.
@ -109,7 +106,7 @@ class Binomial(Likelihood):
N = Y_metadata['trials'] N = Y_metadata['trials']
return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f) return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f)
def samples(self, gp, Y_metadata=None): def samples(self, gp, Y_metadata=None, **kw):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.
@ -123,3 +120,32 @@ class Binomial(Likelihood):
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass pass
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):
if gh_points is None:
gh_x, gh_w = self._gh_points()
else:
gh_x, gh_w = gh_points
gh_w = gh_w / np.sqrt(np.pi)
shape = m.shape
C = np.atleast_1d(Y_metadata['trials'])
m,v,Y, C = m.flatten(), v.flatten(), Y.flatten()[:,None], C.flatten()[:,None]
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
p = std_norm_cdf(X)
p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
N = std_norm_pdf(X)
#TODO: missing nchoosek coefficient! use gammaln?
F = (Y*np.log(p) + (C-Y)*np.log(1.-p)).dot(gh_w)
NoverP = N/p
NoverP_ = N/(1.-p)
dF_dm = (Y*NoverP - (C-Y)*NoverP_).dot(gh_w)
dF_dv = -0.5* ( Y*(NoverP**2 + NoverP*X) + (C-Y)*(NoverP_**2 - NoverP_*X) ).dot(gh_w)
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), None
else:
raise NotImplementedError
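The method above evaluates the probit-Binomial variational expectation by Gauss-Hermite quadrature; a standalone sketch of the same idea with assumed toy values (the binomial coefficient is dropped, as flagged in the TODO):
import numpy as np
from scipy.stats import norm
m, v, y, trials = 0.3, 0.5, 4, 10
gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
f = gh_x * np.sqrt(2. * v) + m                     # quadrature nodes for N(m, v)
p = np.clip(norm.cdf(f), 1e-9, 1. - 1e-9)          # probit link, clipped for stability
logpdf = y * np.log(p) + (trials - y) * np.log(1. - p)
print((gh_w / np.sqrt(np.pi)).dot(logpdf))         # approx. E[log p(y|f)]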

View file

@ -22,3 +22,5 @@ from .gp_var_gauss import GPVariationalGaussianApproximation
from .one_vs_all_classification import OneVsAllClassification from .one_vs_all_classification import OneVsAllClassification
from .one_vs_all_sparse_classification import OneVsAllSparseClassification from .one_vs_all_sparse_classification import OneVsAllSparseClassification
from .dpgplvm import DPBayesianGPLVM from .dpgplvm import DPBayesianGPLVM
from .state_space_model import StateSpace

View file

@ -61,7 +61,7 @@ class BayesianGPLVM(SparseGP_MPI):
else: else:
from ..inference.latent_function_inference.var_dtc import VarDTC from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc") self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1]) inference_method = VarDTC(limit=3 if not missing_data else Y.shape[1])
if isinstance(inference_method,VarDTC_minibatch): if isinstance(inference_method,VarDTC_minibatch):
inference_method.mpi_comm = mpi_comm inference_method.mpi_comm = mpi_comm

View file

@ -40,12 +40,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
Z = np.random.permutation(X.copy())[:num_inducing] Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
if X_variance == False: if X_variance is False:
self.logger.info('no variance on X, activating sparse GPLVM') self.logger.info('no variance on X, activating sparse GPLVM')
X = Param("latent space", X) X = Param("latent space", X)
elif X_variance is None: else:
self.logger.info("initializing latent space variance ~ uniform(0,.1)") if X_variance is None:
X_variance = np.random.uniform(0,.1,X.shape) self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape)
self.variational_prior = NormalPrior() self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance) X = NormalPosterior(X, X_variance)
@ -61,7 +62,7 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
if inference_method is None: if inference_method is None:
from ..inference.latent_function_inference.var_dtc import VarDTC from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc") self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1]) inference_method = VarDTC(limit=3 if not missing_data else Y.shape[1])
super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood, super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method, name=name, inference_method=inference_method,
@ -71,13 +72,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
self.X = X self.X = X
self.link_parameter(self.X, 0) self.link_parameter(self.X, 0)
def set_X_gradients(self, X, X_grad): #def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form.""" # """Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient = X_grad # X.mean.gradient, X.variance.gradient = X_grad
def get_X_gradients(self, X): #def get_X_gradients(self, X):
"""Get the gradients of the posterior distribution of X in its specific form.""" # """Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient # return X.mean.gradient, X.variance.gradient
def _outer_values_update(self, full_values): def _outer_values_update(self, full_values):
""" """
@ -106,7 +107,7 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
super(BayesianGPLVMMiniBatch,self).parameters_changed() super(BayesianGPLVMMiniBatch,self).parameters_changed()
kl_fctr = self.kl_factr kl_fctr = self.kl_factr
if kl_fctr > 0: if kl_fctr > 0 and self.has_uncertain_inputs():
Xgrad = self.X.gradient.copy() Xgrad = self.X.gradient.copy()
self.X.gradient[:] = 0 self.X.gradient[:] = 0
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)
@ -122,8 +123,8 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
if self.missing_data or not self.stochastics: if self.missing_data or not self.stochastics:
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
elif self.stochastics: else: #self.stochastics is given:
d = self.output_dim d = self.output_dim
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
self._Xgrad = self.X.gradient.copy() self._Xgrad = self.X.gradient.copy()

View file

@ -28,7 +28,7 @@ class GPVariationalGaussianApproximation(GP):
self.beta = Param('beta', np.ones(num_data)) self.beta = Param('beta', np.ones(num_data))
inf = VarGauss(self.alpha, self.beta) inf = VarGauss(self.alpha, self.beta)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf) super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf, Y_metadata=Y_metadata)
self.link_parameter(self.alpha) self.link_parameter(self.alpha)
self.link_parameter(self.beta) self.link_parameter(self.beta)

View file

@ -41,4 +41,4 @@ class GPLVM(GP):
def parameters_changed(self): def parameters_changed(self):
super(GPLVM, self).parameters_changed() super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)

View file

@ -127,8 +127,6 @@ class MRD(BayesianGPLVMMiniBatch):
self.unlink_parameter(self.likelihood) self.unlink_parameter(self.likelihood)
self.unlink_parameter(self.kern) self.unlink_parameter(self.kern)
del self.kern
del self.likelihood
self.num_data = Ylist[0].shape[0] self.num_data = Ylist[0].shape[0]
if isinstance(batchsize, int): if isinstance(batchsize, int):
@ -156,7 +154,11 @@ class MRD(BayesianGPLVMMiniBatch):
self.link_parameter(spgp, i+2) self.link_parameter(spgp, i+2)
self.bgplvms.append(spgp) self.bgplvms.append(spgp)
self.posterior = None b = self.bgplvms[0]
self.posterior = b.posterior
self.kern = b.kern
self.likelihood = b.likelihood
self.logger.info("init done") self.logger.info("init done")
def parameters_changed(self): def parameters_changed(self):
@ -236,7 +238,7 @@ class MRD(BayesianGPLVMMiniBatch):
# sharex=sharex, sharey=sharey) # sharex=sharex, sharey=sharey)
# return fig # return fig
def plot_scales(self, titles=None, fig_kwargs=dict(figsize=None, tight_layout=True), **kwargs): def plot_scales(self, titles=None, fig_kwargs={}, **kwargs):
""" """
Plot input sensitivity for all datasets, to see which input dimensions are Plot input sensitivity for all datasets, to see which input dimensions are
significant for which dataset. significant for which dataset.
@ -252,12 +254,9 @@ class MRD(BayesianGPLVMMiniBatch):
M = len(self.bgplvms) M = len(self.bgplvms)
fig = pl().figure(rows=1, cols=M, **fig_kwargs) fig = pl().figure(rows=1, cols=M, **fig_kwargs)
plots = {}
for c in range(M): for c in range(M):
canvas = self.bgplvms[c].kern.plot_ARD(title=titles[c], figure=fig, col=c+1, **kwargs) canvas = self.bgplvms[c].kern.plot_ARD(title=titles[c], figure=fig, col=c+1, **kwargs)
plots[titles[c]] = canvas return canvas
pl().show_canvas(canvas)
return plots
def plot_latent(self, labels=None, which_indices=None, def plot_latent(self, labels=None, which_indices=None,
resolution=60, legend=True, resolution=60, legend=True,

View file

@ -41,11 +41,12 @@ class SparseGPMiniBatch(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False, name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1): missing_data=False, stochastic=False, batchsize=1):
self._update_stochastics = False
# pick a sensible inference method # pick a sensible inference method
if inference_method is None: if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian): if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1]) inference_method = var_dtc.VarDTC(limit=3 if not missing_data else Y.shape[1])
else: else:
#inference_method = ?? #inference_method = ??
raise NotImplementedError("what to do what to do?") raise NotImplementedError("what to do what to do?")
@ -73,7 +74,14 @@ class SparseGPMiniBatch(SparseGP):
logger.info("Adding Z as parameter") logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
self.posterior = None self.posterior = None
def optimize(self, optimizer=None, start=None, **kwargs):
try:
self._update_stochastics = True
SparseGP.optimize(self, optimizer=optimizer, start=start, **kwargs)
finally:
self._update_stochastics = False
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
@ -226,16 +234,16 @@ class SparseGPMiniBatch(SparseGP):
woodbury_inv = self.posterior._woodbury_inv woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector woodbury_vector = self.posterior._woodbury_vector
if not self.stochastics: #if not self.stochastics:
m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim) # m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
message = m_f(-1) # message = m_f(-1)
print(message, end=' ') # print(message, end=' ')
for d, ninan in self.stochastics.d: for d, ninan in self.stochastics.d:
if not self.stochastics: #if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ') # print(' '*(len(message)) + '\r', end=' ')
message = m_f(d) # message = m_f(d)
print(message, end=' ') # print(message, end=' ')
psi0ni = self.psi0[ninan] psi0ni = self.psi0[ninan]
psi1ni = self.psi1[ninan] psi1ni = self.psi1[ninan]
@ -262,8 +270,8 @@ class SparseGPMiniBatch(SparseGP):
woodbury_vector[:, d] = posterior.woodbury_vector woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics: #if not self.stochastics:
print('') # print('')
if self.posterior is None: if self.posterior is None:
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
@ -314,6 +322,8 @@ class SparseGPMiniBatch(SparseGP):
if self.missing_data: if self.missing_data:
self._outer_loop_for_missing_data() self._outer_loop_for_missing_data()
elif self.stochastics: elif self.stochastics:
if self._update_stochastics:
self.stochastics.do_stochastics()
self._outer_loop_without_missing_data() self._outer_loop_without_missing_data()
else: else:
self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)

View file

@ -30,7 +30,7 @@ class SparseGPRegression(SparseGP_MPI):
""" """
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None): def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None, name='sparse_gp'):
num_data, input_dim = X.shape num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability) # kern defaults to rbf (plus white for stability)
@ -55,11 +55,11 @@ class SparseGPRegression(SparseGP_MPI):
else: else:
infr = VarDTC() infr = VarDTC()
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm) SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm, name=name)
def parameters_changed(self): def parameters_changed(self):
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch
if isinstance(self.inference_method,VarDTC_minibatch): if isinstance(self.inference_method,VarDTC_minibatch):
update_gradients_sparsegp(self, mpi_comm=self.mpi_comm) update_gradients_sparsegp(self, mpi_comm=self.mpi_comm)
else: else:
super(SparseGPRegression, self).parameters_changed() super(SparseGPRegression, self).parameters_changed()

View file

@ -4,6 +4,7 @@
import sys import sys
from .sparse_gp_regression import SparseGPRegression from .sparse_gp_regression import SparseGPRegression
from ..core import Param
class SparseGPLVM(SparseGPRegression): class SparseGPLVM(SparseGPRegression):
""" """
@ -21,7 +22,9 @@ class SparseGPLVM(SparseGPRegression):
if X is None: if X is None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
X, fracs = initialize_latent(init, input_dim, Y) X, fracs = initialize_latent(init, input_dim, Y)
X = Param('latent space', X)
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
self.link_parameter(self.X, 0)
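With the latent locations wrapped in a Param and linked as the first parameter, they are optimized alongside the kernel; a minimal usage sketch (constructor signature assumed from this diff):
import numpy as np
import GPy
Y = np.random.randn(100, 5)
m = GPy.models.SparseGPLVM(Y, input_dim=2, num_inducing=10)
m.optimize()
print(m.X.shape)   # (100, 2): the learnt latent locations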
def parameters_changed(self): def parameters_changed(self):
super(SparseGPLVM, self).parameters_changed() super(SparseGPLVM, self).parameters_changed()

View file

@ -94,6 +94,86 @@ class IBPPrior(VariationalPrior):
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common) variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common) variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common)
class SLVMPosterior(SpikeAndSlabPosterior):
'''
The SpikeAndSlab distribution for variational approximations.
'''
def __init__(self, means, variances, binary_prob, tau=None, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
from paramz.transformations import Logexp
super(SLVMPosterior, self).__init__(means, variances, binary_prob, group_spike=False, name=name)
self.tau = Param("tau_", np.ones((self.gamma.shape[1],2)), Logexp())
self.link_parameter(self.tau)
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad
def __getitem__(self, s):
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
import copy
n = self.__new__(self.__class__, self.name)
dc = self.__dict__.copy()
dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s]
dc['binary_prob'] = self.binary_prob[s]
dc['tau'] = self.tau
dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc)
n.parameters[dc['mean']._parent_index_] = dc['mean']
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n.parameters[dc['tau']._parent_index_] = dc['tau']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size
n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(SLVMPosterior, self).__getitem__(s)
class SLVMPrior(VariationalPrior):
def __init__(self, input_dim, alpha =1., beta=1., Z=None, name='SLVMPrior', **kw):
super(SLVMPrior, self).__init__(name=name, **kw)
self.input_dim = input_dim
self.variance = 1.
self.alpha = alpha
self.beta = beta
self.Z = Z
if Z is not None:
assert np.all(np.unique(Z)==np.array([0,1]))
def KL_divergence(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
from scipy.special import betaln,digamma
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(self.alpha,self.beta)*self.input_dim \
-betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*digamma(tau[:,0])).sum() + \
((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*digamma(tau[:,1])).sum() + ((self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum()
return part1+part2
def update_gradients_KL(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma.values, variational_posterior.tau.values
variational_posterior.mean.gradient -= gamma*mu/self.variance
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
from scipy.special import digamma,polygamma
dgamma = np.log(gamma/(1.-gamma))+ (digamma(tau[:,1])-digamma(tau[:,0]))*self.Z
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
common = (self.Z.sum(0)+self.alpha+self.beta-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-(gamma*self.Z).sum(0)-self.alpha)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -((tau[:,1]-((1-gamma)*self.Z).sum(0)-self.beta)*polygamma(1,tau[:,1])+common)
class SSGPLVM(SparseGP_MPI): class SSGPLVM(SparseGP_MPI):
""" """
Spike-and-Slab Gaussian Process Latent Variable Model Spike-and-Slab Gaussian Process Latent Variable Model
@ -107,7 +187,7 @@ class SSGPLVM(SparseGP_MPI):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs): Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False,SLVM=False, alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
self.group_spike = group_spike self.group_spike = group_spike
self.init = init self.init = init
@ -152,6 +232,9 @@ class SSGPLVM(SparseGP_MPI):
if IBP: if IBP:
self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX) X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
elif SLVM:
self.variational_prior = SLVMPrior(input_dim=input_dim, alpha=alpha, beta=beta, Z=connM) if variational_prior is None else variational_prior
X = SLVMPosterior(X, X_variance, gamma, tau=tau)
else: else:
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX) X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)

GPy/models/state_space.py (new file, 745 lines)
View file

@ -0,0 +1,745 @@
# Copyright (c) 2013, Arno Solin.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# This implementation of converting GPs to state space models is based on the article:
#
# @article{Sarkka+Solin+Hartikainen:2013,
# author = {Simo S\"arkk\"a and Arno Solin and Jouni Hartikainen},
# year = {2013},
# title = {Spatiotemporal learning via infinite-dimensional {B}ayesian filtering and smoothing},
# journal = {IEEE Signal Processing Magazine},
# volume = {30},
# number = {4},
# pages = {51--61}
# }
#
import numpy as np
from scipy import linalg
from ..core import Model
from .. import kern
from GPy.plotting.matplot_dep.models_plots import gpplot
from GPy.plotting.matplot_dep.base_plots import x_frame1D
from GPy.plotting.matplot_dep import Tango
import pylab as pb
from GPy.core.parameterization.param import Param
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
self.num_data, input_dim = X.shape
assert input_dim==1, "State space methods for time only"
num_data_Y, self.output_dim = Y.shape
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods for single outputs only"
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
# Noise variance
self.sigma2 = Param('Gaussian_noise', sigma2)
self.link_parameter(self.sigma2)
# Default kernel
if kernel is None:
self.kern = kern.Matern32(1)
else:
self.kern = kernel
self.link_parameter(self.kern)
self.sigma2.constrain_positive()
# Assert that the kernel is supported
if not hasattr(self.kern, 'sde'):
raise NotImplementedError('SDE must be implemented for the kernel being used')
#assert self.kern.sde() not False, "This kernel is not supported for state space estimation"
def parameters_changed(self):
"""
Parameters have now changed
"""
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Use the Kalman filter to evaluate the likelihood
self._log_marginal_likelihood = self.kf_likelihood(F,L,Qc,H,self.sigma2,Pinf,self.X.T,self.Y.T)
gradients = self.compute_gradients()
self.sigma2.gradient_full[:] = gradients[-1]
self.kern.gradient_full[:] = gradients[:-1]
def log_likelihood(self):
return self._log_marginal_likelihood
def compute_gradients(self):
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dFt,dQct,dPinft) = self.kern.sde()
# Allocate space for the full partial derivative matrices
dF = np.zeros([dFt.shape[0],dFt.shape[1],dFt.shape[2]+1])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],dQct.shape[2]+1])
dPinf = np.zeros([dPinft.shape[0],dPinft.shape[1],dPinft.shape[2]+1])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dPinf[:,:,:-1] = dPinft
# The sigma2 derivative
dR = np.zeros([1,1,dF.shape[2]])
dR[:,:,-1] = 1
# Calculate the likelihood gradients
gradients = self.kf_likelihood_g(F,L,Qc,H,self.sigma2,Pinf,dF,dQc,dPinf,dR,self.X.T,self.Y.T)
return gradients
def predict_raw(self, Xnew, Ynew=None, filteronly=False):
# Set defaults
if Ynew is None:
Ynew = self.Y
# Make a single matrix containing training and testing points
X = np.vstack((self.X, Xnew))
Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape)))
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index]
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run the Kalman filter
(M, P) = self.kalman_filter(F,L,Qc,H,self.sigma2,Pinf,X.T,Y.T)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = self.rts_smoother(F,L,Qc,X.T,M,P)
# Put the data back in the original order
M = M[:,return_inverse]
P = P[:,:,return_inverse]
# Only return the values for Xnew
M = M[:,self.num_data:]
P = P[:,:,self.num_data:]
# Calculate the mean and variance
m = H.dot(M).T
V = np.tensordot(H[0],P,(0,0))
V = np.tensordot(V,H[0],(0,0))
V = V[:,None]
# Return the posterior of the state
return (m, V)
def predict(self, Xnew, filteronly=False):
# Run the Kalman filter to get the state
(m, V) = self.predict_raw(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
V += self.sigma2
# Lower and upper bounds
lower = m - 2*np.sqrt(V)
upper = m + 2*np.sqrt(V)
# Return mean and variance
return (m, V, lower, upper)
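A minimal usage sketch of the state space model, assuming it is exposed as GPy.models.StateSpace and used with a kernel that implements sde() (e.g. the Matern32 above):
import numpy as np
import GPy
X = np.linspace(0, 10, 200)[:, None]
Y = np.sin(X) + 0.1 * np.random.randn(*X.shape)
m = GPy.models.StateSpace(X, Y, kernel=GPy.kern.Matern32(1))
m.optimize()
mu, V, lower, upper = m.predict(np.linspace(0, 12, 50)[:, None])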
def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
ax=None, resolution=None, plot_raw=False, plot_filter=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
# Deal with optional parameters
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
# Define the frame on which to plot
resolution = resolution or 200
Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
# Make a prediction on the frame and plot it
if plot_raw:
m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = self.Y
else:
m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
Y = self.Y
# Plot the values
gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(self.X, self.Y, 'kx', mew=1.5)
# Optionally plot some samples
if samples:
if plot_raw:
Ysim = self.posterior_samples_f(Xgrid, samples)
else:
Ysim = self.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
# Set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
def prior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Allocate space for results
Y = np.empty((size,X.shape[0]))
# Simulate random draws
#for j in range(0,size):
# Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
Y = self.simulate(F,L,Qc,Pinf,X.T,size)
# Only observations
Y = np.tensordot(H[0],Y,(0,0))
# Reorder simulated values
Y = Y[:,return_inverse]
# Return trajectory
return Y.T
def posterior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run smoother on original data
(m,V) = self.predict_raw(X)
# Simulate random draws from the GP prior
y = self.prior_samples_f(np.vstack((self.X, X)),size)
# Allocate space for sample trajectories
Y = np.empty((size,X.shape[0]))
# Run the RTS smoother on each of these values
for j in range(0,size):
yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
(m2,V2) = self.predict_raw(X,Ynew=yobs)
Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
# Reorder simulated values
Y = Y[:,return_inverse]
# Return posterior sample trajectories
return Y.T
def posterior_samples(self, X, size=10):
# Make samples of f
Y = self.posterior_samples_f(X,size)
# Add noise
Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
# Return trajectory
return Y
def kalman_filter(self,F,L,Qc,H,R,Pinf,X,Y):
# KALMAN_FILTER - Run the Kalman filter for a given model and data
# Allocate space for results
MF = np.empty((F.shape[0],Y.shape[1]))
PF = np.empty((F.shape[0],F.shape[0],Y.shape[1]))
# Initialize
MF[:,-1] = np.zeros(F.shape[0])
PF[:,:,-1] = Pinf.copy()
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Kalman filter
for k in range(0,Y.shape[1]):
# Form discrete-time model
#(A, Q) = self.lti_disc(F,L,Qc,dt[:,k])
A = As[:,:,index[k]];
Q = Qs[:,:,index[k]];
# Prediction step
MF[:,k] = A.dot(MF[:,k-1])
PF[:,:,k] = A.dot(PF[:,:,k-1]).dot(A.T) + Q
# Update step (only if there is data)
if not np.isnan(Y[:,k]):
if Y.shape[0]==1:
K = PF[:,:,k].dot(H.T)/(H.dot(PF[:,:,k]).dot(H.T) + R)
else:
LL = linalg.cho_factor(H.dot(PF[:,:,k]).dot(H.T) + R)
K = linalg.cho_solve(LL, H.dot(PF[:,:,k].T)).T
MF[:,k] += K.dot(Y[:,k]-H.dot(MF[:,k]))
PF[:,:,k] -= K.dot(H).dot(PF[:,:,k])
# Return values
return (MF, PF)
def rts_smoother(self,F,L,Qc,X,MS,PS):
# RTS_SMOOTHER - Run the RTS smoother for a given model and data
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Sequentially smooth states starting from the end
for k in range(2,X.shape[1]+1):
# Form discrete-time model
#(A, Q) = self.lti_disc(F,L,Qc,dt[:,1-k])
A = As[:,:,index[1-k]];
Q = Qs[:,:,index[1-k]];
# Smoothing step
LL = linalg.cho_factor(A.dot(PS[:,:,-k]).dot(A.T)+Q)
G = linalg.cho_solve(LL,A.dot(PS[:,:,-k])).T
MS[:,-k] += G.dot(MS[:,1-k]-A.dot(MS[:,-k]))
PS[:,:,-k] += G.dot(PS[:,:,1-k]-A.dot(PS[:,:,-k]).dot(A.T)-Q).dot(G.T)
# Return
return (MS, PS)
def kf_likelihood(self,F,L,Qc,H,R,Pinf,X,Y):
# Evaluate marginal likelihood
# Initialize
lik = 0
m = np.zeros((F.shape[0],1))
P = Pinf.copy()
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Kalman filter for likelihood evaluation
for k in range(0,Y.shape[1]):
# Form discrete-time model
#(A,Q) = self.lti_disc(F,L,Qc,dt[:,k])
A = As[:,:,index[k]];
Q = Qs[:,:,index[k]];
# Prediction step
m = A.dot(m)
P = A.dot(P).dot(A.T) + Q
# Update step only if there is data
if not np.isnan(Y[:,k]):
v = Y[:,k]-H.dot(m)
if Y.shape[0]==1:
S = H.dot(P).dot(H.T) + R
K = P.dot(H.T)/S
lik -= 0.5*np.log(S)
lik -= 0.5*v.shape[0]*np.log(2*np.pi)
lik -= 0.5*v*v/S
else:
LL, isupper = linalg.cho_factor(H.dot(P).dot(H.T) + R)
lik -= np.sum(np.log(np.diag(LL)))
lik -= 0.5*v.shape[0]*np.log(2*np.pi)
lik -= 0.5*linalg.cho_solve((LL, isupper),v).dot(v)
K = linalg.cho_solve((LL, isupper), H.dot(P.T)).T
m += K.dot(v)
P -= K.dot(H).dot(P)
# Return likelihood
return lik[0,0]
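In the scalar branch, the three terms accumulated per step are exactly the Gaussian log-density of the innovation v under variance S; a quick standalone check with assumed values:
import numpy as np
from scipy.stats import norm
v, S = 0.7, 2.3   # innovation and innovation variance
term = -0.5*np.log(S) - 0.5*np.log(2.*np.pi) - 0.5*v*v/S
print(np.isclose(term, norm.logpdf(v, 0.0, np.sqrt(S))))   # True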
def kf_likelihood_g(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y):
# Evaluate marginal likelihood gradient
# State dimension, number of data points and number of parameters
n = F.shape[0]
steps = Y.shape[1]
nparam = dF.shape[2]
# Time steps
t = X.squeeze()
# Allocate space
e = 0
eg = np.zeros(nparam)
# Set up
m = np.zeros([n,1])
P = Pinf.copy()
dm = np.zeros([n,nparam])
dP = dPinf.copy()
mm = m.copy()
PP = P.copy()
# Initial dt
dt = -np.Inf
# Allocate space for expm results
AA = np.zeros([2*n, 2*n, nparam])
FF = np.zeros([2*n, 2*n])
# Loop over all observations
for k in range(0,steps):
# The previous time step
dt_old = dt;
# The time discretization step length
if k>0:
dt = t[k]-t[k-1]
else:
dt = 0
# Loop through all parameters (Kalman filter prediction step)
for j in range(0,nparam):
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# The first matrix for the matrix factor decomposition
FF[:n,:n] = F
FF[n:,:n] = dF[:,:,j]
FF[n:,n:] = F
# Solve the matrix exponential
AA[:,:,j] = linalg.expm3(FF*dt)
# Solve the differential equation
foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]]))
mm = foo[:n,:]
dm[:,j:j+1] = foo[n:,:]
# The discrete-time dynamical model
if j==0:
A = AA[:n,:n,j]
Q = Pinf - A.dot(Pinf).dot(A.T)
PP = A.dot(P).dot(A.T) + Q
# The derivatives of A and Q
dA = AA[n:,:n,j]
dQ = dPinf[:,:,j] - dA.dot(Pinf).dot(A.T) \
- A.dot(dPinf[:,:,j]).dot(A.T) - A.dot(Pinf).dot(dA.T)
# The derivatives of P
dP[:,:,j] = dA.dot(P).dot(A.T) + A.dot(dP[:,:,j]).dot(A.T) \
+ A.dot(P).dot(dA.T) + dQ
# Set predicted m and P
m = mm
P = PP
# Start the Kalman filter update step and precalculate variables
S = H.dot(P).dot(H.T) + R
# We should calculate the Cholesky factor if S is a matrix
# [LS,notposdef] = chol(S,'lower');
# The Kalman filter update (S is scalar)
HtiS = H.T/S
iS = 1/S
K = P.dot(HtiS)
v = Y[:,k]-H.dot(m)
vtiS = v.T/S
# Loop through all parameters (Kalman filter update step derivative)
for j in range(0,nparam):
# Innovation covariance derivative
dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j];
# Evaluate the energy derivative for j
eg[j] = eg[j] \
- .5*np.sum(iS*dS) \
+ .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
+ .5*vtiS.dot(dS).dot(vtiS.T) \
+ .5*vtiS.dot(H.dot(dm[:,j:j+1]))
# Kalman filter update step derivatives
dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S
dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1])
dKSKt = dK.dot(S).dot(K.T)
dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T
# Evaluate the energy
# e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v)
# Finish Kalman filter update step
m = m + K.dot(v)
P = P - K.dot(S).dot(K.T)
# Make sure the covariances stay symmetric
P = (P+P.T)/2
dP = (dP + dP.transpose([1,0,2]))/2
# raise NameError('Debug me')
# Return the gradient
return eg
def kf_likelihood_g_notstable(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y):
# Evaluate marginal likelihood gradient
# State dimension, number of data points and number of parameters
steps = Y.shape[1]
nparam = dF.shape[2]
n = F.shape[0]
# Time steps
t = X.squeeze()
# Allocate space
e = 0
eg = np.zeros(nparam)
# Set up
Z = np.zeros(F.shape)
QC = L.dot(Qc).dot(L.T)
m = np.zeros([n,1])
P = Pinf.copy()
dm = np.zeros([n,nparam])
dP = dPinf.copy()
mm = m.copy()
PP = P.copy()
# Initial dt
dt = -np.Inf
# Allocate space for expm results
AA = np.zeros([2*F.shape[0], 2*F.shape[0], nparam])
AAA = np.zeros([4*F.shape[0], 4*F.shape[0], nparam])
FF = np.zeros([2*F.shape[0], 2*F.shape[0]])
FFF = np.zeros([4*F.shape[0], 4*F.shape[0]])
# Loop over all observations
for k in range(0,steps):
# The previous time step
dt_old = dt;
# The time discretization step length
if k>0:
dt = t[k]-t[k-1]
else:
dt = t[1]-t[0]
# Loop through all parameters (Kalman filter prediction step)
for j in range(0,nparam):
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# The first matrix for the matrix factor decomposition
FF[:n,:n] = F
FF[n:,:n] = dF[:,:,j]
FF[n:,n:] = F
# Solve the matrix exponential
AA[:,:,j] = linalg.expm3(FF*dt)
# Solve using matrix fraction decomposition
foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]]))
# Pick the parts
mm = foo[:n,:]
dm[:,j:j+1] = foo[n:,:]
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# Define W and G
W = L.dot(dQc[:,:,j]).dot(L.T)
G = dF[:,:,j];
# The second matrix for the matrix factor decomposition
FFF[:n,:n] = F
FFF[2*n:-n,:n] = G
FFF[:n, n:2*n] = QC
FFF[n:2*n, n:2*n] = -F.T
FFF[2*n:-n,n:2*n] = W
FFF[-n:, n:2*n] = -G.T
FFF[2*n:-n,2*n:-n] = F
FFF[2*n:-n,-n:] = QC
FFF[-n:,-n:] = -F.T
# Solve the matrix exponential
AAA[:,:,j] = linalg.expm3(FFF*dt)
# Solve using matrix fraction decomposition
foo = AAA[:,:,j].dot(np.vstack([P, np.eye(n), dP[:,:,j], np.zeros([n,n])]))
# Pick the parts
C = foo[:n, :]
D = foo[n:2*n, :]
dC = foo[2*n:-n,:]
dD = foo[-n:, :]
# The prediction step covariance (PP = C/D)
if j==0:
PP = linalg.solve(D.T,C.T).T
PP = (PP + PP.T)/2
# Solve dP for j (C/D == P_{k|k-1})
dP[:,:,j] = linalg.solve(D.T,(dC - PP.dot(dD)).T).T
# Set predicted m and P
m = mm
P = PP
# Start the Kalman filter update step and precalculate variables
S = H.dot(P).dot(H.T) + R
# We should calculate the Cholesky factor if S is a matrix
# [LS,notposdef] = chol(S,'lower');
# The Kalman filter update (S is scalar)
HtiS = H.T/S
iS = 1/S
K = P.dot(HtiS)
v = Y[:,k]-H.dot(m)
vtiS = v.T/S
# Loop through all parameters (Kalman filter update step derivative)
for j in range(0,nparam):
# Innovation covariance derivative
dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j];
# Evaluate the energy derivative for j
eg[j] = eg[j] \
- .5*np.sum(iS*dS) \
+ .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
+ .5*vtiS.dot(dS).dot(vtiS.T) \
+ .5*vtiS.dot(H.dot(dm[:,j:j+1]))
# Kalman filter update step derivatives
dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S
dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1])
dKSKt = dK.dot(S).dot(K.T)
dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T
# Evaluate the energy
# e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v)
# Finish Kalman filter update step
m = m + K.dot(v)
P = P - K.dot(S).dot(K.T)
# Make sure the covariances stay symmetric
P = (P+P.T)/2
dP = (dP + dP.transpose([1,0,2]))/2
# raise NameError('Debug me')
# Report
#print e
#print eg
# Return the gradient
return eg
def simulate(self,F,L,Qc,Pinf,X,size=1):
# Simulate a trajectory using the state space model
# Allocate space for results
f = np.zeros((F.shape[0],size,X.shape[1]))
# Initial state
f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Sweep through remaining time points
for k in range(1,X.shape[1]):
# Form discrete-time model
A = As[:,:,index[1-k]]
Q = Qs[:,:,index[1-k]]
# Draw the state
f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
# Return values
return f
def lti_disc(self,F,L,Qc,dt):
# Discrete-time solution to the LTI SDE
# Dimensionality
n = F.shape[0]
index = 0
# Check for numbers of time steps
if dt.flatten().shape[0]==1:
# The covariance matrix by matrix fraction decomposition
Phi = np.zeros((2*n,2*n))
Phi[:n,:n] = F
Phi[:n,n:] = L.dot(Qc).dot(L.T)
Phi[n:,n:] = -F.T
AB = linalg.expm(Phi*dt).dot(np.vstack((np.zeros((n,n)),np.eye(n))))
Q = linalg.solve(AB[n:,:].T,AB[:n,:].T)
# The dynamical model
A = linalg.expm(F*dt)
# Return
return A, Q
# Optimize for cases where time steps occur repeatedly
else:
# Time discretizations (round to 14 decimals to avoid problems)
dt, _, index = np.unique(np.round(dt,14),True,True)
# Allocate space for A and Q
A = np.empty((n,n,dt.shape[0]))
Q = np.empty((n,n,dt.shape[0]))
# Call this function for each dt
for j in range(0,dt.shape[0]):
A[:,:,j], Q[:,:,j] = self.lti_disc(F,L,Qc,dt[j])
# Return
return A, Q, index
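For a stationary model the discretized noise covariance from the matrix fraction decomposition should agree with Q = Pinf - A*Pinf*A^T (the identity also used in the gradient code above); a standalone check with the Matern32 matrices and assumed values:
import numpy as np
from scipy import linalg
l, s2, dt = 0.5, 2.0, 0.1
lam = np.sqrt(3.) / l
F = np.array([[0., 1.], [-lam**2, -2.*lam]])
L = np.array([[0.], [1.]])
Qc = np.array([[12.*np.sqrt(3.)/l**3 * s2]])
Pinf = np.array([[s2, 0.], [0., 3.*s2/l**2]])
n = F.shape[0]
Phi = np.zeros((2*n, 2*n))
Phi[:n, :n] = F
Phi[:n, n:] = L.dot(Qc).dot(L.T)
Phi[n:, n:] = -F.T
AB = linalg.expm(Phi*dt).dot(np.vstack((np.zeros((n, n)), np.eye(n))))
Q = linalg.solve(AB[n:, :].T, AB[:n, :].T)
A = linalg.expm(F*dt)
print(np.allclose(Q, Pinf - A.dot(Pinf).dot(A.T)))   # True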

File diff suppressed because it is too large.

View file

@ -0,0 +1,964 @@
# -*- coding: utf-8 -*-
"""
Contains some cython code for state space modelling.
"""
import numpy as np
cimport numpy as np
import scipy as sp
cimport cython
#from libc.math cimport isnan # for nan checking in kalman filter cycle
cdef extern from "numpy/npy_math.h":
bint npy_isnan(double x)
DTYPE = np.float64
DTYPE_int = np.int64
ctypedef np.float64_t DTYPE_t
ctypedef np.int64_t DTYPE_int_t
# Template class for dynamic callables
cdef class Dynamic_Callables_Cython:
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
raise NotImplementedError("(cython) f_a is not implemented!")
cpdef Ak(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] P): # returns state iteration matrix
raise NotImplementedError("(cython) Ak is not implemented!")
cpdef Qk(self, int k):
raise NotImplementedError("(cython) Qk is not implemented!")
cpdef Q_srk(self, int k):
raise NotImplementedError("(cython) Q_srk is not implemented!")
cpdef dAk(self, int k):
raise NotImplementedError("(cython) dAk is not implemented!")
cpdef dQk(self, int k):
raise NotImplementedError("(cython) dQk is not implemented!")
cpdef reset(self, bint compute_derivatives = False):
raise NotImplementedError("(cython) reset is not implemented!")
# Template class for measurement callables
cdef class Measurement_Callables_Cython:
cpdef f_h(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] Hk):
raise NotImplementedError("(cython) f_h is not implemented!")
cpdef Hk(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns measurement matrix
raise NotImplementedError("(cython) Hk is not implemented!")
cpdef Rk(self, int k):
raise NotImplementedError("(cython) Rk is not implemented!")
cpdef R_isrk(self, int k):
raise NotImplementedError("(cython) R_isrk is not implemented!")
cpdef dHk(self, int k):
raise NotImplementedError("(cython) dHk is not implemented!")
cpdef dRk(self, int k):
raise NotImplementedError("(cython) dRk is not implemented!")
cpdef reset(self,compute_derivatives = False):
raise NotImplementedError("(cython) reset is not implemented!")
cdef class R_handling_Cython(Measurement_Callables_Cython):
"""
This class handles the noise matrix R.
"""
cdef:
np.ndarray R
np.ndarray index
int R_time_var_index
np.ndarray dR
bint svd_each_time
dict R_square_root
def __init__(self, np.ndarray[DTYPE_t, ndim=3] R, np.ndarray[DTYPE_t, ndim=2] index,
int R_time_var_index, int p_unique_R_number, np.ndarray[DTYPE_t, ndim=3] dR = None):
"""
Input:
---------------
R - array with noise on various steps. The result of preprocessing
the noise input.
index - for each step of Kalman filter contains the corresponding index
in the array.
R_time_var_index - another index in the array R. Computed earlier and passed here.
unique_R_number - number of unique noise matrices below which square roots
are cached and above which they are computed each time.
dR: 3D array[:, :, param_num]
derivative of R. Derivative is supported only when R do not change over time
Output:
--------------
Object which has the necessary functions:
Rk(k)
dRk(k)
R_isrk(k)
"""
self.R = R
self.index = index
self.R_time_var_index = R_time_var_index
self.dR = dR
cdef int unique_len = len(np.unique(index))
if (unique_len > p_unique_R_number):
self.svd_each_time = True
else:
self.svd_each_time = False
self.R_square_root = {}
cpdef Rk(self, int k):
return self.R[:,:, <int>self.index[self.R_time_var_index, k]]
cpdef dRk(self,int k):
if self.dR is None:
raise ValueError("dR derivative is None")
return self.dR # the same derivative on each iteration
cpdef R_isrk(self, int k):
"""
Function returns the inverse square root of R matrix on step k.
"""
cdef int ind = <int>self.index[self.R_time_var_index, k]
cdef np.ndarray[DTYPE_t, ndim=2] R = self.R[:,:, ind ]
cdef np.ndarray[DTYPE_t, ndim=2] inv_square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if (R.shape[0] == 1): # 1-D case is handled more simply. No storage
# of the result, just compute it each time.
inv_square_root = np.sqrt( 1.0/R )
else:
if self.svd_each_time:
U,S,Vh = sp.linalg.svd( R,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
inv_square_root = U * 1.0/np.sqrt(S)
else:
if ind in self.R_square_root:
inv_square_root = self.R_square_root[ind]
else:
U,S,Vh = sp.linalg.svd( R,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
inv_square_root = U * 1.0/np.sqrt(S)
self.R_square_root[ind] = inv_square_root
return inv_square_root
cdef class Std_Measurement_Callables_Cython(R_handling_Cython):
cdef:
np.ndarray H
int H_time_var_index
np.ndarray dH
def __init__(self, np.ndarray[DTYPE_t, ndim=3] H, int H_time_var_index,
np.ndarray[DTYPE_t, ndim=3] R, np.ndarray[DTYPE_t, ndim=2] index, int R_time_var_index,
int unique_R_number, np.ndarray[DTYPE_t, ndim=3] dH = None,
np.ndarray[DTYPE_t, ndim=3] dR=None):
super(Std_Measurement_Callables_Cython,self).__init__(R, index, R_time_var_index, unique_R_number,dR)
self.H = H
self.H_time_var_index = H_time_var_index
self.dH = dH
cpdef f_h(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] H):
"""
function (k, x_{k}, H_{k}). Measurement function.
k (iteration number), starts at 0
x_{k} state
H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
"""
return np.dot(H, m)
cpdef Hk(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns measurement matrix
"""
function (k, m, P) returns the Jacobian of the measurement function; it is
passed into f_h.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
"""
return self.H[:,:, <int>self.index[self.H_time_var_index, k]]
cpdef dHk(self,int k):
if self.dH is None:
raise ValueError("dH derivative is None")
return self.dH # the same derivative on each iteration
cdef class Q_handling_Cython(Dynamic_Callables_Cython):
cdef:
np.ndarray Q
np.ndarray index
int Q_time_var_index
np.ndarray dQ
dict Q_square_root
bint svd_each_time
def __init__(self, np.ndarray[DTYPE_t, ndim=3] Q, np.ndarray[DTYPE_t, ndim=2] index,
int Q_time_var_index, int p_unique_Q_number, np.ndarray[DTYPE_t, ndim=3] dQ = None):
"""
Input:
---------------
Q - array with noise on various steps. The result of preprocessing
the noise input.
index - for each step of Kalman filter contains the corresponding index
in the array.
Q_time_var_index - another index in the array Q. Computed earlier and passed here.
unique_Q_number - number of unique noise matrices below which square roots
are cached and above which they are computed each time.
dQ: 3D array[:, :, param_num]
derivative of Q. Derivative is supported only when Q does not change over time
Output:
--------------
Object which has three necessary functions:
Qk(k)
dQk(k)
Q_srk(k)
"""
self.Q = Q
self.index = index
self.Q_time_var_index = Q_time_var_index
self.dQ = dQ
cdef int unique_len = len(np.unique(index))
if (unique_len > p_unique_Q_number):
self.svd_each_time = True
else:
self.svd_each_time = False
self.Q_square_root = {}
cpdef Qk(self, int k):
"""
function (k). Returns noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
"""
return self.Q[:,:, <int>self.index[self.Q_time_var_index, k]]
cpdef dQk(self, int k):
if self.dQ is None:
raise ValueError("dQ derivative is None")
return self.dQ # the same derivative on each iteration
cpdef Q_srk(self, int k):
"""
function (k). Returns the square root of noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
This function is implemented to use SVD prediction step.
"""
cdef int ind = <int>self.index[self.Q_time_var_index, k]
cdef np.ndarray[DTYPE_t, ndim=2] Q = self.Q[:,:, ind]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if (Q.shape[0] == 1): # 1-D case is handled more simply: no storage
# of the result, just compute it each time.
square_root = np.sqrt( Q )
else:
if self.svd_each_time:
U,S,Vh = sp.linalg.svd( Q,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
square_root = U * np.sqrt(S)
else:
if ind in self.Q_square_root:
square_root = self.Q_square_root[ind]
else:
U,S,Vh = sp.linalg.svd( Q,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
square_root = U * np.sqrt(S)
self.Q_square_root[ind] = square_root
return square_root
cdef class Std_Dynamic_Callables_Cython(Q_handling_Cython):
cdef:
np.ndarray A
int A_time_var_index
np.ndarray dA
def __init__(self, np.ndarray[DTYPE_t, ndim=3] A, int A_time_var_index,
np.ndarray[DTYPE_t, ndim=3] Q,
np.ndarray[DTYPE_t, ndim=2] index,
int Q_time_var_index, int unique_Q_number,
np.ndarray[DTYPE_t, ndim=3] dA = None,
np.ndarray[DTYPE_t, ndim=3] dQ=None):
super(Std_Dynamic_Callables_Cython,self).__init__(Q, index, Q_time_var_index, unique_Q_number,dQ)
self.A = A
self.A_time_var_index = A_time_var_index
self.dA = dA
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
"""
f_a: function (k, x_{k-1}, A_{k}). Dynamic function.
k (iteration number), starts at 0
x_{k-1} State from the previous step
A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
"""
return np.dot(A,m)
cpdef Ak(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns state iteration matrix
"""
function (k, m, P) returns the Jacobian of the dynamic function; it is
passed into f_a.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
"""
return self.A[:,:, <int>self.index[self.A_time_var_index, k]]
cpdef dAk(self, int k):
if self.dA is None:
raise ValueError("dA derivative is None")
return self.dA # the same derivative on each iteration
cpdef reset(self, bint compute_derivatives=False):
"""
For reusing this object e.g. in the smoother computation. It makes sense
because the necessary matrices have already been computed for all
time steps.
"""
return self
cdef class AQcompute_batch_Cython(Q_handling_Cython):
"""
Class for calculating the matrices A, Q, dA, dQ of the discrete Kalman filter
from the matrices F, L, Qc, P_inf, dF, dQc, dP_inf of the continuous state
equation. dt - time steps.
It has the same interface as AQcompute_once.
It computes matrices for all time steps. This object is used when there
are not too many different time steps (controlled by an internal variable)
and storing all the matrices does not take too much memory.
Since all the matrices are computed together, this object can be used
in the smoother without repeating the computations.
"""
#def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
cdef:
np.ndarray As
np.ndarray Qs
np.ndarray dAs
np.ndarray dQs
np.ndarray reconstruct_indices
long total_size_of_data
dict Q_svd_dict
int last_k
def __init__(self, np.ndarray[DTYPE_t, ndim=3] As, np.ndarray[DTYPE_t, ndim=3] Qs,
np.ndarray[DTYPE_int_t, ndim=1] reconstruct_indices,
np.ndarray[DTYPE_t, ndim=4] dAs=None,
np.ndarray[DTYPE_t, ndim=4] dQs=None):
"""
Constructor. All necessary parameters are passed here and stored
in the object.
Input:
-------------------
As, Qs: 3D arrays
    Precomputed discrete-time transition and noise matrices, one slice
    per unique time step.
reconstruct_indices: 1D array
    For each Kalman filter step, the index of the corresponding slice
    in As and Qs.
dAs, dQs: 4D arrays
    Derivatives of As and Qs with respect to the parameters (optional).
Output:
-------------------
"""
self.As = As
self.Qs = Qs
self.dAs = dAs
self.dQs = dQs
self.reconstruct_indices = reconstruct_indices
self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\
(self.dAs.nbytes if (self.dAs is not None) else 0) +\
(self.dQs.nbytes if (self.dQs is not None) else 0) +\
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {}
self.last_k = 0
# !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
"""
Dynamic model
"""
return np.dot(A, m) # default dynamic model
cpdef reset(self, bint compute_derivatives=False):
"""
For reusing this object e.g. in the smoother computation. It makes sense
because the necessary matrices have already been computed for all
time steps.
"""
return self
cpdef Ak(self,int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] P):
self.last_k = k
return self.As[:,:, <int>self.reconstruct_indices[k]]
cpdef Qk(self,int k):
self.last_k = k
return self.Qs[:,:, <int>self.reconstruct_indices[k]]
cpdef dAk(self, int k):
self.last_k = k
return self.dAs[:,:, :, <int>self.reconstruct_indices[k]]
cpdef dQk(self, int k):
self.last_k = k
return self.dQs[:,:, :, <int>self.reconstruct_indices[k]]
cpdef Q_srk(self, int k):
"""
Square root of the noise matrix Q
"""
cdef int matrix_index = <int>self.reconstruct_indices[k]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_svd_dict:
square_root = self.Q_svd_dict[matrix_index]
else:
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root
return square_root
# def return_last(self):
# """
# Function returns last available matrices.
# """
#
# if (self.last_k is None):
# raise ValueError("Matrices are not computed.")
# else:
# ind = self.reconstruct_indices[self.last_k]
# A = self.As[:,:, ind]
# Q = self.Qs[:,:, ind]
# dA = self.dAs[:,:, :, ind]
# dQ = self.dQs[:,:, :, ind]
#
# return self.last_k, A, Q, dA, dQ
@cython.boundscheck(False)
def _kalman_prediction_step_SVD_Cython(long k, np.ndarray[DTYPE_t, ndim=2] p_m , tuple p_P,
Dynamic_Callables_Cython p_dynamic_callables,
bint calc_grad_log_likelihood=False,
np.ndarray[DTYPE_t, ndim=3] p_dm = None,
np.ndarray[DTYPE_t, ndim=3] p_dP = None):
"""
Discrete-time prediction function
Input:
k:int
Iteration No. Starts at 0. Total number of iterations equal to the
number of measurements.
p_m: matrix of size (state_dim, time_series_no)
Mean value from the previous step. For "multiple time series mode"
it is matrix, second dimension of which correspond to different
time series.
p_P: tuple (Prev_cov, S, V)
Covariance matrix from the previous step and its SVD decomposition.
Prev_cov = V * S * V.T The tuple is (Prev_cov, S, V)
p_dynamic_callables: Dynamic_Callables_Cython
    Object providing the dynamic model on this step:
    f_a(k, x_{k-1}, A_k) - dynamic function, Ak(k, m, P) - its Jacobian,
    Qk(k) - noise matrix of the dynamic model, Q_srk(k) - its square root,
    dAk(k) and dQk(k) - derivatives of A and Q with respect to the parameters.
calc_grad_log_likelihood: boolean
Whether to calculate gradient of the marginal likelihood
of the state-space model. If true then the next parameter must
provide the extra parameters for gradient calculation.
p_dm: 3D array (state_dim, time_series_no, parameters_no)
Mean derivatives from the previous step. For "multiple time series mode"
it is 3D array, second dimension of which correspond to different
time series.
p_dP: 3D array (state_dim, state_dim, parameters_no)
Mean derivatives from the previous step
grad_calc_params_1: List or None
List with derivatives. The first component is 'f_dA' - function(k)
which returns the derivative of A. The second element is 'f_dQ'
- function(k). Function which returns the derivative of Q.
Output:
----------------------------
m_pred, P_pred, dm_pred, dP_pred: matrices, 3D objects
Results of the prediction steps.
"""
# covariance from the previous step: p_prev_cov = V_old * diag(S_old) * V_old.T
cdef np.ndarray[DTYPE_t, ndim=2] Prev_cov = p_P[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_old = p_P[1]
cdef np.ndarray[DTYPE_t, ndim=2] V_old = p_P[2]
#p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step
# index correspond to values from previous iteration.
cdef np.ndarray[DTYPE_t, ndim=2] A = p_dynamic_callables.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian)
cdef np.ndarray[DTYPE_t, ndim=2] Q = p_dynamic_callables.Qk(k) # state noise matrx. This is necessary for the square root calculation (next step)
cdef np.ndarray[DTYPE_t, ndim=2] Q_sr = p_dynamic_callables.Q_srk(k)
# Prediction step ->
cdef np.ndarray[DTYPE_t, ndim=2] m_pred = p_dynamic_callables.f_a(k, p_m, A) # predicted mean
# covariance prediction has changed:
cdef np.ndarray[DTYPE_t, ndim=2] svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) )
res = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
# (U,S,Vh)
cdef np.ndarray[DTYPE_t, ndim=2] U = res[0]
cdef np.ndarray[DTYPE_t, ndim=1] S = res[1]
cdef np.ndarray[DTYPE_t, ndim=2] Vh = res[2]
# predicted variance computed by the regular method. For testing
#P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q
cdef np.ndarray[DTYPE_t, ndim=2] V_new = Vh.T
cdef np.ndarray[DTYPE_t, ndim=1] S_new = S**2
cdef np.ndarray[DTYPE_t, ndim=2] P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance
#tuple P_pred = (P_pred, S_new, Vh.T)
# Prediction step <-
# derivatives
cdef np.ndarray[DTYPE_t, ndim=3] dA_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dQ_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred
cdef np.ndarray[DTYPE_t, ndim=3] dP_pred
cdef int param_number
cdef int j
cdef tuple ret
cdef np.ndarray[DTYPE_t, ndim=2] dA
cdef np.ndarray[DTYPE_t, ndim=2] dQ
if calc_grad_log_likelihood:
dA_all_params = p_dynamic_callables.dAk(k) # derivatives of A wrt parameters
dQ_all_params = p_dynamic_callables.dQk(k) # derivatives of Q wrt parameters
param_number = p_dP.shape[2]
# p_dm, p_dP - derivatives from the previous step
dm_pred = np.empty((p_dm.shape[0], p_dm.shape[1], p_dm.shape[2]), dtype = DTYPE)
dP_pred = np.empty((p_dP.shape[0], p_dP.shape[1], p_dP.shape[2]), dtype = DTYPE)
for j in range(param_number):
dA = dA_all_params[:,:,j]
dQ = dQ_all_params[:,:,j]
dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j])
# prediction step derivatives for current parameter:
dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T))
dP_pred[:,:,j] += dP_pred[:,:,j].T
dP_pred[:,:,j] += np.dot( A ,np.dot( p_dP[:,:,j] , A.T)) + dQ
dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize
else:
dm_pred = None
dP_pred = None
ret = (P_pred, S_new, Vh.T)
return m_pred, ret, dm_pred, dP_pred
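# Illustrative sketch (not part of the original module): the stacked-matrix SVD used
# in the prediction step above reproduces the standard covariance propagation
#     P_pred = A * P_prev * A.T + Q
# while only ever working with square-root factors. Only numpy/scipy are assumed;
# the matrices are arbitrary examples.
def _demo_svd_covariance_prediction():
    import numpy as np
    import scipy.linalg
    rnd = np.random.RandomState(0)
    A = rnd.randn(3, 3)                                   # state transition matrix
    B1 = rnd.randn(3, 3)
    P_prev = np.dot(B1, B1.T) + np.eye(3)                 # previous covariance (SPD)
    B2 = rnd.randn(3, 3)
    Q = np.dot(B2, B2.T) + np.eye(3)                      # dynamic-model noise (SPD)
    S_old, V_old = np.linalg.eigh(P_prev)                 # P_prev = V_old*diag(S_old)*V_old.T
    Q_sr = np.linalg.cholesky(Q)                          # any factor with Q_sr*Q_sr.T = Q
    stacked = np.vstack(((np.sqrt(S_old) * np.dot(A, V_old)).T, Q_sr.T))
    _, s, Vh = scipy.linalg.svd(stacked, full_matrices=False)
    P_pred = np.dot(Vh.T * s**2, Vh)                      # equals stacked.T.dot(stacked)
    assert np.allclose(P_pred, np.dot(A, np.dot(P_prev, A.T)) + Q)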
@cython.boundscheck(False)
def _kalman_update_step_SVD_Cython(long k, np.ndarray[DTYPE_t, ndim=2] p_m, tuple p_P,
Measurement_Callables_Cython p_measurement_callables,
np.ndarray[DTYPE_t, ndim=2] measurement,
bint calc_log_likelihood= False,
bint calc_grad_log_likelihood=False,
np.ndarray[DTYPE_t, ndim=3] p_dm = None,
np.ndarray[DTYPE_t, ndim=3] p_dP = None):
"""
Input:
k: int
Iteration No. Starts at 0. Total number of iterations equal to the
number of measurements.
p_m: matrix of size (state_dim, time_series_no)
    Mean value from the prediction step. For "multiple time series mode"
    it is a matrix whose second dimension corresponds to different
    time series.
p_P: tuple (P_pred, S, V)
Covariance matrix from the prediction step and its SVD decomposition.
P_pred = V * S * V.T The tuple is (P_pred, S, V)
p_measurement_callables: Measurement_Callables_Cython
    Object providing the measurement model on this step:
    f_h(k, x_k, H_k) - measurement function, Hk(k, m, P) - its Jacobian,
    Rk(k) - noise matrix of the measurement equation, R_isrk(k) - its
    inverse square root, dHk(k) and dRk(k) - derivatives of H and R.
measurement: (measurement_dim, time_series_no) matrix
One measurement used on the current update step. For
"multiple time series mode" it is matrix, second dimension of
which correspond to different time series.
calc_log_likelihood: boolean
Whether to calculate marginal likelihood of the state-space model.
calc_grad_log_likelihood: boolean
Whether to calculate gradient of the marginal likelihood
of the state-space model. If true then the next parameter must
provide the extra parameters for gradient calculation.
p_dm: 3D array (state_dim, time_series_no, parameters_no)
Mean derivatives from the prediction step. For "multiple time series mode"
it is 3D array, second dimension of which correspond to different
time series.
p_dP: array
Covariance derivatives from the prediction step.
grad_calc_params_2: List or None
List with derivatives. The first component is 'f_dH' - function(k)
which returns the derivative of H. The second element is 'f_dR'
- function(k). Function which returns the derivative of R.
Output:
----------------------------
m_upd, P_upd, dm_upd, dP_upd: matrices, 3D objects
Results of the prediction steps.
log_likelihood_update: double or 1D array
Update to the log_likelihood from this step
d_log_likelihood_update: (grad_params_no, time_series_no) matrix
Update to the gradient of log_likelihood, "multiple time series mode"
adds extra columns to the gradient.
"""
cdef np.ndarray[DTYPE_t, ndim=2] m_pred = p_m # from prediction step
#P_pred,S_pred,V_pred = p_P # from prediction step
cdef np.ndarray[DTYPE_t, ndim=2] P_pred = p_P[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_pred = p_P[1]
cdef np.ndarray[DTYPE_t, ndim=2] V_pred = p_P[2]
cdef np.ndarray[DTYPE_t, ndim=2] H = p_measurement_callables.Hk(k, m_pred, P_pred)
cdef np.ndarray[DTYPE_t, ndim=2] R = p_measurement_callables.Rk(k)
cdef np.ndarray[DTYPE_t, ndim=2] R_isr =p_measurement_callables.R_isrk(k) # square root of the inverse of R matrix
cdef int time_series_no = p_m.shape[1] # number of time serieses
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood_update # log_likelihood_update=None;
# Update step (only if there is data)
#if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do the computations properly for the others.
cdef np.ndarray[DTYPE_t, ndim=2] v = measurement-p_measurement_callables.f_h(k, m_pred, H)
cdef np.ndarray[DTYPE_t, ndim=2] svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) )
res = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
#(U,S,Vh)
cdef np.ndarray[DTYPE_t, ndim=2] U = res[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_svd = res[1]
cdef np.ndarray[DTYPE_t, ndim=2] Vh = res[2]
# P_upd = U_upd S_upd**2 U_upd.T
cdef np.ndarray[DTYPE_t, ndim=2] U_upd = np.dot(V_pred, Vh.T)
cdef np.ndarray[DTYPE_t, ndim=1] S_upd = (1.0/S_svd)**2
cdef np.ndarray[DTYPE_t, ndim=2] P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance
#P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step
# still need to compute S and K for derivative computation
cdef np.ndarray[DTYPE_t, ndim=2] S = H.dot(P_pred).dot(H.T) + R
cdef np.ndarray[DTYPE_t, ndim=2] K
cdef bint measurement_dim_gt_one = False
if measurement.shape[0]==1: # measurements are one dimensional
if (S < 0):
raise ValueError("Kalman Filter Update SVD: S is negative step %i" % k )
#import pdb; pdb.set_trace()
K = P_pred.dot(H.T) / S
if calc_log_likelihood:
log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
v*v / S)
#log_likelihood_update = log_likelihood_update[0,0] # to make int
if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None.
raise ValueError("Nan values in likelihood update!")
else:
log_likelihood_update = None
#LL = None; islower = None
else:
measurement_dim_gt_one = True
raise ValueError("""Measurement dimension larger then 1 is currently not supported""")
# Old method of computing updated covariance (for testing) ->
#P_upd_tst = K.dot(S).dot(K.T)
#P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T)
#P_upd_tst = P_pred - P_upd_tst# this update matrix is symmetric
# Old method of computing updated covariance (for testing) <-
cdef np.ndarray[DTYPE_t, ndim=3] dm_upd # dm_upd=None;
cdef np.ndarray[DTYPE_t, ndim=3] dP_upd # dP_upd=None;
cdef np.ndarray[DTYPE_t, ndim=2] d_log_likelihood_update # d_log_likelihood_update=None
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dP_pred_all_params
cdef int param_number
cdef np.ndarray[DTYPE_t, ndim=3] dH_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dR_all_params
cdef int param
cdef np.ndarray[DTYPE_t, ndim=2] dH, dR, dm_pred, dP_pred, dv, dS, tmp1, tmp2, tmp3, dK, tmp5
cdef tuple ret
if calc_grad_log_likelihood:
dm_pred_all_params = p_dm # derivatives of the prediction phase
dP_pred_all_params = p_dP
param_number = p_dP.shape[2]
dH_all_params = p_measurement_callables.dHk(k)
dR_all_params = p_measurement_callables.dRk(k)
dm_upd = np.empty((dm_pred_all_params.shape[0], dm_pred_all_params.shape[1], dm_pred_all_params.shape[2]), dtype = DTYPE)
dP_upd = np.empty((dP_pred_all_params.shape[0], dP_pred_all_params.shape[1], dP_pred_all_params.shape[2]), dtype = DTYPE)
# first dimension - parameter number, second - time series number
d_log_likelihood_update = np.empty((param_number,time_series_no), dtype = DTYPE)
for param in range(param_number):
dH = dH_all_params[:,:,param]
dR = dR_all_params[:,:,param]
dm_pred = dm_pred_all_params[:,:,param]
dP_pred = dP_pred_all_params[:,:,param]
# Terms in the likelihood derivatives
dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred)
dS = np.dot(dH, np.dot( P_pred, H.T))
dS += dS.T
dS += np.dot(H, np.dot( dP_pred, H.T)) + dR
# TODO: maybe symmetrize dS
tmp1 = H.T / S
tmp2 = dH.T / S
tmp3 = dS.T / S
dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \
np.dot( P_pred, np.dot( tmp1, tmp3 ) )
# terms required for the next step, save this for each parameter
dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))
dP_upd[:,:,param] += dP_upd[:,:,param].T
dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize
# computing the likelihood change for each parameter:
tmp5 = v / S
d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) )
# Compute the actual updates for mean of the states. Variance update
# is computed earlier.
else:
dm_upd = None
dP_upd = None
d_log_likelihood_update = None
m_upd = m_pred + K.dot( v )
ret = (P_upd,S_upd,U_upd)
return m_upd, ret, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
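# Illustrative sketch (not part of the original module): in the scalar-measurement
# branch above, the likelihood increment -0.5*(log(2*pi) + log(S) + v**2/S) is the
# log-density of the innovation v under N(0, S). Only numpy/scipy are assumed;
# the numbers are arbitrary examples.
def _demo_scalar_likelihood_update():
    import numpy as np
    from scipy import stats
    v_ex, S_ex = 0.7, 2.3                                 # innovation and its variance
    manual = -0.5 * (np.log(2 * np.pi) + np.log(S_ex) + v_ex ** 2 / S_ex)
    assert np.isclose(manual, stats.norm.logpdf(v_ex, loc=0.0, scale=np.sqrt(S_ex)))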
@cython.boundscheck(False)
def _cont_discr_kalman_filter_raw_Cython(int state_dim, Dynamic_Callables_Cython p_dynamic_callables,
Measurement_Callables_Cython p_measurement_callables, X, Y,
np.ndarray[DTYPE_t, ndim=2] m_init=None, np.ndarray[DTYPE_t, ndim=2] P_init=None,
p_kalman_filter_type='regular',
bint calc_log_likelihood=False,
bint calc_grad_log_likelihood=False,
int grad_params_no=0,
np.ndarray[DTYPE_t, ndim=3] dm_init=None,
np.ndarray[DTYPE_t, ndim=3] dP_init=None):
cdef int steps_no = Y.shape[0] # number of steps in the Kalman Filter
cdef int time_series_no = Y.shape[2] # multiple time series mode
# Allocate space for results
# Mean estimations. Initial values will be included
cdef np.ndarray[DTYPE_t, ndim=3] M = np.empty(((steps_no+1),state_dim,time_series_no), dtype=DTYPE)
M[0,:,:] = m_init # Initialize mean values
# Variance estimations. Initial values will be included
cdef np.ndarray[DTYPE_t, ndim=3] P = np.empty(((steps_no+1),state_dim,state_dim))
P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some unstable cases this is useful
P[0,:,:] = P_init # Initialize initial covariance matrix
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
U,S,Vh = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
S[ (S==0) ] = 1e-17 # allows running the algorithm with a singular initial variance
cdef tuple P_upd = (P_init, S,U)
#log_likelihood = 0
#grad_log_likelihood = np.zeros((grad_params_no,1))
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood = np.zeros((1, time_series_no), dtype = DTYPE) #if calc_log_likelihood else None
cdef np.ndarray[DTYPE_t, ndim=2] grad_log_likelihood = np.zeros((grad_params_no, time_series_no), dtype = DTYPE) #if calc_grad_log_likelihood else None
#setting initial values for derivatives update
cdef np.ndarray[DTYPE_t, ndim=3] dm_upd = dm_init
cdef np.ndarray[DTYPE_t, ndim=3] dP_upd = dP_init
# Main loop of the Kalman filter
cdef np.ndarray[DTYPE_t, ndim=2] prev_mean, k_measurment
cdef np.ndarray[DTYPE_t, ndim=2] m_pred, m_upd
cdef tuple P_pred
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred, dP_pred
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood_update, d_log_likelihood_update
cdef int k
#print "Hi I am cython"
for k in range(0,steps_no):
# In this loop the index for new estimates is (k+1), for old ones (k).
# This happens because the initial values are stored at the 0-th index.
#import pdb; pdb.set_trace()
prev_mean = M[k,:,:] # mean from the previous step
m_pred, P_pred, dm_pred, dP_pred = \
_kalman_prediction_step_SVD_Cython(k, prev_mean ,P_upd, p_dynamic_callables,
calc_grad_log_likelihood, dm_upd, dP_upd)
k_measurment = Y[k,:,:]
if (np.any(np.isnan(k_measurment)) == False):
# if np.any(np.isnan(k_measurment)):
# raise ValueError("Nan measurements are currently not supported")
m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
_kalman_update_step_SVD_Cython(k, m_pred , P_pred, p_measurement_callables,
k_measurment, calc_log_likelihood=calc_log_likelihood,
calc_grad_log_likelihood=calc_grad_log_likelihood,
p_dm = dm_pred, p_dP = dP_pred)
else:
if not np.all(np.isnan(k_measurment)):
raise ValueError("""Nan measurements are currently not supported if
they are intermixed with not NaN measurements""")
else:
m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
if calc_log_likelihood:
log_likelihood_update = np.zeros((1,time_series_no))
if calc_grad_log_likelihood:
d_log_likelihood_update = np.zeros((grad_params_no,time_series_no))
if calc_log_likelihood:
log_likelihood += log_likelihood_update
if calc_grad_log_likelihood:
grad_log_likelihood += d_log_likelihood_update
M[k+1,:,:] = m_upd # separate mean value for each time series
P[k+1,:,:] = P_upd[0]
return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False))

File diff suppressed because it is too large

View file

@ -0,0 +1,424 @@
# Copyright (c) 2013, Arno Solin.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# This implementation of converting GPs to state space models is based on the article:
#
# @article{Sarkka+Solin+Hartikainen:2013,
# author = {Simo S\"arkk\"a and Arno Solin and Jouni Hartikainen},
# year = {2013},
# title = {Spatiotemporal learning via infinite-dimensional {B}ayesian filtering and smoothing},
# journal = {IEEE Signal Processing Magazine},
# volume = {30},
# number = {4},
# pages = {51--61}
# }
#
import numpy as np
from scipy import stats
from .. import likelihoods
#from . import state_space_setup as ss_setup
from ..core import Model
from . import state_space_main as ssm
from . import state_space_setup as ss_setup
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
if len(X.shape) == 1:
X = np.atleast_2d(X).T
self.num_data, self.input_dim = X.shape
if len(Y.shape) == 1:
Y = np.atleast_2d(Y).T
assert self.input_dim==1, "State space methods are only for 1D data"
if len(Y.shape)==2:
num_data_Y, self.output_dim = Y.shape
ts_number = None
elif len(Y.shape)==3:
num_data_Y, self.output_dim, ts_number = Y.shape
self.ts_number = ts_number
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods are for single outputs only"
self.kalman_filter_type = kalman_filter_type
#self.kalman_filter_type = 'svd' # temp test
ss_setup.use_cython = use_cython
#import pdb; pdb.set_trace()
global ssm
#from . import state_space_main as ssm
if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
    try:
        from importlib import reload  # Python 3: reload is not a builtin
    except ImportError:
        pass  # Python 2: the builtin reload is used
    reload(ssm)
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
# Noise variance
self.likelihood = likelihoods.Gaussian(variance=noise_var)
# Default kernel
if kernel is None:
raise ValueError("State-Space Model: the kernel must be provided.")
else:
self.kern = kernel
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
self.posterior = None
# Assert that the kernel is supported
if not hasattr(self.kern, 'sde'):
raise NotImplementedError('SDE must be implemented for the kernel being used')
#assert self.kern.sde() not False, "This kernel is not supported for state space estimation"
def parameters_changed(self):
"""
Parameters have now changed
"""
#np.set_printoptions(16)
#print(self.param_array)
#import pdb; pdb.set_trace()
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
# necessary parameters
measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
# add measurement noise as a parameter and get the gradient matrices
dF = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dP_inf[:,:,:-1] = dP_inft
dP0[:,:,:-1] = dP0t
# The sigma2 derivative
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
dR[:,:,-1] = np.eye(measurement_dim)
# Balancing
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
# Use the Kalman filter to evaluate the likelihood
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inf
grad_calc_params['dF'] = dF
grad_calc_params['dQc'] = dQc
grad_calc_params['dR'] = dR
grad_calc_params['dP_init'] = dP0
kalman_filter_type = self.kalman_filter_type
# The following code is required because sometimes the shape of self.Y
# becomes 3D even though it must be 2D. The reason has not been discovered.
Y = self.Y
if self.ts_number is None:
Y.shape = (self.num_data,1)
else:
Y.shape = (self.num_data,1,self.ts_number)
(filter_means, filter_covs, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=grad_params_no,
grad_calc_params=grad_calc_params)
if np.any( np.isfinite(log_likelihood) == False):
#import pdb; pdb.set_trace()
print("State-Space: NaN values in the log_likelihood")
if np.any( np.isfinite(grad_log_likelihood) == False):
#import pdb; pdb.set_trace()
print("State-Space: NaN values in the grad_log_likelihood")
#print(grad_log_likelihood)
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
"""
Performs the actual prediction for new X points.
Inner function. It is called only from inside this class.
Input:
---------------------
Xnew: vector or (n_points, 1) matrix
    New time points where to evaluate predictions.
Ynew: (n_train_points, ts_no) matrix
    This matrix can substitute the original training points (in order
    to use only the parameters of the model).
filteronly: bool
    Use only the Kalman filter for prediction. In this case the output does
    not coincide with the corresponding Gaussian process.
Output:
--------------------
m: vector
Mean prediction
V: vector
Variance at every point
"""
# Set defaults
if Ynew is None:
Ynew = self.Y
# Make a single matrix containing training and testing points
if Xnew is not None:
X = np.vstack((self.X, Xnew))
Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape)))
predict_only_training = False
else:
X = self.X
Y = Ynew
predict_only_training = True
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index] # TODO they are not used
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0]
#Y = self.Y[:, 0,0]
# Run the Kalman filter
#import pdb; pdb.set_trace()
kalman_filter_type = self.kalman_filter_type
(M, P, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type,
calc_log_likelihood=False,
calc_grad_log_likelihood=False)
# (filter_means, filter_covs, log_likelihood,
# grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
# float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
# P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
# calc_grad_log_likelihood=True,
# grad_params_no=grad_params_no,
# grad_calc_params=grad_calc_params)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
p_dynamic_callables=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
# remove initial values
M = M[1:,:,:]
P = P[1:,:,:]
# Put the data back in the original order
M = M[return_inverse,:,:]
P = P[return_inverse,:,:]
# Only return the values for Xnew
if not predict_only_training:
M = M[self.num_data:,:,:]
P = P[self.num_data:,:,:]
# Calculate the mean and variance
# after einsum m has dimension in 3D (sample_num, dim_no,time_series_no)
m = np.einsum('ijl,kj', M, H)# np.dot(M,H.T)
m.shape = (m.shape[0], m.shape[1]) # remove the third dimension
V = np.einsum('ij,ajk,kl', H, P, H.T)
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
# Return the posterior of the state
return (m, V)
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
# Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
if include_likelihood:
V += float(self.likelihood.variance)
# Lower and upper bounds
#lower = m - 2*np.sqrt(V)
#upper = m + 2*np.sqrt(V)
# Return mean and variance
return m, V
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
mu, var = self._raw_predict(Xnew)
#import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
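# Illustrative note (not part of the original file): with the default quantiles
# (2.5, 97.5) the expression above is the usual two-sided 95% predictive interval
# mu +/- 1.96*sqrt(var + noise variance), since stats.norm.ppf(0.975) ~= 1.96.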
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
# ax=None, resolution=None, plot_raw=False, plot_filter=False,
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
#
# # Deal with optional parameters
# if ax is None:
# fig = pb.figure(num=fignum)
# ax = fig.add_subplot(111)
#
# # Define the frame on which to plot
# resolution = resolution or 200
# Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
#
# # Make a prediction on the frame and plot it
# if plot_raw:
# m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
# lower = m - 2*np.sqrt(v)
# upper = m + 2*np.sqrt(v)
# Y = self.Y
# else:
# m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
# Y = self.Y
#
# # Plot the values
# gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
# ax.plot(self.X, self.Y, 'kx', mew=1.5)
#
# # Optionally plot some samples
# if samples:
# if plot_raw:
# Ysim = self.posterior_samples_f(Xgrid, samples)
# else:
# Ysim = self.posterior_samples(Xgrid, samples)
# for yi in Ysim.T:
# ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
#
# # Set the limits of the plot to some sensible values
# ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
# ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
#
# def prior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Allocate space for results
# Y = np.empty((size,X.shape[0]))
#
# # Simulate random draws
# #for j in range(0,size):
# # Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
# Y = self.simulate(F,L,Qc,Pinf,X.T,size)
#
# # Only observations
# Y = np.tensordot(H[0],Y,(0,0))
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return trajectory
# return Y.T
#
# def posterior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Run smoother on original data
# (m,V) = self.predict_raw(X)
#
# # Simulate random draws from the GP prior
# y = self.prior_samples_f(np.vstack((self.X, X)),size)
#
# # Allocate space for sample trajectories
# Y = np.empty((size,X.shape[0]))
#
# # Run the RTS smoother on each of these values
# for j in range(0,size):
# yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
# (m2,V2) = self.predict_raw(X,Ynew=yobs)
# Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return posterior sample trajectories
# return Y.T
#
# def posterior_samples(self, X, size=10):
#
# # Make samples of f
# Y = self.posterior_samples_f(X,size)
#
# # Add noise
# Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
#
# # Return trajectory
# return Y
#
#
# def simulate(self,F,L,Qc,Pinf,X,size=1):
# # Simulate a trajectory using the state space model
#
# # Allocate space for results
# f = np.zeros((F.shape[0],size,X.shape[1]))
#
# # Initial state
# f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
#
# # Time step lengths
# dt = np.empty(X.shape)
# dt[:,0] = X[:,1]-X[:,0]
# dt[:,1:] = np.diff(X)
#
# # Solve the LTI SDE for these time steps
# As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
#
# # Sweep through remaining time points
# for k in range(1,X.shape[1]):
#
# # Form discrete-time model
# A = As[:,:,index[1-k]]
# Q = Qs[:,:,index[1-k]]
#
# # Draw the state
# f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
#
# # Return values
# return f
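# Illustrative usage sketch appended for documentation purposes (not part of the
# original file). It assumes a kernel exposing .sde(); GPy.kern.sde_Matern32 is used
# as an example and its exact name is an assumption of this sketch.
def _demo_state_space_regression():
    import numpy as np
    import GPy
    X = np.linspace(0, 10, 200)[:, None]
    Y = np.sin(X) + 0.1 * np.random.randn(*X.shape)
    kern = GPy.kern.sde_Matern32(input_dim=1)
    model = StateSpace(X, Y, kernel=kern, kalman_filter_type='svd')
    model.optimize()
    mean, var = model.predict(np.linspace(0, 12, 100)[:, None])
    return mean, var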

View file

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
This module is intended for the setup of state_space_main module.
The need for this module arose because of the way the state_space_main module
is connected with the Cython code.
"""
use_cython = False

View file

@ -104,4 +104,4 @@ cdict_Alu = {'red' :((0./5,colorsRGB['Aluminium1'][0]/256.,colorsRGB['Aluminium1
(2./5,colorsRGB['Aluminium3'][2]/256.,colorsRGB['Aluminium3'][2]/256.), (2./5,colorsRGB['Aluminium3'][2]/256.,colorsRGB['Aluminium3'][2]/256.),
(3./5,colorsRGB['Aluminium4'][2]/256.,colorsRGB['Aluminium4'][2]/256.), (3./5,colorsRGB['Aluminium4'][2]/256.,colorsRGB['Aluminium4'][2]/256.),
(4./5,colorsRGB['Aluminium5'][2]/256.,colorsRGB['Aluminium5'][2]/256.), (4./5,colorsRGB['Aluminium5'][2]/256.,colorsRGB['Aluminium5'][2]/256.),
(5./5,colorsRGB['Aluminium6'][2]/256.,colorsRGB['Aluminium6'][2]/256.))} (5./5,colorsRGB['Aluminium6'][2]/256.,colorsRGB['Aluminium6'][2]/256.))}

View file

@ -52,6 +52,17 @@ def inject_plotting():
GP.plot_f = gpy_plot.gp_plots.plot_f GP.plot_f = gpy_plot.gp_plots.plot_f
GP.plot_magnification = gpy_plot.latent_plots.plot_magnification GP.plot_magnification = gpy_plot.latent_plots.plot_magnification
from ..models import StateSpace
StateSpace.plot_data = gpy_plot.data_plots.plot_data
StateSpace.plot_data_error = gpy_plot.data_plots.plot_data_error
StateSpace.plot_errorbars_trainset = gpy_plot.data_plots.plot_errorbars_trainset
StateSpace.plot_mean = gpy_plot.gp_plots.plot_mean
StateSpace.plot_confidence = gpy_plot.gp_plots.plot_confidence
StateSpace.plot_density = gpy_plot.gp_plots.plot_density
StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples
StateSpace.plot = gpy_plot.gp_plots.plot
StateSpace.plot_f = gpy_plot.gp_plots.plot_f
from ..core import SparseGP from ..core import SparseGP
SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing
@ -107,4 +118,4 @@ try:
lib = config.get('plotting', 'library') lib = config.get('plotting', 'library')
change_plotting_library(lib) change_plotting_library(lib)
except NoOptionError: except NoOptionError:
print("No plotting library was specified in config file. \n{}".format(error_suggestion)) print("No plotting library was specified in config file. \n{}".format(error_suggestion))

View file

@ -235,8 +235,6 @@ def plot_density(self, plot_limits=None, fixed_inputs=None,
Give the Y_metadata in the predict_kw if you need it. Give the Y_metadata in the predict_kw if you need it.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array :type plot_limits: np.array
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input dimension i should be set to value v. :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input dimension i should be set to value v.
@ -420,4 +418,4 @@ def _plot(self, canvas, plots, helper_data, helper_prediction, levels, plot_indu
if helper_prediction[2] is not None: if helper_prediction[2] is not None:
plots.update(_plot_samples(self, canvas, helper_data, helper_prediction, projection, "Samples")) plots.update(_plot_samples(self, canvas, helper_data, helper_prediction, projection, "Samples"))
return plots return plots

View file

@ -140,4 +140,4 @@ def plot_covariance(kernel, x=None, label=None,
return pl().add_to_canvas(canvas, plots) return pl().add_to_canvas(canvas, plots)
else: else:
raise NotImplementedError("Cannot plot a kernel with more than two input dimensions") raise NotImplementedError("Cannot plot a kernel with more than two input dimensions")

View file

@ -131,7 +131,9 @@ def plot_latent_inducing(self,
Z = self.Z.values Z = self.Z.values
labels = np.array(['inducing'] * Z.shape[0]) labels = np.array(['inducing'] * Z.shape[0])
scatters = _plot_latent_scatter(canvas, Z, sig_dims, labels, marker, num_samples, projection=projection, **kwargs) kwargs['marker'] = marker
update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable
scatters = _plot_latent_scatter(canvas, Z, sig_dims, labels, num_samples=num_samples, projection=projection, **kwargs)
return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend) return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend)
@ -147,6 +149,7 @@ def _plot_magnification(self, canvas, which_indices, Xgrid,
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], Xgrid.shape[1])) Xtest_full = np.zeros((x.shape[0], Xgrid.shape[1]))
Xtest_full[:, which_indices] = x Xtest_full[:, which_indices] = x
mf = self.predict_magnification(Xtest_full, kern=kern, mean=mean, covariance=covariance) mf = self.predict_magnification(Xtest_full, kern=kern, mean=mean, covariance=covariance)
return mf.reshape(resolution, resolution).T return mf.reshape(resolution, resolution).T
imshow_kwargs = update_not_existing_kwargs(imshow_kwargs, pl().defaults.magnification) imshow_kwargs = update_not_existing_kwargs(imshow_kwargs, pl().defaults.magnification)
@ -215,7 +218,12 @@ def _plot_latent(self, canvas, which_indices, Xgrid,
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], Xgrid.shape[1])) Xtest_full = np.zeros((x.shape[0], Xgrid.shape[1]))
Xtest_full[:, which_indices] = x Xtest_full[:, which_indices] = x
mf = np.log(self.predict(Xtest_full, kern=kern)[1]) mf = self.predict(Xtest_full, kern=kern)[1]
if mf.shape[1]==self.output_dim:
mf = mf.sum(-1)
else:
mf *= self.output_dim
mf = np.log(mf)
return mf.reshape(resolution, resolution).T return mf.reshape(resolution, resolution).T
imshow_kwargs = update_not_existing_kwargs(imshow_kwargs, pl().defaults.latent) imshow_kwargs = update_not_existing_kwargs(imshow_kwargs, pl().defaults.latent)

View file

@ -194,6 +194,7 @@ def scatter_label_generator(labels, X, visible_dims, marker=None):
x = X[index, input_1] x = X[index, input_1]
y = X[index, input_2] y = X[index, input_2]
z = X[index, input_3] z = X[index, input_3]
yield x, y, z, this_label, index, m yield x, y, z, this_label, index, m
def subsample_X(X, labels, num_samples=1000): def subsample_X(X, labels, num_samples=1000):
@ -385,5 +386,5 @@ def x_frame2D(X,plot_limits=None,resolution=None):
resolution = resolution or 50 resolution = resolution or 50
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] xx, yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T Xnew = np.c_[xx.flat, yy.flat]
return Xnew, xx, yy, xmin, xmax return Xnew, xx, yy, xmin, xmax

View file

@ -18,4 +18,4 @@
from .util import align_subplot_array, align_subplots, fewerXticks, removeRightTicks, removeUpperTicks from .util import align_subplot_array, align_subplots, fewerXticks, removeRightTicks, removeUpperTicks
from . import controllers, base_plots from . import controllers, base_plots

View file

@ -1 +1 @@
from .imshow_controller import ImshowController, ImAnnotateController from .imshow_controller import ImshowController, ImAnnotateController

View file

@ -72,4 +72,4 @@ class ImAnnotateController(ImshowController):
text.set_x(x+xoffset) text.set_x(x+xoffset)
text.set_y(y+yoffset) text.set_y(y+yoffset)
text.set_text("{}".format(X[1][j, i])) text.set_text("{}".format(X[1][j, i]))
return view return view

View file

@ -1,21 +1,21 @@
#=============================================================================== #===============================================================================
# Copyright (c) 2015, Max Zwiessele # Copyright (c) 2015, Max Zwiessele
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met: # modification, are permitted provided that the following conditions are met:
# #
# * Redistributions of source code must retain the above copyright notice, this # * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer. # list of conditions and the following disclaimer.
# #
# * Redistributions in binary form must reproduce the above copyright notice, # * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation # this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution. # and/or other materials provided with the distribution.
# #
# * Neither the name of GPy nor the names of its # * Neither the name of GPy nor the names of its
# contributors may be used to endorse or promote products derived from # contributors may be used to endorse or promote products derived from
# this software without specific prior written permission. # this software without specific prior written permission.
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@ -34,20 +34,20 @@ from .. import Tango
''' '''
This file is for defaults for the gpy plot, specific to the plotting library. This file is for defaults for the gpy plot, specific to the plotting library.
Create a kwargs dictionary with the right name for the plotting function Create a kwargs dictionary with the right name for the plotting function
you are implementing. If you do not provide defaults, the default behaviour of you are implementing. If you do not provide defaults, the default behaviour of
the plotting library will be used. the plotting library will be used.
In the code, always ise plotting.gpy_plots.defaults to get the defaults, as In the code, always ise plotting.gpy_plots.defaults to get the defaults, as
it gives back an empty default, when defaults are not defined. it gives back an empty default, when defaults are not defined.
''' '''
# Data plots: # Data plots:
data_1d = dict(lw=1.5, marker='x', edgecolor='k') data_1d = dict(lw=1.5, marker='x', color='k')
data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5) data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5)
inducing_1d = dict(lw=0, s=500, facecolors=Tango.colorsHex['darkRed']) inducing_1d = dict(lw=0, s=500, facecolors=Tango.colorsHex['darkRed'])
inducing_2d = dict(s=14, edgecolors='k', linewidth=.4, facecolors='white', alpha=.5, marker='^') inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^')
inducing_3d = dict(lw=.3, s=500, facecolors='white', edgecolors='k') inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k')
xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5) xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5)
yerrorbar = dict(color=Tango.colorsHex['darkRed'], fmt='none', elinewidth=.5, alpha=.5) yerrorbar = dict(color=Tango.colorsHex['darkRed'], fmt='none', elinewidth=.5, alpha=.5)
@ -71,5 +71,5 @@ ard = dict(edgecolor='k', linewidth=1.2)
latent = dict(aspect='auto', cmap='Greys', interpolation='bicubic') latent = dict(aspect='auto', cmap='Greys', interpolation='bicubic')
gradient = dict(aspect='auto', cmap='RdBu', interpolation='nearest', alpha=.7) gradient = dict(aspect='auto', cmap='RdBu', interpolation='nearest', alpha=.7)
magnification = dict(aspect='auto', cmap='Greys', interpolation='bicubic') magnification = dict(aspect='auto', cmap='Greys', interpolation='bicubic')
latent_scatter = dict(s=40, linewidth=.2, edgecolor='k', alpha=.9) latent_scatter = dict(s=20, linewidth=.2, edgecolor='k', alpha=.9)
annotation = dict(fontdict=dict(family='sans-serif', weight='light', fontsize=9), zorder=.3, alpha=.7) annotation = dict(fontdict=dict(family='sans-serif', weight='light', fontsize=9), zorder=.3, alpha=.7)

View file

@ -1,5 +1,5 @@
#=============================================================================== #===============================================================================
# Copyright (c) 2015, Max Zwiessele # Copyright (c) 2016, Max Zwiessele, Alan saul
# All rights reserved. # All rights reserved.
# #
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
@ -116,4 +116,43 @@ def align_subplot_array(axes,xlim=None, ylim=None):
if i<(M*(N-1)): if i<(M*(N-1)):
ax.set_xticks([]) ax.set_xticks([])
else: else:
removeUpperTicks(ax) removeUpperTicks(ax)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_all=False):
"""
Convenience function for returning back fixed_inputs where the other inputs
are fixed using fix_routine
:param model: model
:type model: Model
:param non_fixed_inputs: dimensions of non fixed inputs
:type non_fixed_inputs: list
:param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
:type fix_routine: string
:param as_list: if true, will return a list of tuples with (dimension, fixed_val) otherwise it will create the corresponding X matrix
:type as_list: boolean
"""
from ...inference.latent_function_inference.posterior import VariationalPosterior
f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values.copy()
elif isinstance(model.X, VariationalPosterior):
X = model.X.values.copy()
else:
if X_all:
X = model.X_all.copy()
else:
X = model.X.copy()
for i in range(X.shape[1]):
if i not in non_fixed_inputs:
if fix_routine == 'mean':
    f_inputs.append( (i, np.mean(X[:,i])) )
elif fix_routine == 'median':
    f_inputs.append( (i, np.median(X[:,i])) )
else: # set to zero
    f_inputs.append( (i, 0) )
if not as_list:
X[:,i] = f_inputs[-1][1]
if as_list:
return f_inputs
else:
return X
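# Illustrative usage sketch (not part of the original file): fix every input
# dimension except the first at its median, producing the (dimension, value) list
# that the plotting routines accept as `fixed_inputs`. GPy.models.GPRegression is
# only used to build a throwaway model for the example.
def _demo_fixed_inputs():
    import numpy as np
    import GPy
    X = np.random.randn(50, 3)
    Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(50, 1)
    model = GPy.models.GPRegression(X, Y)
    # e.g. [(1, median of input column 1), (2, median of input column 2)]
    return fixed_inputs(model, non_fixed_inputs=[0], fix_routine='median')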

View file

@ -15,7 +15,9 @@ def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
if ax is None: if ax is None:
fig = pb.figure(num=fignum, figsize=figsize) fig = pb.figure(num=fignum, figsize=figsize)
if colors is None: if colors is None:
colors = pb.gca()._get_lines.color_cycle from ..Tango import mediumList
from itertools import cycle
colors = cycle(mediumList)
pb.clf() pb.clf()
else: else:
colors = iter(colors) colors = iter(colors)
@ -64,7 +66,9 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid
else: else:
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1])))) fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
if colors is None: if colors is None:
colors = pb.gca()._get_lines.color_cycle from ..Tango import mediumList
from itertools import cycle
colors = cycle(mediumList)
pb.clf() pb.clf()
else: else:
colors = iter(colors) colors = iter(colors)

View file

@ -73,4 +73,4 @@ latent = dict(colorscale='Greys', reversescale=True, zsmooth='best')
gradient = dict(colorscale='RdBu', opacity=.7) gradient = dict(colorscale='RdBu', opacity=.7)
magnification = dict(colorscale='Greys', zsmooth='best', reversescale=True) magnification = dict(colorscale='Greys', zsmooth='best', reversescale=True)
latent_scatter = dict(marker_kwargs=dict(size='5', opacity=.7)) latent_scatter = dict(marker_kwargs=dict(size='5', opacity=.7))
# annotation = dict(fontdict=dict(family='sans-serif', weight='light', fontsize=9), zorder=.3, alpha=.7) # annotation = dict(fontdict=dict(family='sans-serif', weight='light', fontsize=9), zorder=.3, alpha=.7)

View file

@ -131,14 +131,15 @@ class PlotlyPlots(AbstractPlottingLibrary):
#not matplotlib marker #not matplotlib marker
pass pass
marker_kwargs = marker_kwargs or {} marker_kwargs = marker_kwargs or {}
marker_kwargs.setdefault('symbol', marker) if 'symbol' not in marker_kwargs:
marker_kwargs['symbol'] = marker
if Z is not None: if Z is not None:
return Scatter3d(x=X, y=Y, z=Z, mode='markers', return Scatter3d(x=X, y=Y, z=Z, mode='markers',
showlegend=label is not None, showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs), marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs) name=label, **kwargs)
return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None, return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs or {}), marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs) name=label, **kwargs)
def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs): def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs):
@ -254,7 +255,7 @@ class PlotlyPlots(AbstractPlottingLibrary):
font=dict(color='white' if np.abs(var) > 0.8 else 'black', size=10), font=dict(color='white' if np.abs(var) > 0.8 else 'black', size=10),
opacity=.5, opacity=.5,
showarrow=False, showarrow=False,
hoverinfo='x')) ))
return imshow, annotations return imshow, annotations
def annotation_heatmap_interact(self, ax, plot_function, extent, label=None, resolution=15, imshow_kwargs=None, **annotation_kwargs): def annotation_heatmap_interact(self, ax, plot_function, extent, label=None, resolution=15, imshow_kwargs=None, **annotation_kwargs):

Binary files not shown (15 image files changed).
Some files were not shown because too many files have changed in this diff