Merge branch 'devel' into binomial_laplace

This commit is contained in:
Max Zwiessele 2016-09-01 14:21:27 +01:00
commit 589c5d9eac
206 changed files with 41346 additions and 4247 deletions

View file

@ -2,27 +2,23 @@
[run]
branch = True
source = GPy
omit = ./GPy/testing/*.py, travis_tests.py, setup.py, ./GPy/__version__.py, ./GPy/plotting/*
omit = ./GPy/examples/*.py, ./GPy/testing/*.py, travis_tests.py, setup.py, ./GPy/__version__.py
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
verbose
# Don't complain about missing debug-only code:
if self\.debug
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
raise NotImplemented
except NotImplementedError
except NotImplemented
except AssertionError
except ImportError
raise
except
pass
Not implemented
# Don't complain if non-runnable code isn't run:
if 0:

View file

@ -15,7 +15,7 @@ addons:
env:
- PYTHON_VERSION=2.7
- PYTHON_VERSION=3.3
#- PYTHON_VERSION=3.3
- PYTHON_VERSION=3.4
- PYTHON_VERSION=3.5
@ -29,9 +29,11 @@ install:
- echo $PATH
- source install_retry.sh
- pip install codecov
- pip install coveralls
- pip install pypandoc
- pip install git+git://github.com/BRML/climin.git
- pip install autograd
- pip install nose-show-skipped
- python setup.py develop
script:
@ -39,6 +41,7 @@ script:
after_success:
- codecov
- coveralls
before_deploy:
- cd doc
@ -47,9 +50,11 @@ before_deploy:
- make html
- cd ../
- if [[ "$TRAVIS_OS_NAME" == "linux" ]];
then export DIST='sdist';
then
export DIST='sdist';
elif [[ "$TRAVIS_OS_NAME" == "osx" ]];
then export DIST='bdist_wheel';
then
export DIST='bdist_wheel';
fi;
deploy:
@ -58,8 +63,6 @@ deploy:
password:
secure: "vMEOlP7DQhFJ7hQAKtKC5hrJXFl5BkUt4nXdosWWiw//Kg8E+PPLg88XPI2gqIosir9wwgtbSBBbbwCxkM6uxRNMpoNR8Ixyv9fmSXp4rLl7bbBY768W7IRXKIBjpuEy2brQjoT+CwDDSzUkckHvuUjJDNRvUv8ab4P/qYO1LG4="
on:
tags: false
branch: devel
server: https://testpypi.python.org/pypi
branch: deploy
distributions: $DIST
skip_cleanup: true

View file

@ -1 +1 @@
[GPy Authors](https://github.com/SheffieldML/GPy/graphs/contributors)
GPy Authors: https://github.com/SheffieldML/GPy/graphs/contributors

View file

@ -14,6 +14,8 @@ from . import testing
from . import kern
from . import plotting
from .util import normalizer
# backwards compatibility
import sys
backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations']
@ -28,16 +30,18 @@ from .core.parameterization import Param, Parameterized, ObsAr, transformations
from .__version__ import __version__
from numpy.testing import Tester
#@nottest
try:
#Get rid of nose dependency by only ignoring if you have nose installed
from nose.tools import nottest
@nottest
def tests(verbose=10):
Tester(testing).test(verbose=verbose)
except:
def tests(verbose=10):
Tester(testing).test(verbose=verbose)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
try:
#Get rid of nose dependency by only ignoring if you have nose installed
from nose.tools import nottest
@nottest
def tests(verbose=10):
Tester(testing).test(verbose=verbose)
except:
def tests(verbose=10):
Tester(testing).test(verbose=verbose)
def load(file_or_path):
"""

View file

@ -1 +1 @@
__version__ = "0.9.7"
__version__ = "1.4.0"

View file

@ -2,17 +2,17 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from GPy.core.model import Model
from paramz import ObsAr
from .model import Model
from .parameterization.variational import VariationalPosterior
from .mapping import Mapping
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from GPy.core.parameterization.variational import VariationalPosterior
from ..util.normalizer import Standardize
from paramz import ObsAr
import logging
import warnings
from GPy.util.normalizer import MeanNorm
logger = logging.getLogger("GP")
class GP(Model):
@ -28,7 +28,7 @@ class GP(Model):
:param Norm normalizer:
normalize the outputs Y.
Prediction will be un-normalized using this normalizer.
If normalizer is None, we will normalize using MeanNorm.
If normalizer is None, we will normalize using Standardize.
If normalizer is False, no normalization will be done.
.. Note:: Multiple independent outputs are allowed using columns of Y
@ -49,7 +49,7 @@ class GP(Model):
logger.info("initializing Y")
if normalizer is True:
self.normalizer = MeanNorm()
self.normalizer = Standardize()
elif normalizer is False:
self.normalizer = None
else:
@ -64,6 +64,7 @@ class GP(Model):
self.Y_normalized = self.Y
else:
self.Y = Y
self.Y_normalized = self.Y
if Y.shape[0] != self.num_data:
#There can be cases where we want more inputs than outputs, for example if we have multiple latent
@ -148,14 +149,16 @@ class GP(Model):
# LVM models
if isinstance(self.X, VariationalPosterior):
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
index = self.X._parent_index_
self.unlink_parameter(self.X)
self.X = X
self.link_parameter(self.X)
self.link_parameter(self.X, index=index)
else:
index = self.X._parent_index_
self.unlink_parameter(self.X)
from ..core import Param
self.X = Param('latent mean',X)
self.link_parameter(self.X)
self.X = Param('latent mean', X)
self.link_parameter(self.X, index=index)
else:
self.X = ObsAr(X)
self.update_model(True)
@ -217,9 +220,13 @@ class GP(Model):
mu += self.mean_function.f(Xnew)
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None):
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
"""
Predict the function(s) at the new point(s) Xnew.
Predict the function(s) at the new point(s) Xnew. This includes the likelihood
variance added to the predicted underlying function (usually referred to as f).
To predict without adding the likelihood variance, pass
`include_likelihood=False`, or use self.predict_noiseless().
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray (Nnew x self.input_dim)
@ -229,6 +236,8 @@ class GP(Model):
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses.
:param bool include_likelihood: Whether or not to add likelihood noise to the predicted underlying latent function f.
:returns: (mean, var):
mean: posterior mean, a Numpy array, Nnew x self.input_dim
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@ -240,14 +249,45 @@ class GP(Model):
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
if include_likelihood:
# now push through likelihood
if likelihood is None:
likelihood = self.likelihood
mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
# now push through likelihood
if likelihood is None:
likelihood = self.likelihood
mean, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mean, var
return mu, var
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
"""
Convenience function to predict the underlying function of the GP (often
referred to as f) without adding the likelihood variance to the
predictions.
This is most likely what you want to use for your predictions.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray (Nnew x self.input_dim)
:param full_cov: whether to return the full covariance matrix, or just
the diagonal
:type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses.
:returns: (mean, var):
mean: posterior mean, a Numpy array, Nnew x self.input_dim
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
"""
return self.predict(Xnew, full_cov, Y_metadata, kern, None, False)
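A minimal usage sketch (illustrative, not part of the commit) contrasting the two prediction modes on a standard GPRegression model:
import numpy as np
import GPy
X = np.random.rand(40, 1)
Y = np.sin(6 * X) + np.random.randn(40, 1) * 0.1
m = GPy.models.GPRegression(X, Y)
m.optimize()
Xnew = np.linspace(0, 1, 5)[:, None]
mu_y, var_y = m.predict(Xnew)                             # includes likelihood (noise) variance
mu_f, var_f = m.predict(Xnew, include_likelihood=False)   # latent function f only
mu_f2, var_f2 = m.predict_noiseless(Xnew)                 # equivalent to the line above
print(np.allclose(mu_f, mu_f2), np.allclose(var_f, var_f2))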
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
"""
@ -263,11 +303,14 @@ class GP(Model):
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
if likelihood is None:
likelihood = self.likelihood
return likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
if self.normalizer is not None:
quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
return quantiles
def predictive_gradients(self, Xnew, kern=None):
"""
@ -300,8 +343,7 @@ class GP(Model):
dv_dX += kern.gradients_X(alpha, Xnew, self._predictive_variable)
return mean_jac, dv_dX
def predict_jacobian(self, Xnew, kern=None, full_cov=True):
def predict_jacobian(self, Xnew, kern=None, full_cov=False):
"""
Compute the derivatives of the posterior of the GP.
@ -319,15 +361,11 @@ class GP(Model):
:param X: The points at which to get the predictive gradients.
:type X: np.ndarray (Xnew x self.input_dim)
:param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the full covariance of the jacobian.
:param boolean full_cov: whether to return the cross-covariance terms between
the N* Jacobian vectors
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
Note: We always return sum in input_dim gradients, as the off-diagonals
in the input_dim are not needed for further calculations.
This is a compromise for increase in speed. Mathematically the jacobian would
have another dimension in Q.
"""
if kern is None:
kern = self.kern
@ -346,24 +384,26 @@ class GP(Model):
dK2_dXdX = kern.gradients_XX(one, Xnew)
else:
dK2_dXdX = kern.gradients_XX_diag(one, Xnew)
#dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
#for i in range(Xnew.shape[0]):
# dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
def compute_cov_inner(wi):
if full_cov:
# full covariance gradients:
var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
var_jac = dK2_dXdX - np.einsum('qnm,msr->nsqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full) # n,s = Xnew.shape[0], m = pred_var.shape[0]
else:
var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
var_jac = dK2_dXdX - np.einsum('qnm,mnr->nqr', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
return var_jac
if self.posterior.woodbury_inv.ndim == 3: # Missing data:
if full_cov:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],self.output_dim))
var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = compute_cov_inner(self.posterior.woodbury_inv)
return mean_jac, var_jac
@ -387,10 +427,11 @@ class GP(Model):
mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
Sigma = np.zeros(mumuT.shape)
if var_jac.ndim == 3:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
if var_jac.ndim == 4: # Missing data
Sigma = var_jac.sum(-1)
else:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
Sigma = self.output_dim*var_jac
G = 0.
if mean:
G += mumuT
@ -402,15 +443,22 @@ class GP(Model):
warnings.warn("Wrong naming, use predict_wishart_embedding instead. Will be removed in future versions!", DeprecationWarning)
return self.predict_wishart_embedding(Xnew, kern, mean, covariance)
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True, dimensions=None):
"""
Predict the magnification factor as
sqrt(det(G))
for each point N in Xnew
for each point N in Xnew.
:param bool mean: whether to include the mean of the wishart embedding.
:param bool covariance: whether to include the covariance of the wishart embedding.
:param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
"""
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
G = self.predict_wishart_embedding(Xnew, kern, mean, covariance)
if dimensions is None:
dimensions = self.get_most_significant_input_dimensions()[:2]
G = G[:, dimensions][:,:,dimensions]
from ..util.linalg import jitchol
mag = np.empty(Xnew.shape[0])
for n in range(Xnew.shape[0]):
@ -490,21 +538,23 @@ class GP(Model):
def get_most_significant_input_dimensions(self, which_indices=None):
return self.kern.get_most_significant_input_dimensions(which_indices)
def optimize(self, optimizer=None, start=None, **kwargs):
def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:param max_iters: maximum number of iterations
:type max_iters: int
:param messages: whether to display progress during optimisation
:type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred_optimizer); a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs' and 'tnc'.
:type optimizer: string
:param bool ipython_notebook: whether to use ipython notebook widgets or not.
:param bool clear_after_finish: if in ipython notebook, we can clear the widgets after optimization.
"""
self.inference_method.on_optimization_start()
try:
super(GP, self).optimize(optimizer, start, **kwargs)
super(GP, self).optimize(optimizer, start, messages, max_iters, ipython_notebook, clear_after_finish, **kwargs)
except KeyboardInterrupt:
print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
self.inference_method.on_optimization_end()
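A minimal usage sketch (illustrative, not part of the commit): the previously keyword-only options are now explicit arguments, so calls like the following behave as before:
import numpy as np
import GPy
X = np.random.rand(30, 1)
Y = np.sin(6 * X) + np.random.randn(30, 1) * 0.1
m = GPy.models.GPRegression(X, Y)
m.optimize(optimizer='lbfgs', messages=True, max_iters=200)   # progress messages, capped iterations
print(m)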

View file

@ -16,7 +16,7 @@ class Priorizable(Parameterizable):
def __setstate__(self, state):
super(Priorizable, self).__setstate__(state)
self._index_operations['priors'] = self.priors
#self._index_operations['priors'] = self.priors
#===========================================================================

View file

@ -773,7 +773,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in xrange(self.datanum):
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
@ -792,7 +792,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in xrange(self.datanum):
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
@ -811,7 +811,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
else:
lst_idx = []
# Here we put indices of each class in to the list called lst_idx_all
for m in xrange(len(data_idx[i])):
for m in range(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
@ -847,7 +847,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in xrange(self.datanum):
for k in range(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
@ -1309,3 +1309,52 @@ class Exponential(Prior):
def rvs(self, n):
return np.random.exponential(scale=self.l, size=n)
class StudentT(Prior):
"""
Implementation of the Student's t probability density function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
:param nu: degrees of freedom
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _REAL
_instances = []
def __new__(cls, mu=0, sigma=1, nu=4): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma and instance().nu == nu:
return instance()
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma, nu)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma, nu):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.nu = float(nu)
def __str__(self):
return "St({:.2g}, {:.2g}, {:.2g})".format(self.mu, self.sigma, self.nu)
def lnpdf(self, x):
from scipy.stats import t
return t.logpdf(x,self.nu,self.mu,self.sigma)
def lnpdf_grad(self, x):
return -(self.nu + 1.)*(x - self.mu)/( self.nu*self.sigma2 + np.square(x - self.mu) )
def rvs(self, n):
from scipy.stats import t
ret = t.rvs(self.nu, loc=self.mu, scale=self.sigma, size=n)
return ret
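A minimal sketch (illustrative, not part of the commit) exercising the new prior directly; it assumes the class is exposed as GPy.priors.StudentT:
import GPy
p1 = GPy.priors.StudentT(mu=0, sigma=1, nu=4)
p2 = GPy.priors.StudentT(mu=0, sigma=1, nu=4)
print(p1 is p2)        # True: the singleton __new__ reuses instances with identical hyperparameters
print(p1)              # St(0, 1, 4)
print(p1.lnpdf(0.5))   # log-density of the t distribution at 0.5
samples = p1.rvs(1000) # random draws from the prior
print(samples.mean(), samples.std())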

View file

@ -111,8 +111,8 @@ class Symbolic_core():
# rows = func['function'].shape[0]
# cols = func['function'].shape[1]
# self.expressions[key]['derivative'] = sym.zeros(rows, cols)
# for i in xrange(rows):
# for j in xrange(cols):
# for i in range(rows):
# for j in range(cols):
# self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
# else:
self.expressions[key]['derivative'] = extract_derivative(func['function'], derivative_arguments)
@ -123,7 +123,7 @@ class Symbolic_core():
val = 1.0
# TODO: improve approach for initializing parameters.
if parameters is not None:
if parameters.has_key(theta.name):
if theta.name in parameters:
val = parameters[theta.name]
# Add parameter.
@ -176,7 +176,7 @@ class Symbolic_core():
return gradient
def eval_gradients_X(self, function, partial, **kwargs):
if kwargs.has_key('X'):
if 'X' in kwargs:
gradients_X = np.zeros_like(kwargs['X'])
self.eval_update_cache(**kwargs)
for i, theta in enumerate(self.variables['X']):
@ -405,7 +405,7 @@ class Symbolic_core():
if var_name == var.name:
expr = expr.subs(var, sub)
break
for m, r in function_substitutes.iteritems():
for m, r in function_substitutes.items():
expr = expr.replace(m, r)#normcdfln, lambda arg : sym.log(normcdf(arg)))
return expr.simplify()
@ -417,4 +417,4 @@ class Symbolic_core():
else:
return x[0]
return sorted(var_dict.iteritems(), key=sort_key, reverse=reverse)
return sorted(var_dict.items(), key=sort_key, reverse=reverse)

View file

@ -184,7 +184,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
input('Press enter to finish')
plt.close(fig)
return m
@ -210,7 +210,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
input('Press enter to finish')
plt.close(fig)
return m
@ -242,7 +242,7 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = slist_names
for S, lab in itertools.izip(slist, labls):
for S, lab in zip(slist, labls):
ax.plot(S, label=lab)
ax.legend()
for i, Y in enumerate(Ylist):
@ -288,7 +288,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = slist_names
for S, lab in itertools.izip(slist, labls):
for S, lab in zip(slist, labls):
ax.plot(S, label=lab)
ax.legend()
for i, Y in enumerate(Ylist):
@ -340,7 +340,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
gtol=.05)
if plot:
m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
m.kern.plot_ARD()
return m
def gplvm_simulation(optimize=True, verbose=1,
@ -364,7 +364,7 @@ def gplvm_simulation(optimize=True, verbose=1,
gtol=.05)
if plot:
m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
m.kern.plot_ARD()
return m
def ssgplvm_simulation(optimize=True, verbose=1,
plot=True, plot_sim=False,
@ -388,7 +388,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
gtol=.05)
if plot:
m.X.plot("SSGPLVM Latent Space 1D")
m.kern.plot_ARD('SSGPLVM Simulation ARD Parameters')
m.kern.plot_ARD()
return m
def bgplvm_simulation_missing_data(optimize=True, verbose=1,
@ -418,7 +418,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
gtol=.05)
if plot:
m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
m.kern.plot_ARD()
return m
def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
@ -448,7 +448,7 @@ def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
gtol=.05)
if plot:
m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
m.kern.plot_ARD()
return m
@ -469,7 +469,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
m.optimize(messages=verbose, max_iters=8e3)
if plot:
m.X.plot("MRD Latent Space 1D")
m.plot_scales("MRD Scales")
m.plot_scales()
return m
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
@ -496,7 +496,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
if plot:
m.X.plot("MRD Latent Space 1D")
m.plot_scales("MRD Scales")
m.plot_scales()
return m
def brendan_faces(optimize=True, verbose=True, plot=True):
@ -520,7 +520,7 @@ def brendan_faces(optimize=True, verbose=True, plot=True):
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
input('Press enter to finish')
return m
@ -542,7 +542,7 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
input('Press enter to finish')
return m
@ -577,7 +577,7 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish')
input('Press enter to finish')
lvm_visualizer.close()
data_show.close()
return m
@ -598,7 +598,7 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
y = m.likelihood.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
input('Press enter to finish')
return m
@ -619,7 +619,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
y = m.likelihood.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
# raw_input('Press enter to finish')
# input('Press enter to finish')
return m
@ -669,7 +669,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
fig.canvas.draw()
# Canvas.show doesn't work on OSX.
#fig.canvas.show()
raw_input('Press enter to finish')
input('Press enter to finish')
return m
@ -693,7 +693,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish')
input('Press enter to finish')
lvm_visualizer.close()
data_show.close()

View file

@ -550,3 +550,34 @@ def parametric_mean_function(max_iters=100, optimize=True, plot=True):
return m
def warped_gp_cubic_sine(max_iters=100):
"""
A test replicating the cubic sine regression problem from
Snelson's paper.
"""
X = (2 * np.pi) * np.random.random(151) - np.pi
Y = np.sin(X) + np.random.normal(0,0.2,151)
Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y])
X = X[:, None]
Y = Y[:, None]
warp_k = GPy.kern.RBF(1)
warp_f = GPy.util.warping_functions.TanhFunction(n_terms=2)
warp_m = GPy.models.WarpedGP(X, Y, kernel=warp_k, warping_function=warp_f)
warp_m['.*\.d'].constrain_fixed(1.0)
m = GPy.models.GPRegression(X, Y)
m.optimize_restarts(parallel=False, robust=True, num_restarts=5, max_iters=max_iters)
warp_m.optimize_restarts(parallel=False, robust=True, num_restarts=5, max_iters=max_iters)
#m.optimize(max_iters=max_iters)
#warp_m.optimize(max_iters=max_iters)
print(warp_m)
print(warp_m['.*warp.*'])
warp_m.predict_in_warped_space = False
warp_m.plot(title="Warped GP - Latent space")
warp_m.predict_in_warped_space = True
warp_m.plot(title="Warped GP - Warped space")
m.plot(title="Standard GP")
warp_m.plot_warping()
pb.show()

View file

@ -0,0 +1,26 @@
import GPy
import numpy as np
import matplotlib.pyplot as plt
import GPy.models.state_space_model as SS_model
X = np.linspace(0, 10, 2000)[:, None]
Y = np.sin(X) + np.random.randn(*X.shape)*0.1
kernel1 = GPy.kern.Matern32(X.shape[1])
m1 = GPy.models.GPRegression(X,Y, kernel1)
print(m1)
m1.optimize(optimizer='bfgs',messages=True)
print(m1)
kernel2 = GPy.kern.sde_Matern32(X.shape[1])
#m2 = SS_model.StateSpace(X,Y, kernel2)
m2 = GPy.models.StateSpace(X,Y, kernel2)
print(m2)
m2.optimize(optimizer='bfgs',messages=True)
print(m2)
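An illustrative follow-up (not part of the commit), assuming GPy.models.StateSpace implements the standard predict interface: both models use a Matern-3/2 covariance on the same data, so their predictions should roughly agree after optimization.
Xnew = np.linspace(0, 10, 5)[:, None]
print(m1.predict(Xnew)[0].ravel())   # GP regression predictive mean
print(m2.predict(Xnew)[0].ravel())   # state-space (Kalman) predictive mean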

View file

@ -21,7 +21,7 @@ class ExactGaussianInference(LatentFunctionInference):
def __init__(self):
pass#self._YYTfactor_cache = caching.cache()
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, precision=None, Z_tilde=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, variance=None, Z_tilde=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
@ -31,8 +31,8 @@ class ExactGaussianInference(LatentFunctionInference):
else:
m = mean_function.f(X)
if precision is None:
precision = likelihood.gaussian_variance(Y_metadata)
if variance is None:
variance = likelihood.gaussian_variance(Y_metadata)
YYT_factor = Y-m
@ -40,7 +40,7 @@ class ExactGaussianInference(LatentFunctionInference):
K = kern.K(X)
Ky = K.copy()
diag.add(Ky, precision+1e-8)
diag.add(Ky, variance+1e-8)
Wi, LW, LWi, W_logdet = pdinv(Ky)

View file

@ -40,6 +40,14 @@ class EPBase(object):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def __setstate__(self, state):
super(EPBase, self).__setstate__(state[0])
self.epsilon, self.eta, self.delta = state[1]
self.reset()
def __getstate__(self):
return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]
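A minimal sketch (illustrative, not part of the commit) of what __getstate__/__setstate__ enable: pickling a model that uses EP inference (GPClassification is assumed to default to EP); the EP settings are restored and the cached approximation is rebuilt via reset().
import pickle
import numpy as np
import GPy
X = np.random.rand(20, 1)
Y = (np.sin(6 * X) > 0).astype(float)
m = GPy.models.GPClassification(X, Y)
m.optimize()
m2 = pickle.loads(pickle.dumps(m))   # round-trip through pickle
print(m2.inference_method.epsilon)   # EP settings survive the round-trip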
class EP(EPBase, ExactGaussianInference):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
if self.always_reset:
@ -51,14 +59,14 @@ class EP(EPBase, ExactGaussianInference):
if K is None:
K = kern.K(X)
if self._ep_approximation is None:
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, precision=1./tau_tilde, K=K, Z_tilde=np.log(Z_tilde).sum())
return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, variance=1./tau_tilde, K=K, Z_tilde=np.log(Z_tilde).sum())
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
@ -159,7 +167,7 @@ class EPDTC(EPBase, VarDTC):
else:
Kmn = psi1.T
if self._ep_approximation is None:
if getattr(self, '_ep_approximation', None) is None:
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation

View file

@ -22,7 +22,7 @@ class VarDTC(LatentFunctionInference):
"""
const_jitter = 1e-8
def __init__(self, limit=3):
def __init__(self, limit=1):
from paramz.caching import Cacher
self.limit = limit
self.get_trYYT = Cacher(self._get_trYYT, limit)

View file

@ -10,7 +10,7 @@ from .src.add import Add
from .src.prod import Prod
from .src.rbf import RBF
from .src.linear import Linear, LinearFull
from .src.static import Bias, White, Fixed, WhiteHeteroscedastic
from .src.static import Bias, White, Fixed, WhiteHeteroscedastic, Precomputed
from .src.brownian import Brownian
from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from .src.mlp import MLP
@ -24,8 +24,20 @@ from .src.ODE_st import ODE_st
from .src.ODE_t import ODE_t
from .src.poly import Poly
from .src.eq_ode2 import EQ_ODE2
from .src.integral import Integral
from .src.integral_limits import Integral_Limits
from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
from .src.eq_ode1 import EQ_ODE1
from .src.trunclinear import TruncLinear,TruncLinear_inf
from .src.splitKern import SplitKern,DEtime
from .src.splitKern import DEtime as DiffGenomeKern
from .src.spline import Spline
from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
from .src.sde_matern import sde_Matern32
from .src.sde_matern import sde_Matern52
from .src.sde_linear import sde_Linear
from .src.sde_standard_periodic import sde_StdPeriodic
from .src.sde_static import sde_White, sde_Bias
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
from .src.sde_brownian import sde_Brownian

View file

@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Brownian motion kernel:
.. math::
k(x,y) = \sigma^2 min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # variance (diffusion coefficient) of the Brownian motion
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
import numpy as np
class sde_Linear(Linear):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
"""
The init method is modified because one extra parameter is required: X, the
points on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

GPy/kern/_src/sde_matern.py (new file, 135 lines)
View file

@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .stationary import Matern32
from .stationary import Matern52
import numpy as np
class sde_Matern32(Matern32):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
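An illustrative sanity check (not part of the commit): the stationary covariance Pinf returned by sde() should satisfy the Lyapunov equation F*Pinf + Pinf*F^T + L*Qc*L^T = 0.
import numpy as np
import GPy
k = GPy.kern.sde_Matern32(1, variance=1.5, lengthscale=0.5)
F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 = k.sde()
residual = np.dot(F, Pinf) + np.dot(Pinf, F.T) + np.dot(L, np.dot(Qc, L.T))
print(np.max(np.abs(residual)))   # ~0 up to floating point error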
class sde_Matern52(Matern52):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -0,0 +1,178 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Standard Periodic kernel:
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.wavelengths.gradient = gradients[1]
self.lengthscales.gradient = gradients[2]
def sde(self):
"""
Return the state space representation of the covariance.
! Note: the lengthscale must be constrained not to drop below about 0.25,
otherwise the Bessel functions of the first kind grow very large.
! Note: the wavelength should also not become very small, because then
the gradients with respect to the wavelength become unstable.
However, this might depend on the data; for a test example with
300 data points the lower limit is 0.15.
"""
# Params to use: (in that order)
#self.variance
#self.wavelengths
#self.lengthscales
N = 7 # approximation order
w0 = 2*np.pi/self.wavelengths # frequency
lengthscales = 2*self.lengthscales
[q2,dq2l] = seriescoeff(N,lengthscales,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
# Derivatives self.wavelengths
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.wavelengths );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
# Derivatives self.lengthscales
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
"""
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
Reference is:
[1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
covariance functions and state space models. In Proceedings of the
Seventeenth International Conference on Artificial Intelligence and
Statistics (AISTATS 2014). JMLR: W&CP, volume 33.
Note! Only the infinite approximation (through Bessel function)
is currently implemented.
Input:
----------------
m: int
Degree of approximation. Default 6.
lengthScale: float
Length scale parameter in the kernel
magnSigma2:float
Multiplier in front of the kernel.
Output:
-----------------
coeffs: array(m+1)
Covariance series coefficients
coeffs_dl: array(m+1)
Derivatives of the coefficients with respect to lengthscale.
"""
if true_covariance:
bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
M,J = np.meshgrid(range(0,m+1),range(0,m+1))
coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
(lengthScale**(-2))**J *magnSigma2
coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)
coeffs = np.sum(coeffs,0)
else:
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
(-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )
# The first element
coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl
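An illustrative sanity check (not part of the commit): since k(0) = sum_j q_j^2, the truncated coefficients should approximately sum to magnSigma2 for moderate lengthscales. The import path is an assumption and may differ between GPy versions (kern._src vs kern.src).
import numpy as np
from GPy.kern.src.sde_standard_periodic import seriescoeff
coeffs, coeffs_dl = seriescoeff(m=6, lengthScale=1.0, magnSigma2=2.0)
print(np.sum(coeffs))   # approximately 2.0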

GPy/kern/_src/sde_static.py (new file, 101 lines)
View file

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
from .static import Bias
import numpy as np
class sde_White(White):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
White kernel:
.. math::
k(x,y) = \alpha*\delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Bias kernel:
.. math::
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

View file

@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
"""
Classes in this module enhance several stationary covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .rbf import RBF
from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Radial Basis Function kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10 # approximation order (number of terms in the exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
F = np.diag(np.ones((N-1,)),1)
F[-1,:] = -aa[-1:0:-1]
L= np.zeros((N,1))
L[N-1,0] = 1
H = np.zeros((1,N))
H[0,0] = 1
# Infinite covariance:
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
dPinf_variance = Pinf/self.variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
P0 = Pinf.copy()
dP0 = dPinf.copy()
# The benefits of this balancing are not well established; it helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Exponential(Exponential):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Exponential kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale)
F = np.array(((-1.0/lengthscale,),))
L = np.array(((1.0,),))
Qc = np.array( ((2.0*variance/lengthscale,),) )
H = np.array(((1.0,),))
Pinf = np.array(((variance,),))
P0 = Pinf.copy()
dF = np.zeros((1,1,2));
dQc = np.zeros((1,1,2));
dPinf = np.zeros((1,1,2));
dF[:,:,0] = 0.0
dF[:,:,1] = 1.0/lengthscale**2
dQc[:,:,0] = 2.0/lengthscale
dQc[:,:,1] = -2.0*variance/lengthscale**2
dPinf[:,:,0] = 1.0
dPinf[:,:,1] = 0.0
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
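An illustrative sanity check (not part of the commit): for a stationary state-space model the covariance is k(tau) = H expm(F*tau) Pinf H^T, which for this kernel should equal variance * exp(-tau / lengthscale).
import numpy as np
from scipy.linalg import expm
import GPy
k = GPy.kern.sde_Exponential(1, variance=2.0, lengthscale=0.7)
F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 = k.sde()
tau = 1.3
k_ss = np.dot(H, np.dot(expm(F * tau), np.dot(Pinf, H.T)))[0, 0]
print(k_ss, 2.0 * np.exp(-tau / 0.7))   # the two values should match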
class sde_RatQuad(RatQuad):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Rational Quadratic kernel:
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
assert False, 'Not Implemented'
# Params to use:
# self.lengthscale
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)

View file

@ -13,15 +13,21 @@ class Add(CombinationKernel):
propagates gradients through.
This kernel will take over the active dims of its subkernels passed in.
NOTE: The subkernels will be copies of the original kernels, to prevent
unexpected behavior.
"""
def __init__(self, subkerns, name='sum'):
for i, kern in enumerate(subkerns[:]):
_newkerns = []
for kern in subkerns:
if isinstance(kern, Add):
del subkerns[i]
for part in kern.parts[::-1]:
for part in kern.parts:
#kern.unlink_parameter(part)
subkerns.insert(i, part.copy())
super(Add, self).__init__(subkerns, name)
_newkerns.append(part.copy())
else:
_newkerns.append(kern.copy())
super(Add, self).__init__(_newkerns, name)
self._exact_psicomp = self._check_exact_psicomp()
def _check_exact_psicomp(self):
@ -87,14 +93,19 @@ class Add(CombinationKernel):
def gradients_XX(self, dL_dK, X, X2):
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
else:
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
#else: # diagonal covariance
# if X2 is None:
# target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
# else:
# target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_XX_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
target = np.zeros(X.shape+(X.shape[1],))
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
return target
@ -263,4 +274,94 @@ class Add(CombinationKernel):
i_s[k._all_dims_active] += k.input_sensitivity(summarize)
return i_s
else:
return super(Add, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
part_start_param_index = 0
for p in self.parts:
if not p.is_fixed:
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
Support adding kernels for sde representation
"""
import scipy.linalg as la
F = None
L = None
Qc = None
H = None
Pinf = None
P0 = None
dF = None
dQc = None
dPinf = None
dP0 = None
n = 0
nq = 0
nd = 0
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,Pinft,P0t,dFt,dQct,dPinft,dP0t) = p.sde()
F = la.block_diag(F,Ft) if (F is not None) else Ft
L = la.block_diag(L,Lt) if (L is not None) else Lt
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
H = np.hstack((H,Ht)) if (H is not None) else Ht
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
if dF is not None:
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
'constant', constant_values=0)
dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
else:
dF = dFt
if dQc is not None:
dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
'constant', constant_values=0)
dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
else:
dQc = dQct
if dPinf is not None:
dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
'constant', constant_values=0)
dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
else:
dPinf = dPinft
if dP0 is not None:
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
'constant', constant_values=0)
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
else:
dP0 = dP0t
n += Ft.shape[0]
nq += Qct.shape[0]
nd += dFt.shape[2]
assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
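A minimal usage sketch (illustrative, not part of the commit): summing two SDE-enabled kernels stacks their state-space models block-diagonally, so the joint state dimension is the sum of the parts' state dimensions.
import GPy
k = GPy.kern.sde_Matern32(1) + GPy.kern.sde_Exponential(1)
F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 = k.sde()
print(F.shape)   # (3, 3): two states from Matern32 plus one from Exponential
print(H.shape)   # (1, 3): the measurement model maps the stacked state to the single output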

View file

@ -15,6 +15,7 @@ class BasisFuncKernel(Kern):
This class does NOT automatically add an offset to the design matrix phi!
"""
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
assert self.input_dim==1, "Basis Function Kernel only implemented for one dimension. Use one kernel per dimension (and add them together) for more dimensions"
self.ARD = ARD
if self.ARD:
phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
@ -60,6 +61,11 @@ class BasisFuncKernel(Kern):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
def concatenate_offset(self, X):
"""
Convenience function to add an offset column to phi.
You can use this function to add an offset (bias on y axis)
to phi in your custom self._phi(X).
"""
return np.c_[np.ones((X.shape[0], 1)), X]
def posterior_inf(self, X=None, posterior=None):
@ -120,6 +126,12 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel):
return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
class ChangePointBasisFuncKernel(BasisFuncKernel):
"""
The basis function has a changepoint. That is, it is constant, jumps at a
single point (given as changepoint) and is constant again. You can
give multiple changepoints. The basis function values are computed as
np.where((X < self.changepoint), -1, 1)
"""
def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
self.changepoint = np.array(changepoint)
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@ -129,6 +141,11 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
return np.where((X < self.changepoint), -1, 1)
class DomainKernel(LinearSlopeBasisFuncKernel):
"""
Create a constant plateau of correlation between start and stop and zero
elsewhere. This is a constant shift of the outputs along the y axis
in the range from start to stop.
"""
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
@ -138,19 +155,25 @@ class DomainKernel(LinearSlopeBasisFuncKernel):
return phi#((phi-self.start)/(self.stop-self.start))-.5
class LogisticBasisFuncKernel(BasisFuncKernel):
"""
Create a series of logistic basis functions with the given centers. The
slopes are learned from the data fit. The number of centers determines the
number of logistic functions.
"""
def __init__(self, input_dim, centers, variance=1., slope=1., active_dims=None, ARD=False, ARD_slope=True, name='logistic'):
self.centers = np.atleast_2d(centers)
if ARD:
assert ARD_slope, "If we have one variance per center, we want also one slope per center."
self.ARD_slope = ARD_slope
if self.ARD_slope:
self.slope = Param('slope', slope * np.ones(self.centers.size), Logexp())
self.slope = Param('slope', slope * np.ones(self.centers.size))
else:
self.slope = Param('slope', slope, Logexp())
self.slope = Param('slope', slope)
super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
self.link_parameter(self.slope)
@Cache_this(limit=3, ignore_args=())
def _phi(self, X):
import scipy as sp
phi = 1/(1+np.exp(-((X-self.centers)*self.slope)))
return np.where(np.isnan(phi), 0, phi)#((phi-self.start)/(self.stop-self.start))-.5
@ -167,7 +190,7 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
if self.ARD_slope:
self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
else:
self.slope.gradient = self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum()
self.slope.gradient = np.sum(self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum())
else:
phi1 = self.phi(X)
phi2 = self.phi(X2)
@ -179,5 +202,5 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
if self.ARD_slope:
self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
else:
self.slope.gradient = self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum()
self.slope.gradient = np.sum(self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum())
self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
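# ---------------------------------------------------------------------------
# Editor's note: a minimal usage sketch for the basis-function kernels above,
# not part of the original diff. It assumes these classes are exported under
# GPy.kern (as the other kernels in this package are); the data, the
# changepoint location and the centers are illustrative only.
if __name__ == "__main__":
    import numpy as np
    import GPy
    X = np.linspace(0., 10., 50)[:, None]
    Y = np.sin(X) + (X > 5.) + 0.1 * np.random.randn(50, 1)
    # smooth component plus a constant jump at the changepoint t = 5
    k = GPy.kern.RBF(1) + GPy.kern.ChangePointBasisFuncKernel(1, changepoint=5.)
    # alternatively, a set of logistic basis functions centred at 3 and 7:
    # k = GPy.kern.RBF(1) + GPy.kern.LogisticBasisFuncKernel(1, centers=[3., 7.])
    m = GPy.models.GPRegression(X, Y, k)
    m.optimize()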

649
GPy/kern/src/eq_ode1.py Normal file
View file

@ -0,0 +1,649 @@
# Copyright (c) 2014, Cristian Guarnizo.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy.special import erf, erfcx
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
from paramz.caching import Cache_this
class EQ_ODE1(Kern):
"""
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self, input_dim=2, output_dim=1, rank=1, W = None, lengthscale=None, decay=None, active_dims=None, name='eq_ode1'):
assert input_dim == 2, "only defined for one input dimension (the second column is the output/latent index)"
super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name)
self.rank = rank
self.output_dim = output_dim
if lengthscale is None:
lengthscale = .5 + np.random.rand(self.rank)
else:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
if lengthscale.size != self.rank:
lengthscale = np.ones(self.rank)*lengthscale
if W is None:
W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
else:
assert W.shape == (self.output_dim, self.rank)
if decay is None:
decay = np.ones(self.output_dim)
else:
decay = np.asarray(decay)
assert decay.size in [1, self.output_dim], "Bad number of decay"
if decay.size != self.output_dim:
decay = np.ones(self.output_dim)*decay
# if kappa is None:
# self.kappa = np.ones(self.output_dim)
# else:
# kappa = np.asarray(kappa)
# assert kappa.size in [1, self.output_dim], "Bad number of kappa"
# if decay.size != self.output_dim:
# decay = np.ones(self.output_dim)*kappa
#self.kappa = Param('kappa', kappa, Logexp())
#self.delay = Param('delay', delay, Logexp())
#self.is_normalized = True
#self.is_stationary = False
#self.gaussian_initial = False
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.decay = Param('decay', decay, Logexp())
self.W = Param('W', W)
self.link_parameters(self.lengthscale, self.decay, self.W)
@Cache_this(limit=3)
def K(self, X, X2=None):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X2 is None:
if X_flag:
#Calculate covariance function for the latent functions
index -= self.output_dim
return self._Kuu(X, index)
else:
raise NotImplementedError
else:
#This way is not working, indexes are lost after using k._slice_X
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
#Calculate cross-covariance function
if not X_flag and X2_flag:
index2 -= self.output_dim
return self._Kfu(X, index, X2, index2) #Kfu
elif X_flag and not X2_flag:
index -= self.output_dim
return self._Kfu(X2, index2, X, index).T #Kuf
elif X_flag and X2_flag:
index -= self.output_dim
index2 -= self.output_dim
return self._Kusu(X, index, X2, index2) #Ku_s u
else:
raise NotImplementedError #Kf_s f
#Calculate the covariance function for diag(Kff(X,X))
def Kdiag(self, X):
if hasattr(X, 'values'):
index = np.int_(np.round(X[:, 1].values))
else:
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X_flag: #Kuudiag
return np.ones(X[:,0].shape)
else: #Kffdiag
kdiag = self._Kdiag(X)
return np.sum(kdiag, axis=1)
def _Kdiag(self, X):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(X[:, 1])
index = index.reshape(index.size,)
#terms that move along t
t = X[:, 0].reshape(X.shape[0], 1)
d = np.unique(index) #Output Indexes
B = self.decay.values[d]
S = self.W.values[d, :]
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
B = B.reshape(B.size, 1)
#Terms that move along q
lq = self.lengthscale.values.reshape(1, self.rank)
S2 = S*S
kdiag = np.empty((t.size, ))
#Dx1 terms
c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq)
#DxQ terms
nu = lq*(B*.5)
nu2 = nu*nu
#Nx1 terms
gamt = -2.*B
gamt = gamt[index]*t
#NxQ terms
t_lq = t/lq
# Upsilon Calculations
# Using wofz
#erfnu = erf(nu)
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :] ,t_lq+nu[index,:] ))
upm[t[:, 0] == 0, :] = 0.
upv = np.exp(nu2[index, :] + gamt + lnDifErf( -t_lq+nu[index,:], nu[index, :] ) )
upv[t[:, 0] == 0, :] = 0.
#Covariance calculation
#kdiag = np.sum(c0[index, :]*(upm-upv), axis=1)
kdiag = c0[index, :]*(upm-upv)
return kdiag
def update_gradients_full(self, dL_dK, X, X2 = None):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
self.decay.gradient = np.zeros(self.decay.shape)
self.W.gradient = np.zeros(self.W.shape)
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X2 is None:
if X_flag: #Kuu or Kmm
index -= self.output_dim
tmp = dL_dK*self._gkuu_lq(X, index)
for q in np.unique(index):
ind = np.where(index == q)
self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum()
else:
raise NotImplementedError
else: #Kfu or Knm
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
if not X_flag and X2_flag: #Kfu
index2 -= self.output_dim
else: #Kuf
dL_dK = dL_dK.T #so we obtain dL_dKfu
indtemp = index - self.output_dim
Xtemp = X
X = X2
X2 = Xtemp
index = index2
index2 = indtemp
glq, gSdq, gB = self._gkfu(X, index, X2, index2)
tmp = dL_dK*glq
for q in np.unique(index2):
ind = np.where(index2 == q)
self.lengthscale.gradient[q] = tmp[:, ind].sum()
tmpB = dL_dK*gB
tmp = dL_dK*gSdq
for d in np.unique(index):
ind = np.where(index == d)
self.decay.gradient[d] = tmpB[ind, :].sum()
for q in np.unique(index2):
ind2 = np.where(index2 == q)
self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum()
def update_gradients_diag(self, dL_dKdiag, X):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
self.decay.gradient = np.zeros(self.decay.shape)
self.W.gradient = np.zeros(self.W.shape)
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
index = np.int_(X[:, 1])
index = index.reshape(index.size,)
glq, gS, gB = self._gkdiag(X, index)
if dL_dKdiag.size == X.shape[0]:
dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
tmp = dL_dKdiag*glq
self.lengthscale.gradient = tmp.sum(0)
tmpB = dL_dKdiag*gB
tmp = dL_dKdiag*gS
for d in np.unique(index):
ind = np.where(index == d)
self.decay.gradient[d] = tmpB[ind, :].sum()
self.W.gradient[d, :] = tmp[ind].sum(0)
def gradients_X(self, dL_dK, X, X2=None):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
#If input_dim == 1, use this
#gX = np.zeros((X.shape[0], 1))
#Cheat to allow gradient for input_dim==2
gX = np.zeros(X.shape)
if X2 is None: #Kuu or Kmm
if X_flag:
index -= self.output_dim
gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0)
return gX
else:
raise NotImplementedError
else: #Kuf or Kmn
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z
index -= self.output_dim
gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1)
return gX
else:
raise NotImplementedError
#---------------------------------------#
# Helper functions #
#---------------------------------------#
#Evaluation of squared exponential for LFM
def _Kuu(self, X, index):
index = index.reshape(index.size,)
t = X[:, 0].reshape(X.shape[0],)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
kuu = np.zeros((t.size, t.size))
#Assign 1. to diagonal terms
kuu[np.diag_indices(t.size)] = 1.
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1)
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
#Calculation of covariance function
kuu[indr, indc] = np.exp(-r2/lq2[index[indr]])
#Completion of lower triangular part
kuu[indc, indr] = kuu[indr, indc]
return kuu
def _Kusu(self, X, index, X2, index2):
index = index.reshape(index.size,)
index2 = index2.reshape(index2.size,)
t = X[:, 0].reshape(X.shape[0],1)
t2 = X2[:, 0].reshape(1,X2.shape[0])
lq = self.lengthscale.values.reshape(self.rank,)
#Covariance matrix initialization
kuu = np.zeros((t.size, t2.size))
for q in range(self.rank):
ind1 = index == q
ind2 = index2 == q
r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
r2 = r*r
#Calculation of covariance function
kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
return kuu
#Evaluation of cross-covariance function
def _Kfu(self, X, index, X2, index2):
#terms that move along t
t = X[:, 0].reshape(X.shape[0], 1)
d = np.unique(index) #Output Indexes
B = self.decay.values[d]
S = self.W.values[d, :]
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#Output related variables must be column-wise
B = B.reshape(B.size, 1)
#Input related variables must be row-wise
z = X2[:, 0].reshape(1, X2.shape[0])
lq = self.lengthscale.values.reshape((1, self.rank))
kfu = np.empty((t.size, z.size))
#DxQ terms
c0 = S*((.5*np.sqrt(np.pi))*lq)
nu = B*(.5*lq)
nu2 = nu**2
#1xM terms
z_lq = z/lq[0, index2]
#NxM terms
tz = t-z
tz_lq = tz/lq[0, index2]
# Upsilon Calculations
fullind = np.ix_(index, index2)
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind]))
upsi[t[:, 0] == 0, :] = 0.
#Covariance calculation
kfu = c0[fullind]*upsi
return kfu
#Gradient of Kuu wrt lengthscale
def _gkuu_lq(self, X, index):
t = X[:, 0].reshape(X.shape[0],)
index = index.reshape(X.shape[0],)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
glq = np.zeros((t.size, t.size))
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1)
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
r2_lq2 = r2/lq2[index[indr]]
#Calculation of covariance function
er2_lq2 = np.exp(-r2_lq2)
#Gradient wrt lq
c = 2.*r2_lq2/lq[index[indr]]
glq[indr, indc] = er2_lq2*c
#Complete the lower triangular
glq[indc, indr] = glq[indr, indc]
return glq
#Be careful: this derivative should be transposed
def _gkuu_X(self, X, index): #Diagonal terms are always zero
t = X[:, 0].reshape(X.shape[0],)
index = index.reshape(index.size,)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
gt = np.zeros((t.size, t.size))
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
r2_lq2 = r2/(-lq2[index[indr]])
#Calculation of covariance function
er2_lq2 = np.exp(r2_lq2)
#Gradient wrt t
c = 2.*r/lq2[index[indr]]
gt[indr, indc] = er2_lq2*c
#Complete the lower triangular
gt[indc, indr] = -gt[indr, indc]
return gt
#Gradients for Diagonal Kff
def _gkdiag(self, X, index):
index = index.reshape(index.size,)
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#Output related variables must be column-wise
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
S2 = S*S
#Input related variables must be row-wise
lq = self.lengthscale.values.reshape(1, self.rank)
gB = np.empty((t.size,))
glq = np.empty((t.size, lq.size))
gS = np.empty((t.size, lq.size))
#Dx1 terms
c0 = S2*lq*np.sqrt(np.pi)
#DxQ terms
nu = (.5*lq)*B
nu2 = nu*nu
#Nx1 terms
gamt = -B[index]*t
egamt = np.exp(gamt)
e2gamt = egamt*egamt
#NxQ terms
t_lq = t/lq
t2_lq2 = -t_lq*t_lq
etlq2gamt = np.exp(t2_lq2 + gamt) #NXQ
##Upsilon calculations
#erfnu = erf(nu) #TODO: This can be improved
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :], t_lq + nu[index, :]) )
upm[t[:, 0] == 0, :] = 0.
upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) ) #egamt*upv
upv[t[:, 0] == 0, :] = 0.
#Gradient wrt S
c0_S = (S/B)*(lq*np.sqrt(np.pi))
gS = c0_S[index]*(upm - upv)
#For B
CB1 = (.5*lq)**2 - .5/B**2 #DXQ
lq2_2B = (.5*lq**2)*(S2/B) #DXQ
CB2 = 2.*etlq2gamt - e2gamt - 1. #NxQ
# gradient wrt B NxZ
gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \
lq2_2B[index, :]*CB2
#Gradient wrt lengthscale
#DxQ terms
c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1.+.5*(lq*B)**2)
Clq1 = S2*(lq*.5)
glq = c0[index]*(upm - upv) + Clq1[index]*CB2
return glq, gS, gB
def _gkfu(self, X, index, Z, index2):
index = index.reshape(index.size,)
#TODO: reduce memory usage
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#t column
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
#z row
z = Z[:, 0].reshape(1, Z.shape[0])
index2 = index2.reshape(index2.size,)
lq = self.lengthscale.values.reshape((1, self.rank))
#kfu = np.empty((t.size, z.size))
glq = np.empty((t.size, z.size))
gSdq = np.empty((t.size, z.size))
gB = np.empty((t.size, z.size))
#Dx1 terms
B_2 = B*.5
S_pi = S*(.5*np.sqrt(np.pi))
#DxQ terms
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
nu = B*lq*.5
nu2 = nu*nu
#1xM terms
z_lq = z/lq[0, index2]
#NxM terms
tz = t-z
tz_lq = tz/lq[0, index2]
etz_lq2 = -np.exp(-tz_lq*tz_lq)
ez_lq_Bt = np.exp(-z_lq*z_lq -B[index]*t)
# Upsilon calculations
fullind = np.ix_(index, index2)
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind] ) )
upsi[t[:, 0] == 0., :] = 0.
#Gradient wrt S
#DxQ term
Sa1 = lq*(.5*np.sqrt(np.pi))
gSdq = Sa1[0,index2]*upsi
#Gradient wrt lq
la1 = S_pi*(1. + 2.*nu2)
Slq = S*lq
uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index])
uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index])
glq = la1[fullind]*upsi
glq += Slq[fullind]*uplq
#Gradient wrt B
Slq = Slq*lq
nulq = nu*lq
upBd = etz_lq2 + ez_lq_Bt
gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd
return glq, gSdq, gB
#TODO: reduce memory usage
def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z)
index = index.reshape(index.size,)
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#t column
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
#z row
z = Z[:, 0].reshape(1, Z.shape[0])
index2 = index2.reshape(index2.size,)
lq = self.lengthscale.values.reshape((1, self.rank))
#kfu = np.empty((t.size, z.size))
gz = np.empty((t.size, z.size))
#Dx1 terms
S_pi =S*(.5*np.sqrt(np.pi))
#DxQ terms
#Slq = S*lq
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
nu = (.5*lq)*B
nu2 = nu*nu
#1xM terms
z_lq = z/lq[0, index2]
z_lq2 = -z_lq*z_lq
#NxQ terms
t_lq = t/lq
#NxM terms
zt_lq = z_lq - t_lq[:, index2]
zt_lq2 = -zt_lq*zt_lq
# Upsilon calculations
fullind = np.ix_(index, index2)
z2 = z_lq + nu[fullind]
z1 = z2 - t_lq[:, index2]
upsi = np.exp(nu2[fullind] - B[index]*(t-z) + lnDifErf(z1,z2) )
upsi[t[:, 0] == 0., :] = 0.
#Gradient wrt z
za1 = c0*B
#za2 = S_w
gz = za1[fullind]*upsi + S[fullind]*( np.exp(z_lq2 - B[index]*t) -np.exp(zt_lq2) )
return gz
def lnDifErf(z1,z2):
#Z2 is always positive
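# Editor's note (added comment): this helper evaluates log(erf(z2) - erf(z1))
# in a numerically stable way. For z1 > 0 both erf values are close to one, so
# the difference is rewritten with the scaled complementary error function,
#   erf(z2) - erf(z1) = exp(-z1**2)*(erfcx(z1) - erfcx(z2)*exp(z1**2 - z2**2)),
# which avoids catastrophic cancellation; for z1 <= 0 the direct form is safe.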
logdiferf = np.zeros(z1.shape)
ind = np.where(z1>0.)
ind2 = np.where(z1<=0.)
if ind[0].size > 0:
z1i = z1[ind]
z12 = z1i*z1i
z2i = z2[ind]
logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12-z2i**2))
if ind2[0].size > 0:
z1i = z1[ind2]
z2i = z2[ind2]
logdiferf[ind2] = np.log(erf(z2i) - erf(z1i))
return logdiferf
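# ---------------------------------------------------------------------------
# Editor's note: minimal sketch, not part of the original file. It only
# illustrates the input convention used above: column 0 is time, column 1 is an
# index selecting an output (0 .. output_dim-1) or, when >= output_dim, one of
# the latent forces. Values and shapes are illustrative.
if __name__ == "__main__":
    k = EQ_ODE1(input_dim=2, output_dim=2, rank=1)
    t = np.linspace(0., 5., 10)[:, None]
    X_out = np.hstack([t, np.zeros_like(t)])      # rows of output 0
    X_lat = np.hstack([t, 2. * np.ones_like(t)])  # rows of latent force 0 (index = output_dim + 0)
    Kfu = k.K(X_out, X_lat)    # cross-covariance between output and latent force
    Kuu = k.K(X_lat)           # covariance of the latent force
    kdiag = k.Kdiag(X_out)     # diagonal of the output covariance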

View file

@ -44,7 +44,7 @@ class EQ_ODE2(Kern):
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
if lengthscale.size != self.rank:
lengthscale = np.ones(self.input_dim)*lengthscale
lengthscale = np.ones(self.rank)*lengthscale
if W is None:
#W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
@ -71,7 +71,7 @@ class EQ_ODE2(Kern):
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(X[:, 1])
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X2 is None:
@ -79,7 +79,7 @@ class EQ_ODE2(Kern):
#Calculate covariance function for the latent functions
index -= self.output_dim
return self._Kuu(X, index)
else:
else: #Kff full
raise NotImplementedError
else:
#This way is not working, indexes are lost after using k._slice_X
@ -87,19 +87,40 @@ class EQ_ODE2(Kern):
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(X2[:, 1])
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
#Calculate cross-covariance function
if not X_flag and X2_flag:
index2 -= self.output_dim
return self._Kfu(X, index, X2, index2) #Kfu
else:
elif X_flag and not X2_flag:
index -= self.output_dim
return self._Kfu(X2, index2, X, index).T #Kuf
elif X_flag and X2_flag:
index -= self.output_dim
index2 -= self.output_dim
return self._Kusu(X, index, X2, index2) #Ku_s u
else:
raise NotImplementedError #Kf_s f
#Calculate the covariance function for diag(Kff(X,X))
def Kdiag(self, X):
if hasattr(X, 'values'):
index = np.int_(np.round(X[:, 1].values))
else:
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X_flag: #Kuudiag
return np.ones(X[:,0].shape)
else: #Kffdiag
kdiag = self._Kdiag(X)
return np.sum(kdiag, axis=1)
#Calculate the covariance function for diag(Kff(X,X))
def _Kdiag(self, X):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
@ -132,7 +153,7 @@ class EQ_ODE2(Kern):
#Terms that move along q
lq = self.lengthscale.values.reshape(1, self.lengthscale.size)
S2 = S*S
kdiag = np.empty((t.size, ))
kdiag = np.empty((t.size, lq.size))
indD = np.arange(B.size)
#(1) When wd is real
@ -187,8 +208,8 @@ class EQ_ODE2(Kern):
upv[t1[:, 0] == 0, :] = 0.
#Covariance calculation
kdiag[ind3t] = np.sum(np.real(K01[ind]*upm), axis=1)
kdiag[ind3t] += np.sum(np.real((c0[ind]*ec)*upv), axis=1)
kdiag[ind3t] = np.real(K01[ind]*upm)
kdiag[ind3t] += np.real((c0[ind]*ec)*upv)
#(2) When w_d is complex
if np.any(wbool):
@ -265,7 +286,7 @@ class EQ_ODE2(Kern):
upvc[t1[:, 0] == 0, :] = 0.
#Covariance calculation
kdiag[ind2t] = np.sum(K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc, axis=1)
kdiag[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc
return kdiag
def update_gradients_full(self, dL_dK, X, X2 = None):
@ -336,16 +357,17 @@ class EQ_ODE2(Kern):
index = index.reshape(index.size,)
glq, gS, gB, gC = self._gkdiag(X, index)
tmp = dL_dKdiag.reshape(index.size, 1)*glq
if dL_dKdiag.size == X.shape[0]:
dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
tmp = dL_dKdiag*glq
self.lengthscale.gradient = tmp.sum(0)
#TODO: Avoid the reshape by a priori knowing the shape of dL_dKdiag
tmpB = dL_dKdiag*gB.reshape(dL_dKdiag.shape)
tmpC = dL_dKdiag*gC.reshape(dL_dKdiag.shape)
tmp = dL_dKdiag.reshape(index.size, 1)*gS
tmpB = dL_dKdiag*gB
tmpC = dL_dKdiag*gC
tmp = dL_dKdiag*gS
for d in np.unique(index):
ind = np.where(index == d)
self.B.gradient[d] = tmpB[ind].sum()
self.C.gradient[d] = tmpC[ind].sum()
self.B.gradient[d] = tmpB[ind, :].sum()
self.C.gradient[d] = tmpC[ind, :].sum()
self.W.gradient[d, :] = tmp[ind].sum(0)
def gradients_X(self, dL_dK, X, X2=None):
@ -410,6 +432,23 @@ class EQ_ODE2(Kern):
kuu[indc, indr] = kuu[indr, indc]
return kuu
def _Kusu(self, X, index, X2, index2):
index = index.reshape(index.size,)
index2 = index2.reshape(index2.size,)
t = X[:, 0].reshape(X.shape[0],1)
t2 = X2[:, 0].reshape(1,X2.shape[0])
lq = self.lengthscale.values.reshape(self.rank,)
#Covariance matrix initialization
kuu = np.zeros((t.size, t2.size))
for q in range(self.rank):
ind1 = index == q
ind2 = index2 == q
r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
r2 = r*r
#Calculation of covariance function
kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
return kuu
#Evaluation of cross-covariance function
def _Kfu(self, X, index, X2, index2):
#terms that move along t
@ -632,8 +671,8 @@ class EQ_ODE2(Kern):
lq = self.lengthscale.values.reshape(1, self.rank)
lq2 = lq*lq
gB = np.empty((t.size,))
gC = np.empty((t.size,))
gB = np.empty((t.size, lq.size))
gC = np.empty((t.size, lq.size))
glq = np.empty((t.size, lq.size))
gS = np.empty((t.size, lq.size))
@ -723,8 +762,8 @@ class EQ_ODE2(Kern):
Ba4_1 = (S2lq*lq)*dgam_dB/w2
Ba4 = Ba4_1*c
gB[ind3t] = np.sum(np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
+ np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd), axis=1)
gB[ind3t] = np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
+ np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd)
# gradient wrt C
dw_dC = - alphad*dw_dB
@ -738,8 +777,8 @@ class EQ_ODE2(Kern):
Ca4_1 = (S2lq*lq)*dgam_dC/w2
Ca4 = Ca4_1*c
gC[ind3t] = np.sum(np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
+ np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd), axis=1)
gC[ind3t] = np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
+ np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd)
#Gradient wrt lengthscale
#DxQ terms
@ -868,10 +907,10 @@ class EQ_ODE2(Kern):
Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc))
Ba2_2c = c0*dgamc_dB/gamc
gB[ind2t] = np.sum(Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
gB[ind2t] = Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
+ Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\
+ Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\
+ Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc, axis=1)
+ Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc
##Gradient wrt C
dw_dC = 0.5*alphad/w
@ -895,10 +934,10 @@ class EQ_ODE2(Kern):
Ca4_1c = S2lq2*(dgamc_dC/w2)
Ca4c = Ca4_1c*c2
gC[ind2t] = np.sum(Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
gC[ind2t] = Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
+ Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\
+ Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\
+ Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc, axis=1)
+ Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc
#Gradient wrt lengthscale
#DxQ terms

View file

@ -56,14 +56,18 @@ class IndependentOutputs(CombinationKernel):
self.single_kern = False
self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim
# The combination kernel ALWAYS puts the extra dimension last.
# Thus, the index dimension of this kernel is always the last dimension
# after slicing. This is why the index_dim is just the last column:
self.index_dim = -1
def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
#[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
[[target.__setitem__((s,ss), kern.K(X[s,:]) if s==ss else kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
@ -103,13 +107,10 @@ class IndependentOutputs(CombinationKernel):
target = np.zeros(X.shape)
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None:
# TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
# print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(kerns, slices)]
for kern, s in zip(kerns, slices):
target[s] += kern.gradients_X(dL_dK[s, :][:, s],X[s], None)
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
@ -121,8 +122,8 @@ class IndependentOutputs(CombinationKernel):
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(kerns, slices, slices2)]
for kern, s, s2 in zip(kerns, slices, slices2):
target[s] += kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])
# TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim])
@ -133,7 +134,9 @@ class IndependentOutputs(CombinationKernel):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape)
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
for kern, slices_i in zip(kerns, slices):
for s in slices_i:
target[s] += kern.gradients_X_diag(dL_dKdiag[s],X[s])
return target
def update_gradients_diag(self, dL_dKdiag, X):

82
GPy/kern/src/integral.py Normal file
View file

@ -0,0 +1,82 @@
# Written by Mike Smith michaeltsmith.org.uk
from __future__ import division
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import math
class Integral(Kern): #todo do I need to inherit from Stationary
"""
Integral kernel. The outputs are integrals of a latent function, which is
itself modelled with an exponentiated-quadratic covariance (see k_ff below).
K returns the covariance between the integrals (K_xx), or the cross-covariance
between the integrals and the latent function (K_xf) when X2 is given;
Kdiag returns the latent-function variances (K_ff).
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
super(Integral, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h(t/l) - self.h((t - tprime)/l) + self.h(tprime/l) - 1)
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: #we're finding dK_xx/dTheta
dK_dl = np.zeros([X.shape[0],X.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],self.lengthscale[0]) #TODO Multiple length scales
dK_dv[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
#useful little function to help calculate the covariances.
def g(self,z):
return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
#covariance between gradients (it's the gradients that we want out... maybe we should have a way of getting K_ff too? Currently you get the diag of K_ff from Kdiag)
def k_xx(self,t,tprime,l):
return 0.5 * (l**2) * ( self.g(t/l) - self.g((t - tprime)/l) + self.g(tprime/l) - 1)
def k_ff(self,t,tprime,l):
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
#covariance between the gradient and the actual value
def k_xf(self,t,tprime,l):
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf(tprime/l))
def K(self, X, X2=None):
if X2 is None:
K_xx = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
K_xx[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0])
return K_xx * self.variances[0]
else:
K_xf = np.zeros([X.shape[0],X2.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X2):
K_xf[i,j] = self.k_xf(x[0],x2[0],self.lengthscale[0])
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method for K_ff when finding the covariance as a hack so
I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
$K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
K_ff = np.zeros(X.shape[0])
for i,x in enumerate(X):
K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0])
return K_ff * self.variances[0]
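# ---------------------------------------------------------------------------
# Editor's note: minimal usage sketch, not part of the original file. It
# assumes each row of X holds the upper limit t of an integral of the latent
# function from 0 to t (the form of k_xf above corresponds to integrating the
# RBF covariance from 0 to t); the values are illustrative.
if __name__ == "__main__":
    X = np.linspace(0.5, 5., 10)[:, None]             # integration upper limits
    k = Integral(input_dim=1, variances=1.0, lengthscale=[2.0])
    Kxx = k.K(X)       # covariance between the observed integrals
    Kff = k.Kdiag(X)   # latent-function variances at X (see Kdiag docstring)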

View file

@ -0,0 +1,115 @@
# Written by Mike Smith michaeltsmith.org.uk
from __future__ import division
import math
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
class Integral_Limits(Kern):
"""
Integral kernel. This kernel allows 1d histogram or binned data to be modelled.
The outputs are the counts in each bin. The inputs (on two dimensions) are the start and end points of each bin.
The kernel's predictions are the latent function which might have generated those binned results.
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
"""
"""
super(Integral_Limits, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: #we're finding dK_xx/dTheta
dK_dl = np.zeros([X.shape[0],X.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],x[1],x2[1],self.lengthscale[0])
dK_dv[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
#useful little function to help calculate the covariances.
def g(self,z):
return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def k_xx(self,t,tprime,s,sprime,l):
"""Covariance between observed values.
s and t are one domain of the integral (i.e. the integral between s and t)
sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
We're interested in how correlated these two integrals are.
Note: We've not multiplied by the variance, this is done in K."""
return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
def k_ff(self,t,tprime,l):
"""Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
def k_xf(self,t,tprime,s,l):
"""Covariance between the gradient (latent value) and the actual (observed) value.
Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
def K(self, X, X2=None):
"""Note: We have a latent function and an output function. We want to be able to find:
- the covariance between values of the output function
- the covariance between values of the latent function
- the "cross covariance" between values of the output function and the latent function
This method is used by GPy to either get the covariance between the outputs (K_xx) or
is used to get the cross covariance (between the latent function and the outputs (K_xf).
We take advantage of the places where this function is used:
- if X2 is none, then we know that the items being compared (to get the covariance for)
are going to be both from the OUTPUT FUNCTION.
- if X2 is not none, then we know that the items being compared are from two different
sets (the OUTPUT FUNCTION and the LATENT FUNCTION).
If we want the covariance between values of the LATENT FUNCTION, we take advantage of
the fact that we only need that when we do prediction, and this only calls Kdiag (not K).
So the covariance between LATENT FUNCTIONS is available from Kdiag.
"""
if X2 is None:
K_xx = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
K_xx[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0])
return K_xx * self.variances[0]
else:
K_xf = np.zeros([X.shape[0],X2.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X2):
K_xf[i,j] = self.k_xf(x[0],x2[0],x[1],self.lengthscale[0]) #x2[1] unused, see k_xf docstring for explanation.
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method during prediction (instead of K). When we
do prediction we want to know the covariance between LATENT FUNCTIONS (K_ff) (as that's probably
what the user wants).
$K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
K_ff = np.zeros(X.shape[0])
for i,x in enumerate(X):
K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0])
return K_ff * self.variances[0]
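# ---------------------------------------------------------------------------
# Editor's note: minimal usage sketch, not part of the original file. Bin i is
# taken to cover [X[i, 1], X[i, 0]]: column 0 is passed as the end point t and
# column 1 as the start point s of k_xx above; Y would hold the bin counts.
if __name__ == "__main__":
    edges = np.linspace(0., 10., 11)
    X = np.vstack([edges[1:], edges[:-1]]).T   # one row [bin_end, bin_start] per bin
    k = Integral_Limits(input_dim=2, variances=1.0, lengthscale=[1.5])
    Kxx = k.K(X)                               # covariance between the bin counts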

View file

@ -15,10 +15,10 @@ class Kern(Parameterized):
# This adds input slice support. The rather ugly code for slicing can be
# found in kernel_slice_operations
# __metaclass__ is ignored in Python 3 - needs to be put in the class definition
#__metaclass__ = KernCallsViaSlicerMeta
#Here, we use the Python module six to support Py3 and Py2 simultaneously
# __metaclass__ = KernCallsViaSlicerMeta
# Here, we use the Python module six to support Py3 and Py2 simultaneously
#===========================================================================
_support_GPU=False
_support_GPU = False
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
"""
The base class for a kernel: a positive definite function
@ -62,7 +62,7 @@ class Kern(Parameterized):
self.psicomp = PSICOMP_GH()
def __setstate__(self, state):
self._all_dims_active = np.arange(0, max(state['active_dims'])+1)
self._all_dims_active = np.arange(0, max(state['active_dims']) + 1)
super(Kern, self).__setstate__(state)
@property
@ -132,18 +132,18 @@ class Kern(Parameterized):
raise NotImplementedError
def gradients_X_X2(self, dL_dK, X, X2):
return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
def gradients_XX(self, dL_dK, X, X2):
def gradients_XX(self, dL_dK, X, X2, cov=True):
"""
.. math::
\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
"""
raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X):
raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
"""
The diagonal of the second derivative w.r.t. X and X2
"""
raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
raise NotImplementedError("This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_X_diag(self, dL_dKdiag, X):
"""
The diagonal of the derivative w.r.t. X
@ -211,6 +211,12 @@ class Kern(Parameterized):
def input_sensitivity(self, summarize=True):
"""
Returns the sensitivity for each dimension of this kernel.
This is an arbitrary measure based on the kernel's parameters
per input dimension and its overall scaling.
Use it as a relative measurement, not for absolute comparison between
kernels.
"""
return np.zeros(self.input_dim)
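# Editor's note (added comment): input_sensitivity is what e.g. the ARD
# relevance plots build on; a typical call is kern.input_sensitivity(summarize=True),
# and the returned values should only be compared across dimensions of the
# same kernel, not across different kernels (see the docstring above).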
@ -292,20 +298,19 @@ class Kern(Parameterized):
"""
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
from .prod import Prod
#kernels = []
#if isinstance(self, Prod): kernels.extend(self.parameters)
#else: kernels.append(self)
#if isinstance(other, Prod): kernels.extend(other.parameters)
#else: kernels.append(other)
# kernels = []
# if isinstance(self, Prod): kernels.extend(self.parameters)
# else: kernels.append(self)
# if isinstance(other, Prod): kernels.extend(other.parameters)
# else: kernels.append(other)
return Prod([self, other], name)
def _check_input_dim(self, X):
assert X.shape[1] == self.input_dim, "{} did not specify _all_dims_active and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)
assert X.shape[1] == self.input_dim, "{} did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)
def _check_active_dims(self, X):
assert X.shape[1] >= len(self._all_dims_active), "At least {} dimensional X needed, X.shape={!s}".format(len(self._all_dims_active), X.shape)
class CombinationKernel(Kern):
"""
Abstract super class for combination kernels.
@ -325,11 +330,9 @@ class CombinationKernel(Kern):
assert all([isinstance(k, Kern) for k in kernels])
extra_dims = np.asarray(extra_dims, dtype=int)
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), extra_dims)
input_dim = active_dims.size
if extra_dims is not None:
input_dim += extra_dims.size
# initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name)

View file

@ -13,19 +13,20 @@ from paramz.parameterized import ParametersChangedMeta
def put_clean(dct, name, func):
if name in dct:
#dct['_clean_{}'.format(name)] = dct[name]
dct['_clean_{}'.format(name)] = dct[name]
dct[name] = func(dct[name])
class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __new__(cls, name, bases, dct):
put_clean(dct, 'K', _slice_K)
put_clean(dct, 'Kdiag', _slice_Kdiag)
put_clean(dct, 'phi', _slice_Kdiag)
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'psi0', _slice_psi)
@ -38,15 +39,16 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
class _Slice_wrap(object):
def __init__(self, k, X, X2=None, ret_shape=None):
def __init__(self, k, X, X2=None, diag=False, ret_shape=None):
self.k = k
self.diag = diag
if ret_shape is None:
self.shape = X.shape
else:
self.shape = ret_shape
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
assert X.ndim == 2, "need at least column vectors as inputs to kernels for now, given X.shape={!s}".format(X.shape)
if X2 is not None:
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
assert X2.ndim == 2, "need at least column vectors as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
if (self.k._all_dims_active is not None) and (self.k._sliced_X == 0):
self.k._check_active_dims(X)
self.X = self.k._slice_X(X)
@ -67,8 +69,13 @@ class _Slice_wrap(object):
ret = np.zeros(self.shape)
if len(self.shape) == 2:
ret[:, self.k._all_dims_active] = return_val
elif len(self.shape) == 3:
ret[:, :, self.k._all_dims_active] = return_val
elif len(self.shape) == 3: # derivative for X2!=None
if self.diag:
ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
else:
ret[:, :, self.k._all_dims_active] = return_val
elif len(self.shape) == 4: # second order derivative
ret.T[np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val.T
return ret
return return_val
@ -112,23 +119,34 @@ def _slice_gradients_X(f):
return ret
return wrap
def _slice_gradients_X_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap
def _slice_gradients_XX(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
if X2 is None:
N, M = X.shape[0], X.shape[0]
Q1 = Q2 = X.shape[1]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
Q1, Q2 = X.shape[1], X2.shape[1]
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f):
def _slice_gradients_XX_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
N, Q = X.shape
with _Slice_wrap(self, X, None, diag=True, ret_shape=(N, Q, Q)) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap

View file

@ -102,17 +102,39 @@ class Linear(Kern):
return dL_dK.dot(X2)*self.variances #np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
"""
Given the derivative of the objective w.r.t. K (dL_dK), compute the second derivative of K wrt X and X2:
returns the full covariance matrix [QxQ] of the input dimensions for each pair of vectors, thus
the returned array is of shape [NxMxQxQ].
.. math::
\frac{\partial^2 K}{\partial X2^2} = - \frac{\partial^2 K}{\partial X\partial X2}
:returns:
dL2_dXdX2: [NxMxQxQ] for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None)
Thus, we return the second derivative in X2.
"""
if X2 is None:
return 2*np.ones(X.shape)*self.variances
else:
return np.ones(X.shape)*self.variances
X2 = X
return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
#if X2 is None: dL_dK = (dL_dK+dL_dK.T)/2
#if X2 is None:
# return np.ones(np.repeat(X.shape, 2)) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
#else:
# return np.ones((X.shape[0], X2.shape[0], X.shape[1], X.shape[1])) * (self.variances[None,:] + self.variances[:, None])[None, None, :, :]
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
def gradients_XX_diag(self, dL_dKdiag, X):
return 2*np.ones(X.shape)*self.variances
return np.zeros((X.shape[0], X.shape[1], X.shape[1]))
#dims = X.shape
#if cov:
# dims += (X.shape[1],)
#return 2*np.ones(dims)*self.variances
def input_sensitivity(self, summarize=True):
return np.ones(self.input_dim) * self.variances

View file

@ -0,0 +1,120 @@
# Written by Mike Smith michaeltsmith.org.uk
from __future__ import division
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import math
class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from Stationary
"""
Integral kernel, can include limits on each integral value. This kernel allows an n-dimensional
histogram or binned data to be modelled. The outputs are the counts in each bin. The inputs
are the start and end points of each bin: Pairs of inputs act as the limits on each bin. So
inputs 4 and 5 provide the start and end values of each bin in the 3rd dimension.
The kernel's predictions are the latent function which might have generated those binned results.
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
super(Multidimensional_Integral_Limits, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: #we're finding dK_xx/dTheta
dK_dl_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
k_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
dK_dl = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for il,l in enumerate(self.lengthscale):
idx = il*2
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl_term[i,j,il] = self.dk_dl(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
k_term[i,j,il] = self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
for il,l in enumerate(self.lengthscale):
dK_dl = self.variances[0] * dK_dl_term[:,:,il]
for jl, l in enumerate(self.lengthscale):
if jl!=il:
dK_dl *= k_term[:,:,jl]
self.lengthscale.gradient[il] = np.sum(dK_dl * dL_dK)
dK_dv = self.calc_K_xx_wo_variance(X) #the gradient wrt the variance is k_xx.
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
#useful little function to help calculate the covariances.
def g(self,z):
return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def k_xx(self,t,tprime,s,sprime,l):
"""Covariance between observed values.
s and t are one domain of the integral (i.e. the integral between s and t)
sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
We're interested in how correlated these two integrals are.
Note: We've not multiplied by the variance, this is done in K."""
return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
def k_ff(self,t,tprime,l):
"""Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
def k_xf(self,t,tprime,s,l):
"""Covariance between the gradient (latent value) and the actual (observed) value.
Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
def calc_K_xx_wo_variance(self,X):
"""Calculates K_xx without the variance term"""
K_xx = np.ones([X.shape[0],X.shape[0]]) #ones now as a product occurs over each dimension
for i,x in enumerate(X):
for j,x2 in enumerate(X):
for il,l in enumerate(self.lengthscale):
idx = il*2 #each pair of input dimensions describe the limits on one actual dimension in the data
K_xx[i,j] *= self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
return K_xx
def K(self, X, X2=None):
if X2 is None: #X vs X
K_xx = self.calc_K_xx_wo_variance(X)
return K_xx * self.variances[0]
else: #X vs X2
K_xf = np.ones([X.shape[0],X2.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X2):
for il,l in enumerate(self.lengthscale):
idx = il*2
K_xf[i,j] *= self.k_xf(x[idx],x2[idx],x[idx+1],l)
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method for K_ff when finding the covariance as a hack so
I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
$K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
K_ff = np.ones(X.shape[0])
for i,x in enumerate(X):
for il,l in enumerate(self.lengthscale):
idx = il*2
K_ff[i] *= self.k_ff(x[idx],x[idx],l)
return K_ff * self.variances[0]
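# ---------------------------------------------------------------------------
# Editor's note: minimal usage sketch, not part of the original file, for 2-d
# bins. For data dimension d, columns 2*d and 2*d+1 of X are passed as the end
# and start of the bin in that dimension (matching the argument order of k_xx
# above); the values below are illustrative.
if __name__ == "__main__":
    X = np.array([[1., 0., 1., 0.],    # bin [0, 1] x [0, 1]
                  [2., 1., 1., 0.],    # bin [1, 2] x [0, 1]
                  [2., 1., 2., 1.]])   # bin [1, 2] x [1, 2]
    k = Multidimensional_Integral_Limits(input_dim=4, variances=1.0,
                                         lengthscale=[1.0, 1.0])
    Kxx = k.K(X)    # covariance between the three bin counts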

View file

@ -99,9 +99,120 @@ class Prod(CombinationKernel):
def input_sensitivity(self, summarize=True):
if summarize:
i_s = np.zeros((self.input_dim))
i_s = np.ones((self.input_dim))
for k in self.parts:
i_s[k._all_dims_active] *= k.input_sensitivity(summarize)
return i_s
else:
return super(Prod, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
part_start_param_index = 0
for p in self.parts:
if not p.is_fixed:
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
"""
F = np.array((0,), ndmin=2)
L = np.array((1,), ndmin=2)
Qc = np.array((1,), ndmin=2)
H = np.array((1,), ndmin=2)
Pinf = np.array((1,), ndmin=2)
P0 = np.array((1,), ndmin=2)
dF = None
dQc = None
dPinf = None
dP0 = None
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
# check derivative dimensions ->
number_of_parameters = len(p.param_array)
assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
# check derivative dimensions <-
# exception for periodic kernel
if (p.name == 'std_periodic'):
Qct = P_inft
dQct = dP_inft
dF = dkron(F,dF,Ft,dFt,'sum')
dQc = dkron(Qc,dQc,Qct,dQct,'prod')
dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
L = np.kron(L,Lt)
Qc = np.kron(Qc,Qct)
Pinf = np.kron(Pinf,P_inft)
P0 = np.kron(P0,P0t)
H = np.kron(H,Ht)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
def dkron(A,dA,B,dB, operation='prod'):
"""
Function computes the derivative of Kronecker product A*B
(or Kronecker sum A+B).
Input:
-----------------------
A: 2D matrix
Some matrix
dA: 3D (or 2D matrix)
Derivatives of A
B: 2D matrix
Some matrix
dB: 3D (or 2D matrix)
Derivatives of B
operation: str 'prod' or 'sum'
Which operation is considered. If the operation is 'sum' it is assumed
that A and B are square matrices.
Output:
dC: 3D matrix
Derivative of Kronecker product A*B (or Kronecker sum A+B)
"""
if dA is None:
dA_param_num = 0
dA = np.zeros((A.shape[0], A.shape[1],1))
else:
dA_param_num = dA.shape[2]
if dB is None:
dB_param_num = 0
dB = np.zeros((B.shape[0], B.shape[1],1))
else:
dB_param_num = dB.shape[2]
# Space allocation for derivative matrix
dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num))
for k in range(dA_param_num):
if operation == 'prod':
dC[:,:,k] = np.kron(dA[:,:,k],B);
else:
dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] ))
for k in range(dB_param_num):
if operation == 'prod':
dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k])
else:
dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k])
return dC
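# Editor's note (added comment, not in the original file): for parameters that
# enter only A the product rule gives d(A kron B) = dA kron B, and for
# parameters that enter only B it gives A kron dB; dkron stacks exactly these
# two groups along the third axis. For the Kronecker sum used for the dynamics
# matrix, kron(A, I) + kron(I, B), the corresponding terms are kron(dA, I) and
# kron(I, dB), which is the 'sum' branch above.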

View file

@ -39,6 +39,8 @@ class RBF(Stationary):
def dK2_drdr(self, r):
return (r**2-1)*self.K_of_r(r)
def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:

View file

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance the Brownian motion covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .brownian import Brownian
import numpy as np
class sde_Brownian(Brownian):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Brownian motion kernel:
.. math::
k(x,y) = \sigma^2 \min(x,y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values) # the kernel variance parameter
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((1.0,),(0,)) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,0),) )
Pinf = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
#P0 = Pinf.copy()
P0 = np.zeros((2,2))
#Pinf = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.ones( (1,1,1) )
dPinf = np.zeros((2,2,1))
dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
#dP0 = dPinf.copy()
dP0 = np.zeros((2,2,1))
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
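# A quick standalone check (not part of this file; the numbers are arbitrary):
# with P0 = 0 the model above is standard Brownian motion. The covariance ODE
# dP/dt = F P + P F^T + L Qc L^T started from P(0) = 0 gives
# P(s) = [[variance*s, 0], [0, 0]], and the implied kernel
# H expm(F*(t-s)) P(s) H^T equals variance*min(s, t) for t >= s.
import numpy as np
from scipy.linalg import expm

variance, s, t = 0.7, 1.2, 2.5
F = np.array([[0., 1.], [0., 0.]])
H = np.array([[1., 0.]])
P_s = np.array([[variance*s, 0.], [0., 0.]])
k_ts = H.dot(expm(F*(t - s))).dot(P_s).dot(H.T)[0, 0]
assert np.isclose(k_ts, variance*min(s, t))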

View file

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance the Linear covariance function with the
Stochastic Differential Equation (SDE) functionality.
"""
from .linear import Linear
import numpy as np
class sde_Linear(Linear):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Linear kernel:
.. math::
k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
"""
def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
"""
Modify the init method, because one extra parameter is required. X - points
on the X axis.
"""
super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
self.t0 = np.min(X)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variances.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variances.values) # this is the initial variance in Bayesian linear regression
t0 = float(self.t0)
F = np.array( ((0,1.0),(0,0) ))
L = np.array( ((0,),(1.0,)) )
Qc = np.zeros((1,1))
H = np.array( ((1.0,0),) )
Pinf = np.zeros((2,2))
P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance
dF = np.zeros((2,2,1))
dQc = np.zeros( (1,1,1) )
dPinf = np.zeros((2,2,1))
dP0 = np.zeros((2,2,1))
dP0[:,:,0] = P0 / variance
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
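# A quick standalone check (not part of this file; the values are arbitrary):
# the one-dimensional linear kernel corresponds to f(x) = w*x with w ~ N(0, variance),
# so the state [f(t0), f'(t0)] = [w*t0, w] has exactly the covariance P0 built above
# (verified here by a small Monte Carlo estimate).
import numpy as np

np.random.seed(0)
variance, t0 = 2.0, 0.4
w = np.sqrt(variance)*np.random.randn(200000)
P0_mc = np.cov(np.vstack([w*t0, w]))
P0 = np.array([[t0**2, t0], [t0, 1.]])*variance
assert np.allclose(P0_mc, P0, atol=0.02)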

137
GPy/kern/src/sde_matern.py Normal file
View file

@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .stationary import Matern32
from .stationary import Matern52
import numpy as np
class sde_Matern32(Matern32):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array(((0, 1.0), (-foo**2, -2*foo)))
L = np.array(( (0,), (1.0,) ))
Qc = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),))
H = np.array(((1.0, 0),))
Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros((2,2))
dFlengthscale = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2)))
dQcvariance = np.array((12.*np.sqrt(3)/lengthscale**3))
dQclengthscale = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance))
dPinfvariance = np.array(((1,0),(0,3./lengthscale**2)))
dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
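# A quick standalone check (not part of this file; the numbers are arbitrary):
# the state-space form above reproduces the Matern-3/2 covariance through
# k(tau) = H expm(F*tau) Pinf H^T for tau >= 0.
import numpy as np
from scipy.linalg import expm

variance, lengthscale, tau = 2.0, 0.7, 0.3
lam = np.sqrt(3.)/lengthscale
F = np.array([[0., 1.], [-lam**2, -2.*lam]])
H = np.array([[1., 0.]])
Pinf = np.array([[variance, 0.], [0., 3.*variance/lengthscale**2]])

ss_cov = H.dot(expm(F*tau)).dot(Pinf).dot(H.T)[0, 0]
r = tau/lengthscale
direct = variance*(1. + np.sqrt(3.)*r)*np.exp(-np.sqrt(3.)*r)
assert np.isclose(ss_cov, direct)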
class sde_Matern52(Matern52):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
lamda = np.sqrt(5.0)/lengthscale
kappa = 5.0/3.0*variance/lengthscale**2
F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
L = np.array(((0,),(0,),(1,)))
Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
H = np.array(((1,0,0),))
Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
P0 = Pinf.copy()
# Allocate space for the derivatives
dF = np.empty((3,3,2))
dQc = np.empty((1,1,2))
dPinf = np.empty((3,3,2))
# The partial derivatives
dFvariance = np.zeros((3,3))
dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4,
30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))
dPinf_variance = Pinf/variance
kappa2 = -2.0*kappa/lengthscale
dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2,
0,-100*variance/lengthscale**5)))
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
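# A quick standalone check (not part of this file; the numbers are arbitrary):
# Pinf and Qc above are mutually consistent, because the stationary covariance of a
# linear SDE satisfies the Lyapunov equation F Pinf + Pinf F^T + L Qc L^T = 0
# (checked here for the Matern-5/2 state-space form).
import numpy as np

variance, lengthscale = 1.5, 0.4
lam = np.sqrt(5.)/lengthscale
kappa = 5./3.*variance/lengthscale**2
F = np.array([[0., 1., 0.], [0., 0., 1.], [-lam**3, -3.*lam**2, -3.*lam]])
L = np.array([[0.], [0.], [1.]])
Qc = np.array([[variance*400.*np.sqrt(5.)/3./lengthscale**5]])
Pinf = np.array([[variance, 0., -kappa],
                 [0., kappa, 0.],
                 [-kappa, 0., 25.*variance/lengthscale**4]])
assert np.allclose(F.dot(Pinf) + Pinf.dot(F.T) + L.dot(Qc).dot(L.T), 0., atol=1e-8)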

View file

@ -0,0 +1,180 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Matern covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .standard_periodic import StdPeriodic
import numpy as np
import scipy as sp
from scipy import special as special
class sde_StdPeriodic(StdPeriodic):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Standard Periodic kernel:
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i))}{l_i} \right)^2 \right]
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.period.gradient = gradients[1]
self.lengthscale.gradient = gradients[2]
def sde(self):
"""
Return the state space representation of the covariance.
! Note: the lengthscale must be constrained not to drop below 0.25,
otherwise the Bessel functions of the first kind grow very large.
! Note: the wavelength should also not become very small, because the
gradients with respect to the wavelength then become unstable.
However, this might depend on the data. For a test example with
300 data points the lower limit is 0.15.
"""
# Params to use: (in that order)
#self.variance
#self.period
#self.lengthscale
N = 7 # approximation order
w0 = 2*np.pi/self.period # frequency
lengthscale = 2*self.lengthscale
[q2,dq2l] = seriescoeff(N,lengthscale,self.variance)
# lengthscale is multiplied by 2 because of slightly different
# formula for periodic covariance function.
# For the same reason:
dq2l = 2*dq2l
if np.any( np.isfinite(q2) == False):
raise ValueError("SDE periodic covariance error 1")
if np.any( np.isfinite(dq2l) == False):
raise ValueError("SDE periodic covariance error 2")
F = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
L = np.eye(2*(N+1))
Qc = np.zeros((2*(N+1), 2*(N+1)))
P_inf = np.kron(np.diag(q2),np.eye(2))
H = np.kron(np.ones((1,N+1)),np.array((1,0)) )
P0 = P_inf.copy()
# Derivatives
dF = np.empty((F.shape[0], F.shape[1], 3))
dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))
# Derivatives wrt self.variance
dF[:,:,0] = np.zeros(F.shape)
dQc[:,:,0] = np.zeros(Qc.shape)
dP_inf[:,:,0] = P_inf / self.variance
# Derivatives self.period
dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0, w0), (-w0, 0)) ) / self.period );
dQc[:,:,1] = np.zeros(Qc.shape)
dP_inf[:,:,1] = np.zeros(P_inf.shape)
# Derivatives self.lengthscales
dF[:,:,2] = np.zeros(F.shape)
dQc[:,:,2] = np.zeros(Qc.shape)
dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
dP0 = dP_inf.copy()
return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
"""
Calculate the coefficients q_j^2 for the covariance function
approximation:
k(\tau) = \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
Reference is:
[1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic
covariance functions and state space models. In Proceedings of the
Seventeenth International Conference on Artificial Intelligence and
Statistics (AISTATS 2014). JMLR: W&CP, volume 33.
Note! Only the infinite approximation (through Bessel function)
is currently implemented.
Input:
----------------
m: int
Degree of approximation. Default 6.
lengthScale: float
Length scale parameter of the kernel.
magnSigma2: float
Multiplier in front of the kernel.
Output:
-----------------
coeffs: array(m+1)
Covariance series coefficients
coeffs_dl: array(m+1)
Derivatives of the coefficients with respect to lengthscale.
"""
if true_covariance:
bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
M,J = np.meshgrid(range(0,m+1),range(0,m+1))
coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
(lengthScale**(-2))**J *magnSigma2
coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)
coeffs = np.sum(coeffs,0)
else:
coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
if np.any( np.isfinite(coeffs) == False):
raise ValueError("sde_standard_periodic: Coefficients are not finite!")
#import pdb; pdb.set_trace()
coeffs[0] = 0.5*coeffs[0]
# Derivatives wrt (lengthScale)
coeffs_dl = np.zeros(m+1)
coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
(-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )
# The first element
coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
(2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )
return coeffs, coeffs_dl
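# A quick standalone check (not part of this file; the values are arbitrary):
# the returned coefficients reconstruct the standard periodic covariance
# k(tau) = sigma^2 exp(-0.5*(sin(pi*tau/period)/l)^2) via
# k(tau) ~= sum_j q_j^2 cos(j*w0*tau); note the doubled-lengthscale convention
# used when sde() calls seriescoeff above.
import numpy as np

sigma2, l, period, tau = 1.0, 1.0, 2.0, 0.37
w0 = 2.*np.pi/period
q2, _ = seriescoeff(m=7, lengthScale=2.*l, magnSigma2=sigma2)
approx = np.sum(q2*np.cos(np.arange(q2.size)*w0*tau))
exact = sigma2*np.exp(-0.5*(np.sin(np.pi*tau/period)/l)**2)
assert np.isclose(approx, exact, atol=1e-6)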

103
GPy/kern/src/sde_static.py Normal file
View file

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance Static covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .static import White
from .static import Bias
import numpy as np
class sde_White(White):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
White kernel:
.. math::
k(x,y) = \alpha*\delta(x-y)
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((-np.inf,),) )
L = np.array( ((1.0,),) )
Qc = np.array( ((variance,),) )
H = np.array( ((1.0,),) )
Pinf = np.array( ((variance,),) )
P0 = Pinf.copy()
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dQc[:,:,0] = np.array( ((1.0,),) )
dPinf = np.zeros((1,1,1))
dPinf[:,:,0] = np.array( ((1.0,),) )
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Bias(Bias):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Bias kernel:
.. math::
k(x,y) = \alpha
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
F = np.array( ((0.0,),))
L = np.array( ((1.0,),))
Qc = np.zeros((1,1))
H = np.array( ((1.0,),))
Pinf = np.zeros((1,1))
P0 = np.array( ((variance,),) )
dF = np.zeros((1,1,1))
dQc = np.zeros((1,1,1))
dPinf = np.zeros((1,1,1))
dP0 = np.zeros((1,1,1))
dP0[:,:,0] = np.array( ((1.0,),) )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
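# A quick standalone check (not part of this file; the values are arbitrary):
# the bias kernel is a random constant c ~ N(0, variance); with F = 0 and Qc = 0
# the state never moves, so the implied covariance P0*exp(F*tau) is simply the
# constant variance for any lag tau.
import numpy as np

variance, tau = 0.8, 3.1
F, P0 = 0., variance
assert np.isclose(P0*np.exp(F*tau), variance)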

View file

@ -0,0 +1,192 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy, Arno Solin
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Classes in this module enhance several stationary covariance functions with the
Stochastic Differential Equation (SDE) functionality.
"""
from .rbf import RBF
from .stationary import Exponential
from .stationary import RatQuad
import numpy as np
import scipy as sp
class sde_RBF(RBF):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Radial Basis Function kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
N = 10  # approximation order (number of terms in the exponent series expansion)
roots_rounding_decimals = 6
fn = np.math.factorial(N)
kappa = 1.0/2.0/self.lengthscale**2
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
pp = sp.poly1d(pp)
roots = sp.roots(pp)
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
F = np.diag(np.ones((N-1,)),1)
F[-1,:] = -aa[-1:0:-1]
L= np.zeros((N,1))
L[N-1,0] = 1
H = np.zeros((1,N))
H[0,0] = 1
# Infinite covariance:
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
Pinf = 0.5*(Pinf + Pinf.T)
# Allocating space for derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# Derivatives:
dFvariance = np.zeros(F.shape)
dFlengthscale = np.zeros(F.shape)
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
dQcvariance = Qc/self.variance
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N),),))
dPinf_variance = Pinf/self.variance
lp = Pinf.shape[0]
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
coeff[np.mod(coeff,2) != 0] = 0
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinf_variance
dPinf[:,:,1] = dPinf_lengthscale
P0 = Pinf.copy()
dP0 = dPinf.copy()
# The benefit of this balancing is not well established; it helps only in one case:
# SVD Kalman + RBF kernel
import GPy.models.state_space_main as ssm
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_Exponential(Exponential):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Exponential kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
kernel
"""
self.variance.gradient = gradients[0]
self.lengthscale.gradient = gradients[1]
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale)
F = np.array(((-1.0/lengthscale,),))
L = np.array(((1.0,),))
Qc = np.array( ((2.0*variance/lengthscale,),) )
H = np.array(((1.0,),))
Pinf = np.array(((variance,),))
P0 = Pinf.copy()
dF = np.zeros((1,1,2));
dQc = np.zeros((1,1,2));
dPinf = np.zeros((1,1,2));
dF[:,:,0] = 0.0
dF[:,:,1] = 1.0/lengthscale**2
dQc[:,:,0] = 2.0/lengthscale
dQc[:,:,1] = -2.0*variance/lengthscale**2
dPinf[:,:,0] = 1.0
dPinf[:,:,1] = 0.0
dP0 = dPinf.copy()
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
class sde_RatQuad(RatQuad):
"""
Class providing extra functionality to transfer this covariance function into
SDE form.
Rational Quadratic kernel:
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def sde(self):
"""
Return the state space representation of the covariance.
"""
assert False, 'Not Implemented'
# Params to use:
# self.lengthscale
# self.variance
#self.power
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
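# A quick standalone check for sde_Exponential above (not part of this file; the
# values are arbitrary): the Ornstein-Uhlenbeck form F = -1/lengthscale,
# Qc = 2*variance/lengthscale, Pinf = variance satisfies the scalar Lyapunov equation
# 2*F*Pinf + Qc = 0 and gives k(tau) = Pinf*exp(F*tau) = variance*exp(-tau/lengthscale).
import numpy as np

variance, lengthscale, tau = 1.2, 0.6, 0.9
F, Qc, Pinf = -1./lengthscale, 2.*variance/lengthscale, variance
assert np.isclose(2.*F*Pinf + Qc, 0.)
assert np.isclose(Pinf*np.exp(F*tau), variance*np.exp(-tau/lengthscale))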

View file

@ -6,6 +6,7 @@ from .kern import Kern
import numpy as np
from ...core.parameterization import Param
from paramz.transformations import Logexp
from paramz.caching import Cache_this
class Static(Kern):
def __init__(self, input_dim, variance, active_dims, name):
@ -24,12 +25,13 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_XX(self, dL_dK, X, X2):
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
def gradients_XX_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
return np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
def gradients_XX_diag(self, dL_dKdiag, X, cov=False):
return np.zeros((X.shape[0], X.shape[1], X.shape[1]), dtype=np.float64)
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)
@ -133,9 +135,7 @@ class Bias(Static):
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64)
ret[:] = self.variance
return ret
return np.full(shape, self.variance, dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum()
@ -144,9 +144,7 @@ class Bias(Static):
self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, variational_posterior):
ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret
return np.full((Z.shape[0], Z.shape[0]), self.variance*self.variance*variational_posterior.shape[0], dtype=np.float64)
def psi2n(self, Z, variational_posterior):
ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
@ -172,16 +170,22 @@ class Fixed(Static):
super(Fixed, self).__init__(input_dim, variance, active_dims, name)
self.fixed_K = covariance_matrix
def K(self, X, X2):
return self.variance * self.fixed_K
if X2 is None:
return self.variance * self.fixed_K
else:
return np.zeros((X.shape[0], X2.shape[0]))
def Kdiag(self, X):
return self.variance * self.fixed_K.diagonal()
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
if X2 is None:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
else:
self.variance.gradient = 0
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
@ -192,3 +196,58 @@ class Fixed(Static):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()
class Precomputed(Fixed):
def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='precomputed'):
"""
Class for precomputed kernels, indexed by columns in X
Usage example:
import numpy as np
from GPy.models import GPClassification
from GPy.kern import Precomputed
from sklearn.cross_validation import LeaveOneOut
n = 10
d = 100
X = np.arange(n).reshape((n,1)) # column vector of indices
y = 2*np.random.binomial(1,0.5,(n,1))-1
X0 = np.random.randn(n,d)
k = np.dot(X0,X0.T)
kern = Precomputed(1,k) # k is a n x n covariance matrix
cv = LeaveOneOut(n)
ypred = y.copy()
for train, test in cv:
m = GPClassification(X[train], y[train], kernel=kern)
m.optimize()
ypred[test] = 2*(m.predict(X[test])[0]>0.5)-1
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
assert input_dim==1, "Precomputed only implemented in one dimension. Use multiple Precomputed kernels to have more dimensions by making use of active_dims"
super(Precomputed, self).__init__(input_dim, covariance_matrix, variance, active_dims, name)
@Cache_this(limit=2)
def _index(self, X, X2):
if X2 is None:
i1 = i2 = X.astype('int').flat
else:
i1, i2 = X.astype('int').flat, X2.astype('int').flat
return self.fixed_K[i1,:][:,i2]
def K(self, X, X2=None):
return self.variance * self._index(X, X2)
def Kdiag(self, X):
return self.variance * self._index(X,None).diagonal()
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._index(X, X2))
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))
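# A standalone usage sketch (assuming the class is exported as GPy.kern.Precomputed;
# the 4x4 matrix is arbitrary): X holds integer indices into the precomputed Gram
# matrix, so K(X) is just a variance-scaled sub-block lookup.
import numpy as np
import GPy

A = np.random.randn(4, 4)
K0 = A.dot(A.T)                                  # any 4x4 positive semi-definite matrix
kern = GPy.kern.Precomputed(1, K0)
X = np.array([[0.], [2.], [3.]])
assert np.allclose(kern.K(X), K0[np.ix_([0, 2, 3], [0, 2, 3])])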

View file

@ -51,6 +51,10 @@ class Stationary(Kern):
The lengthscale(s) and variance parameters are added to the structure automatically.
Thanks to @strongh:
In Stationary, a covariance function is defined in GPy as stationary when it depends only on the l2-norm |x_1 - x_2|.
However, this is the typical definition of isotropy, while stationarity is usually a bit more relaxed.
The more common version of stationarity is that the covariance is a function of x_1 - x_2 (see e.g. R&W, first paragraph of section 4.1).
"""
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):
@ -85,6 +89,11 @@ class Stationary(Kern):
def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
@Cache_this(limit=3, ignore_args=())
def dK2_drdr_diag(self):
"Second order derivative of K in r_{i,i}. The diagonal entries are always zero, so we do not give it here."
raise NotImplementedError("implement second derivative of covariance wrt r_diag to use this method")
@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None):
"""
@ -222,54 +231,57 @@ class Stationary(Kern):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
returns the full covariance matrix [QxQ] of the input dimension for each pair of vectors, thus
the returned array is of shape [NxNxQxQ].
..math:
\frac{\partial^2 K}{\partial X\partial X2}
\frac{\partial^2 K}{\partial X2 ^2} = - \frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
Thus, we return the second derivative in X2.
dL2_dXdX2: [NxMxQxQ] in the cov=True case, or [NxMxQ] in the cov=False case,
for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None)
Thus, we return the second derivative in X2.
"""
# The off diagonals in Q are always zero, this should also be true for the Linear kernel...
# According to multivariable chain rule, we can chain the second derivative through r:
# d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
invdist = self._inv_dist(X, X2)
invdist2 = invdist**2
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
dL_dr = self.dK_dr_via_X(X, X2) #* dL_dK # we perform this product later
tmp1 = dL_dr * invdist
dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
tmp2 = dL_drdr * invdist2
l2 = np.ones(X.shape[1]) * self.lengthscale**2
dL_drdr = self.dK2_drdr_via_X(X, X2) #* dL_dK # we perform this product later
tmp2 = dL_drdr*invdist2
l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2)
if X2 is None:
X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance
else:
tmp1[X==X2.T] -= self.variance
tmp1[invdist2==0.] -= self.variance
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
#grad = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
dist = X[:,None,:] - X2[None,:,:]
dist = (dist[:,:,:,None]*dist[:,:,None,:])
I = np.ones((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]))*np.eye((X2.shape[1]))
grad = (((dL_dK*(tmp1*invdist2 - tmp2))[:,:,None,None] * dist)/l2[None,None,:,None]
- (dL_dK*tmp1)[:,:,None,None] * I)/l2[None,None,None,:]
return grad
def gradients_XX_diag(self, dL_dK, X):
def gradients_XX_diag(self, dL_dK_diag, X):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
Given the derivative of the objective dL_dK, compute the second derivative of K wrt X:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
\frac{\partial^2 K}{\partial X\partial X}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ]
dL2_dXdX: [NxQxQ]
"""
return np.ones(X.shape) * self.variance/self.lengthscale**2
dL_dK_diag = dL_dK_diag.copy().reshape(-1, 1, 1)
assert (dL_dK_diag.size == X.shape[0]) or (dL_dK_diag.size == 1), "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
l4 = np.ones(X.shape[1])*self.lengthscale**2
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
@ -315,11 +327,23 @@ class Exponential(Stationary):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r)
return self.variance * np.exp(-r)
def dK_dr(self, r):
return -0.5*self.K_of_r(r)
return -self.K_of_r(r)
# def sde(self):
# """
# Return the state space representation of the covariance.
# """
# F = np.array([[-1/self.lengthscale]])
# L = np.array([[1]])
# Qc = np.array([[2*self.variance/self.lengthscale]])
# H = np.array([[1]])
# Pinf = np.array([[self.variance]])
# # TODO: return the derivatives as well
#
# return (F, L, Qc, H, Pinf)
@ -388,6 +412,41 @@ class Matern32(Stationary):
F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
def sde(self):
"""
Return the state space representation of the covariance.
"""
variance = float(self.variance.values)
lengthscale = float(self.lengthscale.values)
foo = np.sqrt(3.)/lengthscale
F = np.array([[0, 1], [-foo**2, -2*foo]])
L = np.array([[0], [1]])
Qc = np.array([[12.*np.sqrt(3) / lengthscale**3 * variance]])
H = np.array([[1, 0]])
Pinf = np.array([[variance, 0],
[0, 3.*variance/(lengthscale**2)]])
# Allocate space for the derivatives
dF = np.empty([F.shape[0],F.shape[1],2])
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
# The partial derivatives
dFvariance = np.zeros([2,2])
dFlengthscale = np.array([[0,0],
[6./lengthscale**3,2*np.sqrt(3)/lengthscale**2]])
dQcvariance = np.array([12.*np.sqrt(3)/lengthscale**3])
dQclengthscale = np.array([-3*12*np.sqrt(3)/lengthscale**4*variance])
dPinfvariance = np.array([[1,0],[0,3./lengthscale**2]])
dPinflengthscale = np.array([[0,0],
[0,-6*variance/lengthscale**3]])
# Combine the derivatives
dF[:,:,0] = dFvariance
dF[:,:,1] = dFlengthscale
dQc[:,:,0] = dQcvariance
dQc[:,:,1] = dQclengthscale
dPinf[:,:,0] = dPinfvariance
dPinf[:,:,1] = dPinflengthscale
return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
class Matern52(Stationary):
"""

File diff suppressed because it is too large

View file

@ -27,9 +27,6 @@ class Binomial(Likelihood):
super(Binomial, self).__init__(gp_link, 'Binomial')
def conditional_mean(self, gp, Y_metadata):
return self.gp_link(gp)*Y_metadata['trials']
def pdf_link(self, inv_link_f, y, Y_metadata):
"""
Likelihood function given inverse link of f.
@ -138,7 +135,7 @@ class Binomial(Likelihood):
inv_link_f2 = np.square(inv_link_f)
return 2*y/inv_link_f**3 - 2*(N-y)/(1.-inv_link_f)**3
def samples(self, gp, Y_metadata=None):
def samples(self, gp, Y_metadata=None, **kw):
"""
Returns a set of samples of observations based on a given value of the latent variable.
@ -152,3 +149,32 @@ class Binomial(Likelihood):
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):
if gh_points is None:
gh_x, gh_w = self._gh_points()
else:
gh_x, gh_w = gh_points
gh_w = gh_w / np.sqrt(np.pi)
shape = m.shape
C = np.atleast_1d(Y_metadata['trials'])
m,v,Y, C = m.flatten(), v.flatten(), Y.flatten()[:,None], C.flatten()[:,None]
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
p = std_norm_cdf(X)
p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
N = std_norm_pdf(X)
#TODO: missing nchoosek coefficient! use gammaln?
F = (Y*np.log(p) + (C-Y)*np.log(1.-p)).dot(gh_w)
NoverP = N/p
NoverP_ = N/(1.-p)
dF_dm = (Y*NoverP - (C-Y)*NoverP_).dot(gh_w)
dF_dv = -0.5* ( Y*(NoverP**2 + NoverP*X) + (C-Y)*(NoverP_**2 - NoverP_*X) ).dot(gh_w)
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), None
else:
raise NotImplementedError
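# A standalone sketch of the same Gauss-Hermite idea in isolation (not part of this
# file; the datum m, v, y, C is arbitrary): approximate
# E_{f ~ N(m, v)}[ y*log Phi(f) + (C - y)*log(1 - Phi(f)) ],
# with the binomial coefficient omitted, matching the TODO above.
import numpy as np
from scipy.stats import norm

m, v, y, C = 0.3, 0.5, 4, 10
gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
gh_w = gh_w/np.sqrt(np.pi)
f = gh_x*np.sqrt(2.*v) + m                       # change of variables for the Gaussian
p = np.clip(norm.cdf(f), 1e-9, 1. - 1e-9)        # clip for numerical stability
F = np.dot(y*np.log(p) + (C - y)*np.log(1. - p), gh_w)
print(F)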

View file

@ -678,7 +678,7 @@ class Likelihood(Parameterized):
burnin_cache = np.zeros(par_chains)
burnin_cache[:] = starting_loc.flatten()
burning_in = True
for i in xrange(burn_in+num_samples):
for i in range(burn_in+num_samples):
next_ind = i-burn_in
if burning_in:
old_y = burnin_cache

View file

@ -7,4 +7,6 @@ from .mlp import MLP
from .additive import Additive
from .compound import Compound
from .constant import Constant
from .identity import Identity
from .piecewise_linear import PiecewiseLinear

View file

@ -23,9 +23,10 @@ class Additive(Mapping):
assert(mapping1.input_dim==mapping2.input_dim)
assert(mapping1.output_dim==mapping2.output_dim)
input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
super(Additive, self).__init__(input_dim=input_dim, output_dim=output_dim)
self.mapping1 = mapping1
self.mapping2 = mapping2
self.link_parameters(self.mapping1, self.mapping2)
def f(self, X):
return self.mapping1.f(X) + self.mapping2.f(X)

View file

@ -33,7 +33,7 @@ class Linear(Mapping):
return np.dot(X, self.A)
def update_gradients(self, dL_dF, X):
self.A.gradient = np.dot( X.T, dL_dF)
self.A.gradient = np.dot(X.T, dL_dF)
def gradients_X(self, dL_dF, X):
return np.dot(dL_dF, self.A.T)

View file

@ -22,3 +22,9 @@ from .gp_var_gauss import GPVariationalGaussianApproximation
from .one_vs_all_classification import OneVsAllClassification
from .one_vs_all_sparse_classification import OneVsAllSparseClassification
from .dpgplvm import DPBayesianGPLVM
from .state_space_model import StateSpace
from .ibp_lfm import IBPLFM
from .gp_offset_regression import GPOffsetRegression

View file

@ -17,7 +17,7 @@ class GPCoregionalizedRegression(GP):
:type X_list: list of numpy arrays
:param Y_list: list of observed values related to the different noise models
:type Y_list: list of numpy arrays
:param kernel: a GPy kernel, defaults to RBF ** Coregionalized
:param kernel: a GPy kernel ** Coregionalized, defaults to RBF ** Coregionalized
:type kernel: None | GPy.kernel defaults
:likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
:type likelihoods_list: None | a list GPy.likelihoods

View file

@ -30,6 +30,7 @@ class GPKroneckerGaussianRegression(Model):
"""
def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
Model.__init__(self, name=name)
# accept the construction arguments
self.X1 = ObsAr(X1)
self.X2 = ObsAr(X2)

View file

@ -0,0 +1,95 @@
# Copyright (c) 2012 - 2014 the GPy Authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Written by Mike Smith. michaeltsmith.org.uk
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
from ..core import Param
class GPOffsetRegression(GP):
"""
Gaussian Process model for offset regression
:param X: input observations; we assume for this class that the first column holds the actual inputs and the last column holds the index of the cluster (so X should be Nx2)
:param Y: observed values (Nx1, or one column per independent output)
:param kernel: a GPy kernel, defaults to RBF
:param Norm normalizer: [False]
:param noise_var: the noise variance for the Gaussian likelihood, defaults to 1.
Normalize Y with the norm given.
If normalizer is False, no normalization will be done.
If it is None, we use GaussianNorm(alization).
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):
assert X.shape[1]>1, "Need at least two input dimensions, as last dimension is the label of the cluster"
if kernel is None:
kernel = kern.RBF(X.shape[1]-1)
#self._log_marginal_likelihood = np.nan #todo
likelihood = likelihoods.Gaussian(variance=noise_var)
self.X_fixed = X[:,:-1]
self.selected = np.array([int(x) for x in X[:,-1]])
super(GPOffsetRegression, self).__init__(X, Y, kernel, likelihood, name='GP offset regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)
maxcluster = np.max(self.selected)
self.offset = Param('offset', np.zeros(maxcluster))
#self.offset.set_prior(...)
self.link_parameter(self.offset)
#def dr_doffset(self, X, sel): #how much r changes wrt the offset hyperparameters
#def dL_doffset(self, X, sel):
# dL_dr = self.dK_dr_via_X(X, X) * dL_dK
def dr_doffset(self,X,sel,delta):
#Given an input matrix X and the offsets (delta),
#find dr/dDelta and return the results as a list, one entry per offset (delta).
#get the input values
#Each matrix G represents the effect of increasing an offset on the radius passed to the kernel for each pair of inputs. For example:
#what effect will increasing offset 4 have on the kernel output of inputs 5 and 8? Answer: Gs[4][5,8]... (positive or negative)
Gs = []
for i,d in enumerate(delta):
#X[sel==(i+1)]-=d
G = np.repeat(np.array(sel==(i+1))[:,None]*1,len(X),axis=1) - np.repeat(np.array(sel==(i+1))[None,:]*1,len(X),axis=0)
Gs.append(G)
#does subtracting the two Xs end up positive or negative (if negative we need to flip the sign in G).
w = np.repeat(X,len(X),axis=1) - np.repeat(X.T,len(X),axis=0)
dr_doffsets = []
for i,d in enumerate(delta):
dr_doffset = np.sign(w * Gs[i])
#print "dr_doffset %d" % i
#print dr_doffset
#print Gs[i]
#print w
dr_doffsets.append(dr_doffset)
#lastly we need to divide by the lengthscale: So far we've found d(X_i - X_j)/dOffsets
#we want dr/dOffsets. (X_i - X_j)/lengthscale = r
dr_doffsets /= self.kern.lengthscale
return dr_doffsets
def parameters_changed(self):
offsets = np.hstack([0.0,self.offset.values])[:,None]
self.X = self.X_fixed - offsets[self.selected]
super(GPOffsetRegression, self).parameters_changed()
dL_dr = self.kern.dK_dr_via_X(self.X, self.X) * self.grad_dict['dL_dK']
dr_doff = self.dr_doffset(self.X,self.selected,self.offset.values)
for i in range(len(dr_doff)):
dL_doff = dL_dr * dr_doff[i]
self.offset.gradient[i] = -np.sum(dL_doff)
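# A hypothetical usage sketch with synthetic data (all values are arbitrary): two
# clusters of the same sinusoid, the second shifted along x by 1.5; after
# optimisation m.offset should approach the true shift.
import numpy as np
import GPy

np.random.seed(1)
x = np.random.rand(40, 1)*10.
sel = np.vstack([np.zeros((20, 1)), np.ones((20, 1))])   # cluster labels 0/1
y = np.sin(x - 1.5*sel) + 0.05*np.random.randn(40, 1)
X = np.hstack([x, sel])                                  # last column is the cluster index
m = GPy.models.GPOffsetRegression(X, y)
m.optimize()
print(m.offset)                                          # expected to approach 1.5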

View file

@ -28,7 +28,7 @@ class GPVariationalGaussianApproximation(GP):
self.beta = Param('beta', np.ones(num_data))
inf = VarGauss(self.alpha, self.beta)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf, Y_metadata=Y_metadata)
self.link_parameter(self.alpha)
self.link_parameter(self.beta)

535
GPy/models/ibp_lfm.py Normal file
View file

@ -0,0 +1,535 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import kern
from ..util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv
from ..util import diag
from ..core.parameterization import Param
from ..likelihoods import Gaussian
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
from ..inference.latent_function_inference.posterior import Posterior
from GPy.core.parameterization.variational import VariationalPrior
from ..core.parameterization.parameterized import Parameterized
from paramz.transformations import Logexp, Logistic, __fixed__
log_2_pi = np.log(2*np.pi)
class VarDTC_minibatch_IBPLFM(VarDTC_minibatch):
'''
Modifications of VarDTC_minibatch for IBP LFM
'''
def __init__(self, batchsize=None, limit=3, mpi_comm=None):
super(VarDTC_minibatch_IBPLFM, self).__init__(batchsize, limit, mpi_comm)
def gatherPsiStat(self, kern, X, Z, Y, beta, Zp):
het_noise = beta.size > 1
assert beta.size == 1
trYYT = self.get_trYYT(Y)
if self.Y_speedup and not het_noise:
Y = self.get_YYTfactor(Y)
num_inducing = Z.shape[0]
num_data, output_dim = Y.shape
batchsize = num_data if self.batchsize is None else self.batchsize
psi2_full = np.zeros((num_inducing, num_inducing)) # MxM
psi1Y_full = np.zeros((output_dim, num_inducing)) # DxM
psi0_full = 0.
YRY_full = 0.
for n_start in range(0, num_data, batchsize):
n_end = min(batchsize+n_start, num_data)
if batchsize == num_data:
Y_slice = Y
X_slice = X
else:
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if het_noise:
b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b
else:
b = beta
psi0 = kern._Kdiag(X_slice) #Kff^q
psi1 = kern.K(X_slice, Z) #Kfu
indX = X_slice.values
indX = np.int_(np.round(indX[:, -1]))
Zp = Zp.gamma.values
# Extend Zp across columns
indZ = Z.values
indZ = np.int_(np.round(indZ[:, -1])) - Zp.shape[0]
Zpq = Zp[:, indZ]
for d in np.unique(indX):
indd = indX == d
psi1d = psi1[indd, :]
Zpd = Zp[d, :]
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
psi2_full += (np.dot(psi1d.T, psi1d)*Zp2[np.ix_(indZ, indZ)])*b #Zp2*Kufd*Kfud*beta
psi0_full += np.sum(psi0*Zp[indX, :])*b
psi1Y_full += np.dot(Y_slice.T, psi1*Zpq[indX, :])*b
if not het_noise:
YRY_full = trYYT*beta
if self.mpi_comm is not None:
from mpi4py import MPI
psi0_all = np.array(psi0_full)
psi1Y_all = psi1Y_full.copy()
psi2_all = psi2_full.copy()
YRY_all = np.array(YRY_full)
self.mpi_comm.Allreduce([psi0_full, MPI.DOUBLE], [psi0_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi1Y_full, MPI.DOUBLE], [psi1Y_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE])
return psi0_all, psi1Y_all, psi2_all, YRY_all
return psi0_full, psi1Y_full, psi2_full, YRY_full
def inference_likelihood(self, kern, X, Z, likelihood, Y, Zp):
"""
The first phase of inference:
Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv,
"""
num_data, output_dim = Y.shape
input_dim = Z.shape[0]
if self.mpi_comm is not None:
from mpi4py import MPI
num_data_all = np.array(num_data,dtype=np.int32)
self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
num_data = num_data_all
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
if het_noise:
self.batchsize = 1
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, Zp)
#======================================================================
# Compute Common Components
#======================================================================
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
if not np.isfinite(Kmm).all():
print(Kmm)
Lm = jitchol(Kmm)
LmInv = dtrtri(Lm)
LmInvPsi2LmInvT = np.dot(LmInv, np.dot(psi2_full, LmInv.T))
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda)
LLInv = dtrtri(LL)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
LmLLInv = np.dot(LLInv, LmInv)
b = np.dot(psi1Y_full, LmLLInv.T)
bbt = np.sum(np.square(b))
v = np.dot(b, LmLLInv).T
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
tmp = -np.dot(np.dot(LLInv.T, LLinvPsi1TYYTPsi1LLinvT + output_dim*np.eye(input_dim)), LLInv)
dL_dpsi2R = .5*np.dot(np.dot(LmInv.T, tmp + output_dim*np.eye(input_dim)), LmInv)
# Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v
#======================================================================
# Compute log-likelihood
#======================================================================
if het_noise:
logL_R = -np.sum(np.log(beta))
else:
logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)*.5 - output_dim*logdet_L*.5
#======================================================================
# Compute dL_dKmm
#======================================================================
dL_dKmm = dL_dpsi2R - .5*output_dim*np.dot(np.dot(LmInv.T, LmInvPsi2LmInvT), LmInv)
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)
#======================================================================
if not self.Y_speedup or het_noise:
wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
else:
post = None
#======================================================================
# Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
#======================================================================
if not het_noise:
dL_dthetaL = .5*(YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta) - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
self.midRes['dL_dthetaL'] = dL_dthetaL
return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y, Zp):
"""
The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd)
"""
num_data, output_dim = Y.shape
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y)
if self.Y_speedup and not het_noise:
YYT_factor = self.get_YYTfactor(Y)
else:
YYT_factor = Y
n_start = self.batch_pos
batchsize = num_data if self.batchsize is None else self.batchsize
n_end = min(batchsize+n_start, num_data)
if n_end == num_data:
isEnd = True
self.batch_pos = 0
else:
isEnd = False
self.batch_pos = n_end
if batchsize == num_data:
Y_slice = YYT_factor
X_slice = X
else:
Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end]
psi0 = kern._Kdiag(X_slice) #Kffdiag
psi1 = kern.K(X_slice, Z) #Kfu
betapsi1 = np.einsum('n,nm->nm', beta, psi1)
X_slice = X_slice.values
Z = Z.values
Zp = Zp.gamma.values
indX = np.int_(X_slice[:, -1])
indZ = np.int_(Z[:, -1]) - Zp.shape[0]
betaY = beta*Y_slice
#======================================================================
# Load Intermediate Results
#======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v']
#======================================================================
# Compute dL_dpsi
#======================================================================
dL_dpsi0 = -.5*output_dim*(beta * Zp[indX, :]) #XxQ #TODO: Check this gradient
dL_dpsi1 = np.dot(betaY, v.T)
dL_dEZp = psi1*dL_dpsi1
dL_dpsi1 = Zp[np.ix_(indX, indZ)]*dL_dpsi1
dL_dgamma = np.zeros(Zp.shape)
for d in np.unique(indX):
indd = indX == d
betapsi1d = betapsi1[indd, :]
psi1d = psi1[indd, :]
Zpd = Zp[d, :]
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
dL_dpsi1[indd, :] += np.dot(betapsi1d, Zp2[np.ix_(indZ, indZ)] * dL_dpsi2R)*2.
dL_EZp2 = dL_dpsi2R * (np.dot(psi1d.T, psi1d) * beta)*2. # Zpd*Kufd*Kfud*beta
#Gradient of Likelihood wrt gamma is calculated here
EZ = Zp[d, indZ]
for q in range(Zp.shape[1]):
EZt = EZ.copy()
indq = indZ == q
EZt[indq] = .5
dL_dgamma[d, q] = np.sum(dL_dEZp[np.ix_(indd, indq)]) + np.sum(dL_EZp2[:, indq]*EZt[:, None]) -\
.5*beta*(np.sum(psi0[indd, q]))
#======================================================================
# Compute dL_dthetaL
#======================================================================
if isEnd:
dL_dthetaL = self.midRes['dL_dthetaL']
else:
dL_dthetaL = 0.
grad_dict = {'dL_dKdiag': dL_dpsi0,
'dL_dKnm': dL_dpsi1,
'dL_dthetaL': dL_dthetaL,
'dL_dgamma': dL_dgamma}
return isEnd, (n_start, n_end), grad_dict
def update_gradients(model, mpi_comm=None):
if mpi_comm is None:
Y = model.Y
X = model.X
else:
Y = model.Y_local
X = model.X[model.N_range[0]:model.N_range[1]]
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
het_noise = model.likelihood.variance.size > 1
if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],))
else:
dL_dthetaL = np.float64(0.)
kern_grad = model.kern.gradient.copy()
kern_grad[:] = 0.
model.Z.gradient = 0.
gamma_gradient = model.Zp.gamma.copy()
gamma_gradient[:] = 0.
isEnd = False
while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
if (n_range[1]-n_range[0]) == X.shape[0]:
X_slice = X
elif mpi_comm is None:
X_slice = model.X[n_range[0]:n_range[1]]
else:
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
#gradients w.r.t. kernel
model.kern.update_gradients_diag(grad_dict['dL_dKdiag'], X_slice)
kern_grad += model.kern.gradient
model.kern.update_gradients_full(grad_dict['dL_dKnm'], X_slice, model.Z)
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(grad_dict['dL_dKnm'].T, model.Z, X_slice)
#gradients w.r.t. posterior parameters of Zp
gamma_gradient += grad_dict['dL_dgamma']
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes
if mpi_comm is not None:
from mpi4py import MPI
if het_noise:
raise "het_noise not implemented!"
kern_grad_all = kern_grad.copy()
Z_grad_all = model.Z.gradient.copy()
gamma_grad_all = gamma_gradient.copy()
mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([gamma_gradient, MPI.DOUBLE], [gamma_grad_all, MPI.DOUBLE])
kern_grad = kern_grad_all
model.Z.gradient = Z_grad_all
gamma_gradient = gamma_grad_all
#gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
model.kern.gradient += kern_grad
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
#gradient w.r.t. gamma
model.Zp.gamma.gradient = gamma_gradient
# Update Log-likelihood
KL_div = model.variational_prior.KL_divergence(model.Zp)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.Zp)
model._log_marginal_likelihood += KL_div
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)
class IBPPosterior(Parameterized):
'''
The IBP distribution for variational approximations.
'''
def __init__(self, binary_prob, tau=None, name='Sensitivity space', *a, **kw):
"""
binary_prob : the probability of including a latent function over an output.
"""
super(IBPPosterior, self).__init__(name=name, *a, **kw)
self.gamma = Param("binary_prob", binary_prob, Logistic(1e-10, 1. - 1e-10))
self.link_parameter(self.gamma)
if tau is not None:
assert tau.size == 2*self.gamma.shape[1]
self.tau = Param("tau", tau, Logexp())
else:
self.tau = Param("tau", np.ones((2, self.gamma.shape[1])), Logexp())
self.link_parameter(self.tau)
def set_gradients(self, grad):
self.gamma.gradient, self.tau.gradient = grad
def __getitem__(self, s):
pass
# if isinstance(s, (int, slice, tuple, list, np.ndarray)):
# import copy
# n = self.__new__(self.__class__, self.name)
# dc = self.__dict__.copy()
# dc['binary_prob'] = self.binary_prob[s]
# dc['tau'] = self.tau
# dc['parameters'] = copy.copy(self.parameters)
# n.__dict__.update(dc)
# n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
# n.parameters[dc['tau']._parent_index_] = dc['tau']
# n._gradient_array_ = None
# oversize = self.size - self.gamma.size - self.tau.size
# n.size = n.gamma.size + n.tau.size + oversize
# return n
# else:
# return super(IBPPosterior, self).__getitem__(s)
class IBPPrior(VariationalPrior):
def __init__(self, rank, alpha=2., name='IBPPrior', **kw):
super(IBPPrior, self).__init__(name=name, **kw)
from paramz.transformations import __fixed__
self.rank = rank
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def KL_divergence(self, variational_posterior):
from scipy.special import gamma, psi
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
sum_eta = np.sum(eta, axis=0) #sum_d gamma(d,q)
D_seta = eta.shape[0] - sum_eta
ad = self.alpha/eta.shape[1]
psitau1 = psi(tau[0, :])
psitau2 = psi(tau[1, :])
sumtau = np.sum(tau, axis=0)
psitau = psi(sumtau)
# E[log p(z)]
part1 = np.sum(sum_eta*psitau1 + D_seta*psitau2 - eta.shape[0]*psitau)
# E[log p(pi)]
part1 += (ad - 1.)*np.sum(psitau1 - psitau) + eta.shape[1]*np.log(ad)
#H(z)
part2 = np.sum(-(1.-eta)*np.log(1.-eta) - eta*np.log(eta))
#H(pi)
part2 += np.sum(np.log(gamma(tau[0, :])*gamma(tau[1, :])/gamma(sumtau))-(tau[0, :]-1.)*psitau1-(tau[1, :]-1.)*psitau2\
+ (sumtau-2.)*psitau)
return part1+part2
def update_gradients_KL(self, variational_posterior):
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
from scipy.special import psi, polygamma
dgamma = np.log(1. - eta) - np.log(eta) + psi(tau[0, :]) - psi(tau[1, :])
variational_posterior.gamma.gradient += dgamma
ad = self.alpha/self.rank
sumeta = np.sum(eta, axis=0)
sumtau = np.sum(tau, axis=0)
common = (-eta.shape[0] - (ad - 1.) + (sumtau - 2.))*polygamma(1, sumtau)
variational_posterior.tau.gradient[0, :] = (sumeta + ad - tau[0, :])*polygamma(1, tau[0, :]) + common
variational_posterior.tau.gradient[1, :] = ((eta.shape[0] - sumeta) - (tau[1, :] - 1.))*polygamma(1, tau[1, :])\
+ common
class IBPLFM(SparseGP_MPI):
"""
Indian Buffet Process for Latent Force Models
:param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray| GPy.likelihood instance
:param X: input data (np.ndarray) [X:values, X:index], index refers to the number of the output
:type X: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param rank: number of latent functions
"""
def __init__(self, X, Y, input_dim=2, output_dim=1, rank=1, Gamma=None, num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='IBP for LFM', alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, normalizer=False, variational_prior=None,**kwargs):
if kernel is None:
kernel = kern.EQ_ODE2(input_dim, output_dim, rank)
if Gamma is None:
gamma = np.empty((output_dim, rank)) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(output_dim, rank)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
else:
gamma = Gamma.copy()
#TODO: create a vector of inducing points
if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if likelihood is None:
likelihood = Gaussian()
if inference_method is None:
inference_method = VarDTC_minibatch_IBPLFM(mpi_comm=mpi_comm)
#Definition of variational terms
self.variational_prior = IBPPrior(rank=rank, alpha=alpha) if variational_prior is None else variational_prior
self.Zp = IBPPosterior(gamma, tau=tau)
super(IBPLFM, self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
self.link_parameter(self.Zp, index=0)
def set_Zp_gradients(self, Zp, Zp_grad):
"""Set the gradients of the posterior distribution of Zp in its specific form."""
Zp.gamma.gradient = Zp_grad
def get_Zp_gradients(self, Zp):
"""Get the gradients of the posterior distribution of Zp in its specific form."""
return Zp.gamma.gradient
def _propogate_Zp_val(self):
pass
def parameters_changed(self):
#super(IBPLFM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch_IBPLFM):
update_gradients(self, mpi_comm=self.mpi_comm)
return
# Add the KL divergence term
self._log_marginal_likelihood += self.variational_prior.KL_divergence(self.Zp)
#TODO Change the following according to this variational distribution
#self.Zp.gamma.gradient = self.
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.Zp)

View file

@ -127,8 +127,6 @@ class MRD(BayesianGPLVMMiniBatch):
self.unlink_parameter(self.likelihood)
self.unlink_parameter(self.kern)
del self.kern
del self.likelihood
self.num_data = Ylist[0].shape[0]
if isinstance(batchsize, int):
@ -156,7 +154,11 @@ class MRD(BayesianGPLVMMiniBatch):
self.link_parameter(spgp, i+2)
self.bgplvms.append(spgp)
self.posterior = None
b = self.bgplvms[0]
self.posterior = b.posterior
self.kern = b.kern
self.likelihood = b.likelihood
self.logger.info("init done")
def parameters_changed(self):

View file

@ -19,7 +19,7 @@ class SparseGPCoregionalizedRegression(SparseGP):
:type Y_list: list of numpy arrays
:param Z_list: list of inducing inputs (optional)
:type Z_list: empty list | list of numpy arrays
:param kernel: a GPy kernel, defaults to RBF ** Coregionalized
:param kernel: a GPy kernel ** Coregionalized, defaults to RBF ** Coregionalized
:type kernel: None | GPy.kernel defaults
:likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
:type likelihoods_list: None | a list GPy.likelihoods

View file

@ -30,7 +30,7 @@ class SparseGPRegression(SparseGP_MPI):
"""
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None):
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None, name='sparse_gp'):
num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability)
@ -55,7 +55,7 @@ class SparseGPRegression(SparseGP_MPI):
else:
infr = VarDTC()
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm)
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm, name=name)
def parameters_changed(self):
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch
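The hunks above add a name argument to SparseGPRegression and forward it to SparseGP_MPI. A short sketch of the new keyword (assuming the usual GPy.models.SparseGPRegression entry point):
import numpy as np
import GPy
X = np.random.uniform(0, 10, (50, 1))
Y = np.sin(X) + 0.1 * np.random.randn(50, 1)
# The model can now be given an explicit name instead of the default 'sparse_gp'.
m = GPy.models.SparseGPRegression(X, Y, num_inducing=10, name='sparse_gp_demo')
m.optimize(messages=False)
print(m.name)  # -> 'sparse_gp_demo'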

View file

@ -291,12 +291,12 @@ class SSGPLVM(SparseGP_MPI):
Xs[b>self.X.gamma.values] = 0
invcov = (Xs[:,:,:,None]*Xs[:,:,None,:]).sum(1)/noise_var+np.eye(Q)
cov = np.array([pdinv(invcov[s_idx])[0] for s_idx in xrange(invcov.shape[0])])
cov = np.array([pdinv(invcov[s_idx])[0] for s_idx in range(invcov.shape[0])])
Ws = np.empty((nSamples, Q, D))
tmp = (np.transpose(Xs, (0,2,1)).reshape(nSamples*Q,N).dot(self.Y)).reshape(nSamples,Q,D)
mean = (cov[:,:,:,None]*tmp[:,None,:,:]).sum(2)/noise_var
zeros = np.zeros((Q,))
for s_idx in xrange(Xs.shape[0]):
for s_idx in range(Xs.shape[0]):
Ws[s_idx] = (np.random.multivariate_normal(mean=zeros,cov=cov[s_idx],size=(D,))).T+mean[s_idx]
if raw_samples:

View file

@ -25,7 +25,7 @@ class SSMRD(Model):
self.X = NormalPosterior(means=X, variances=X_variance)
if kernels is None:
kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in xrange(len(Ylist))]
kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in range(len(Ylist))]
if Zs is None:
Zs = [None]* len(Ylist)
if likelihoods is None:
@ -34,9 +34,9 @@ class SSMRD(Model):
inference_methods = [None]* len(Ylist)
if IBP:
self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in xrange(len(Ylist))]
self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in range(len(Ylist))]
else:
self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in xrange(len(Ylist))]
self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in range(len(Ylist))]
self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
@ -73,7 +73,7 @@ class SSMRD(Model):
# Divide latent dimensions
idx = np.empty((input_dim,),dtype=np.int)
residue = (input_dim)%(len(Ylist))
for i in xrange(len(Ylist)):
for i in range(len(Ylist)):
if i < residue:
size = input_dim/len(Ylist)+1
idx[i*size:(i+1)*size] = i
@ -86,7 +86,7 @@ class SSMRD(Model):
X = np.empty((Ylist[0].shape[0],input_dim))
fracs = np.empty((input_dim,))
from ..util.initialization import initialize_latent
for i in xrange(len(Ylist)):
for i in range(len(Ylist)):
Y = Ylist[i]
dim = (idx==i).sum()
if dim>0:

745
GPy/models/state_space.py Normal file
View file

@ -0,0 +1,745 @@
# Copyright (c) 2013, Arno Solin.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# This implementation of converting GPs to state space models is based on the article:
#
# @article{Sarkka+Solin+Hartikainen:2013,
# author = {Simo S\"arkk\"a and Arno Solin and Jouni Hartikainen},
# year = {2013},
# title = {Spatiotemporal learning via infinite-dimensional {B}ayesian filtering and smoothing},
# journal = {IEEE Signal Processing Magazine},
# volume = {30},
# number = {4},
# pages = {51--61}
# }
#
import numpy as np
from scipy import linalg
from ..core import Model
from .. import kern
from GPy.plotting.matplot_dep.models_plots import gpplot
from GPy.plotting.matplot_dep.base_plots import x_frame1D
from GPy.plotting.matplot_dep import Tango
import pylab as pb
from GPy.core.parameterization.param import Param
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, sigma2=1.0, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
self.num_data, input_dim = X.shape
assert input_dim==1, "State space methods for time only"
num_data_Y, self.output_dim = Y.shape
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods for single outputs only"
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
# Noise variance
self.sigma2 = Param('Gaussian_noise', sigma2)
self.link_parameter(self.sigma2)
# Default kernel
if kernel is None:
self.kern = kern.Matern32(1)
else:
self.kern = kernel
self.link_parameter(self.kern)
self.sigma2.constrain_positive()
# Assert that the kernel is supported
if not hasattr(self.kern, 'sde'):
raise NotImplementedError('SDE must be implemented for the kernel being used')
#assert self.kern.sde() not False, "This kernel is not supported for state space estimation"
def parameters_changed(self):
"""
Parameters have now changed
"""
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Use the Kalman filter to evaluate the likelihood
self._log_marginal_likelihood = self.kf_likelihood(F,L,Qc,H,self.sigma2,Pinf,self.X.T,self.Y.T)
gradients = self.compute_gradients()
self.sigma2.gradient_full[:] = gradients[-1]
self.kern.gradient_full[:] = gradients[:-1]
def log_likelihood(self):
return self._log_marginal_likelihood
def compute_gradients(self):
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dFt,dQct,dPinft) = self.kern.sde()
# Allocate space for the full partial derivative matrices
dF = np.zeros([dFt.shape[0],dFt.shape[1],dFt.shape[2]+1])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],dQct.shape[2]+1])
dPinf = np.zeros([dPinft.shape[0],dPinft.shape[1],dPinft.shape[2]+1])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dPinf[:,:,:-1] = dPinft
# The sigma2 derivative
dR = np.zeros([1,1,dF.shape[2]])
dR[:,:,-1] = 1
# Calculate the likelihood gradients
gradients = self.kf_likelihood_g(F,L,Qc,H,self.sigma2,Pinf,dF,dQc,dPinf,dR,self.X.T,self.Y.T)
return gradients
def predict_raw(self, Xnew, Ynew=None, filteronly=False):
# Set defaults
if Ynew is None:
Ynew = self.Y
# Make a single matrix containing training and testing points
X = np.vstack((self.X, Xnew))
Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape)))
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index]
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run the Kalman filter
(M, P) = self.kalman_filter(F,L,Qc,H,self.sigma2,Pinf,X.T,Y.T)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = self.rts_smoother(F,L,Qc,X.T,M,P)
# Put the data back in the original order
M = M[:,return_inverse]
P = P[:,:,return_inverse]
# Only return the values for Xnew
M = M[:,self.num_data:]
P = P[:,:,self.num_data:]
# Calculate the mean and variance
m = H.dot(M).T
V = np.tensordot(H[0],P,(0,0))
V = np.tensordot(V,H[0],(0,0))
V = V[:,None]
# Return the posterior of the state
return (m, V)
def predict(self, Xnew, filteronly=False):
# Run the Kalman filter to get the state
(m, V) = self.predict_raw(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
V += self.sigma2
# Lower and upper bounds
lower = m - 2*np.sqrt(V)
upper = m + 2*np.sqrt(V)
# Return mean and variance
return (m, V, lower, upper)
def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
ax=None, resolution=None, plot_raw=False, plot_filter=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
# Deal with optional parameters
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
# Define the frame on which to plot
resolution = resolution or 200
Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
# Make a prediction on the frame and plot it
if plot_raw:
m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = self.Y
else:
m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
Y = self.Y
# Plot the values
gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(self.X, self.Y, 'kx', mew=1.5)
# Optionally plot some samples
if samples:
if plot_raw:
Ysim = self.posterior_samples_f(Xgrid, samples)
else:
Ysim = self.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
# Set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
def prior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Allocate space for results
Y = np.empty((size,X.shape[0]))
# Simulate random draws
#for j in range(0,size):
# Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
Y = self.simulate(F,L,Qc,Pinf,X.T,size)
# Only observations
Y = np.tensordot(H[0],Y,(0,0))
# Reorder simulated values
Y = Y[:,return_inverse]
# Return trajectory
return Y.T
def posterior_samples_f(self,X,size=10):
# Sort the matrix (save the order)
(_, return_index, return_inverse) = np.unique(X,True,True)
X = X[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
# Run smoother on original data
(m,V) = self.predict_raw(X)
# Simulate random draws from the GP prior
y = self.prior_samples_f(np.vstack((self.X, X)),size)
# Allocate space for sample trajectories
Y = np.empty((size,X.shape[0]))
# Run the RTS smoother on each of these values
for j in range(0,size):
yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
(m2,V2) = self.predict_raw(X,Ynew=yobs)
Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
# Reorder simulated values
Y = Y[:,return_inverse]
# Return posterior sample trajectories
return Y.T
def posterior_samples(self, X, size=10):
# Make samples of f
Y = self.posterior_samples_f(X,size)
# Add noise
Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
# Return trajectory
return Y
def kalman_filter(self,F,L,Qc,H,R,Pinf,X,Y):
# KALMAN_FILTER - Run the Kalman filter for a given model and data
# Allocate space for results
MF = np.empty((F.shape[0],Y.shape[1]))
PF = np.empty((F.shape[0],F.shape[0],Y.shape[1]))
# Initialize
MF[:,-1] = np.zeros(F.shape[0])
PF[:,:,-1] = Pinf.copy()
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Kalman filter
for k in range(0,Y.shape[1]):
# Form discrete-time model
#(A, Q) = self.lti_disc(F,L,Qc,dt[:,k])
A = As[:,:,index[k]];
Q = Qs[:,:,index[k]];
# Prediction step
MF[:,k] = A.dot(MF[:,k-1])
PF[:,:,k] = A.dot(PF[:,:,k-1]).dot(A.T) + Q
# Update step (only if there is data)
if not np.isnan(Y[:,k]):
if Y.shape[0]==1:
K = PF[:,:,k].dot(H.T)/(H.dot(PF[:,:,k]).dot(H.T) + R)
else:
LL = linalg.cho_factor(H.dot(PF[:,:,k]).dot(H.T) + R)
K = linalg.cho_solve(LL, H.dot(PF[:,:,k].T)).T
MF[:,k] += K.dot(Y[:,k]-H.dot(MF[:,k]))
PF[:,:,k] -= K.dot(H).dot(PF[:,:,k])
# Return values
return (MF, PF)
def rts_smoother(self,F,L,Qc,X,MS,PS):
# RTS_SMOOTHER - Run the RTS smoother for a given model and data
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Sequentially smooth states starting from the end
for k in range(2,X.shape[1]+1):
# Form discrete-time model
#(A, Q) = self.lti_disc(F,L,Qc,dt[:,1-k])
A = As[:,:,index[1-k]];
Q = Qs[:,:,index[1-k]];
# Smoothing step
LL = linalg.cho_factor(A.dot(PS[:,:,-k]).dot(A.T)+Q)
G = linalg.cho_solve(LL,A.dot(PS[:,:,-k])).T
MS[:,-k] += G.dot(MS[:,1-k]-A.dot(MS[:,-k]))
PS[:,:,-k] += G.dot(PS[:,:,1-k]-A.dot(PS[:,:,-k]).dot(A.T)-Q).dot(G.T)
# Return
return (MS, PS)
def kf_likelihood(self,F,L,Qc,H,R,Pinf,X,Y):
# Evaluate marginal likelihood
# Initialize
lik = 0
m = np.zeros((F.shape[0],1))
P = Pinf.copy()
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Kalman filter for likelihood evaluation
for k in range(0,Y.shape[1]):
# Form discrete-time model
#(A,Q) = self.lti_disc(F,L,Qc,dt[:,k])
A = As[:,:,index[k]];
Q = Qs[:,:,index[k]];
# Prediction step
m = A.dot(m)
P = A.dot(P).dot(A.T) + Q
# Update step only if there is data
if not np.isnan(Y[:,k]):
v = Y[:,k]-H.dot(m)
if Y.shape[0]==1:
S = H.dot(P).dot(H.T) + R
K = P.dot(H.T)/S
lik -= 0.5*np.log(S)
lik -= 0.5*v.shape[0]*np.log(2*np.pi)
lik -= 0.5*v*v/S
else:
LL, isupper = linalg.cho_factor(H.dot(P).dot(H.T) + R)
lik -= np.sum(np.log(np.diag(LL)))
lik -= 0.5*v.shape[0]*np.log(2*np.pi)
lik -= 0.5*linalg.cho_solve((LL, isupper),v).dot(v)
K = linalg.cho_solve((LL, isupper), H.dot(P.T)).T
m += K.dot(v)
P -= K.dot(H).dot(P)
# Return likelihood
return lik[0,0]
def kf_likelihood_g(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y):
# Evaluate marginal likelihood gradient
# State dimension, number of data points and number of parameters
n = F.shape[0]
steps = Y.shape[1]
nparam = dF.shape[2]
# Time steps
t = X.squeeze()
# Allocate space
e = 0
eg = np.zeros(nparam)
# Set up
m = np.zeros([n,1])
P = Pinf.copy()
dm = np.zeros([n,nparam])
dP = dPinf.copy()
mm = m.copy()
PP = P.copy()
# Initial dt
dt = -np.Inf
# Allocate space for expm results
AA = np.zeros([2*n, 2*n, nparam])
FF = np.zeros([2*n, 2*n])
# Loop over all observations
for k in range(0,steps):
# The previous time step
dt_old = dt;
# The time discretization step length
if k>0:
dt = t[k]-t[k-1]
else:
dt = 0
# Loop through all parameters (Kalman filter prediction step)
for j in range(0,nparam):
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# The first matrix for the matrix factor decomposition
FF[:n,:n] = F
FF[n:,:n] = dF[:,:,j]
FF[n:,n:] = F
# Solve the matrix exponential
AA[:,:,j] = linalg.expm3(FF*dt)
# Solve the differential equation
foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]]))
mm = foo[:n,:]
dm[:,j:j+1] = foo[n:,:]
# The discrete-time dynamical model
if j==0:
A = AA[:n,:n,j]
Q = Pinf - A.dot(Pinf).dot(A.T)
PP = A.dot(P).dot(A.T) + Q
# The derivatives of A and Q
dA = AA[n:,:n,j]
dQ = dPinf[:,:,j] - dA.dot(Pinf).dot(A.T) \
- A.dot(dPinf[:,:,j]).dot(A.T) - A.dot(Pinf).dot(dA.T)
# The derivatives of P
dP[:,:,j] = dA.dot(P).dot(A.T) + A.dot(dP[:,:,j]).dot(A.T) \
+ A.dot(P).dot(dA.T) + dQ
# Set predicted m and P
m = mm
P = PP
# Start the Kalman filter update step and precalculate variables
S = H.dot(P).dot(H.T) + R
# We should calculate the Cholesky factor if S is a matrix
# [LS,notposdef] = chol(S,'lower');
# The Kalman filter update (S is scalar)
HtiS = H.T/S
iS = 1/S
K = P.dot(HtiS)
v = Y[:,k]-H.dot(m)
vtiS = v.T/S
# Loop through all parameters (Kalman filter update step derivative)
for j in range(0,nparam):
# Innovation covariance derivative
dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j];
# Evaluate the energy derivative for j
eg[j] = eg[j] \
- .5*np.sum(iS*dS) \
+ .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
+ .5*vtiS.dot(dS).dot(vtiS.T) \
+ .5*vtiS.dot(H.dot(dm[:,j:j+1]))
# Kalman filter update step derivatives
dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S
dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1])
dKSKt = dK.dot(S).dot(K.T)
dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T
# Evaluate the energy
# e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v)
# Finish Kalman filter update step
m = m + K.dot(v)
P = P - K.dot(S).dot(K.T)
# Make sure the covariances stay symmetric
P = (P+P.T)/2
dP = (dP + dP.transpose([1,0,2]))/2
# raise NameError('Debug me')
# Return the gradient
return eg
def kf_likelihood_g_notstable(self,F,L,Qc,H,R,Pinf,dF,dQc,dPinf,dR,X,Y):
# Evaluate marginal likelihood gradient
# State dimension, number of data points and number of parameters
steps = Y.shape[1]
nparam = dF.shape[2]
n = F.shape[0]
# Time steps
t = X.squeeze()
# Allocate space
e = 0
eg = np.zeros(nparam)
# Set up
Z = np.zeros(F.shape)
QC = L.dot(Qc).dot(L.T)
m = np.zeros([n,1])
P = Pinf.copy()
dm = np.zeros([n,nparam])
dP = dPinf.copy()
mm = m.copy()
PP = P.copy()
# Initial dt
dt = -np.Inf
# Allocate space for expm results
AA = np.zeros([2*F.shape[0], 2*F.shape[0], nparam])
AAA = np.zeros([4*F.shape[0], 4*F.shape[0], nparam])
FF = np.zeros([2*F.shape[0], 2*F.shape[0]])
FFF = np.zeros([4*F.shape[0], 4*F.shape[0]])
# Loop over all observations
for k in range(0,steps):
# The previous time step
dt_old = dt;
# The time discretization step length
if k>0:
dt = t[k]-t[k-1]
else:
dt = t[1]-t[0]
# Loop through all parameters (Kalman filter prediction step)
for j in range(0,nparam):
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# The first matrix for the matrix factor decomposition
FF[:n,:n] = F
FF[n:,:n] = dF[:,:,j]
FF[n:,n:] = F
# Solve the matrix exponential
AA[:,:,j] = linalg.expm3(FF*dt)
# Solve using matrix fraction decomposition
foo = AA[:,:,j].dot(np.vstack([m, dm[:,j:j+1]]))
# Pick the parts
mm = foo[:n,:]
dm[:,j:j+1] = foo[n:,:]
# Should we recalculate the matrix exponential?
if abs(dt-dt_old) > 1e-9:
# Define W and G
W = L.dot(dQc[:,:,j]).dot(L.T)
G = dF[:,:,j];
# The second matrix for the matrix factor decomposition
FFF[:n,:n] = F
FFF[2*n:-n,:n] = G
FFF[:n, n:2*n] = QC
FFF[n:2*n, n:2*n] = -F.T
FFF[2*n:-n,n:2*n] = W
FFF[-n:, n:2*n] = -G.T
FFF[2*n:-n,2*n:-n] = F
FFF[2*n:-n,-n:] = QC
FFF[-n:,-n:] = -F.T
# Solve the matrix exponential
AAA[:,:,j] = linalg.expm3(FFF*dt)
# Solve using matrix fraction decomposition
foo = AAA[:,:,j].dot(np.vstack([P, np.eye(n), dP[:,:,j], np.zeros([n,n])]))
# Pick the parts
C = foo[:n, :]
D = foo[n:2*n, :]
dC = foo[2*n:-n,:]
dD = foo[-n:, :]
# The prediction step covariance (PP = C/D)
if j==0:
PP = linalg.solve(D.T,C.T).T
PP = (PP + PP.T)/2
# Solve dP for j (C/D == P_{k|k-1})
dP[:,:,j] = linalg.solve(D.T,(dC - PP.dot(dD)).T).T
# Set predicted m and P
m = mm
P = PP
# Start the Kalman filter update step and precalculate variables
S = H.dot(P).dot(H.T) + R
# We should calculate the Cholesky factor if S is a matrix
# [LS,notposdef] = chol(S,'lower');
# The Kalman filter update (S is scalar)
HtiS = H.T/S
iS = 1/S
K = P.dot(HtiS)
v = Y[:,k]-H.dot(m)
vtiS = v.T/S
# Loop through all parameters (Kalman filter update step derivative)
for j in range(0,nparam):
# Innovation covariance derivative
dS = H.dot(dP[:,:,j]).dot(H.T) + dR[:,:,j];
# Evaluate the energy derivative for j
eg[j] = eg[j] \
- .5*np.sum(iS*dS) \
+ .5*H.dot(dm[:,j:j+1]).dot(vtiS.T) \
+ .5*vtiS.dot(dS).dot(vtiS.T) \
+ .5*vtiS.dot(H.dot(dm[:,j:j+1]))
# Kalman filter update step derivatives
dK = dP[:,:,j].dot(HtiS) - P.dot(HtiS).dot(dS)/S
dm[:,j:j+1] = dm[:,j:j+1] + dK.dot(v) - K.dot(H).dot(dm[:,j:j+1])
dKSKt = dK.dot(S).dot(K.T)
dP[:,:,j] = dP[:,:,j] - dKSKt - K.dot(dS).dot(K.T) - dKSKt.T
# Evaluate the energy
# e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.diag(LS))) - .5*vtiS.dot(v);
e = e - .5*S.shape[0]*np.log(2*np.pi) - np.sum(np.log(np.sqrt(S))) - .5*vtiS.dot(v)
# Finish Kalman filter update step
m = m + K.dot(v)
P = P - K.dot(S).dot(K.T)
# Make sure the covariances stay symmetric
P = (P+P.T)/2
dP = (dP + dP.transpose([1,0,2]))/2
# raise NameError('Debug me')
# Report
#print e
#print eg
# Return the gradient
return eg
def simulate(self,F,L,Qc,Pinf,X,size=1):
# Simulate a trajectory using the state space model
# Allocate space for results
f = np.zeros((F.shape[0],size,X.shape[1]))
# Initial state
f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
# Time step lengths
dt = np.empty(X.shape)
dt[:,0] = X[:,1]-X[:,0]
dt[:,1:] = np.diff(X)
# Solve the LTI SDE for these time steps
As, Qs, index = self.lti_disc(F,L,Qc,dt)
# Sweep through remaining time points
for k in range(1,X.shape[1]):
# Form discrete-time model
A = As[:,:,index[1-k]]
Q = Qs[:,:,index[1-k]]
# Draw the state
f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
# Return values
return f
def lti_disc(self,F,L,Qc,dt):
# Discrete-time solution to the LTI SDE
# Dimensionality
n = F.shape[0]
index = 0
# Check for numbers of time steps
if dt.flatten().shape[0]==1:
# The covariance matrix by matrix fraction decomposition
Phi = np.zeros((2*n,2*n))
Phi[:n,:n] = F
Phi[:n,n:] = L.dot(Qc).dot(L.T)
Phi[n:,n:] = -F.T
AB = linalg.expm(Phi*dt).dot(np.vstack((np.zeros((n,n)),np.eye(n))))
Q = linalg.solve(AB[n:,:].T,AB[:n,:].T)
# The dynamical model
A = linalg.expm(F*dt)
# Return
return A, Q
# Optimize for cases where time steps occur repeatedly
else:
# Time discretizations (round to 14 decimals to avoid problems)
dt, _, index = np.unique(np.round(dt,14),True,True)
# Allocate space for A and Q
A = np.empty((n,n,dt.shape[0]))
Q = np.empty((n,n,dt.shape[0]))
# Call this function for each dt
for j in range(0,dt.shape[0]):
A[:,:,j], Q[:,:,j] = self.lti_disc(F,L,Qc,dt[j])
# Return
return A, Q, index
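A rough usage sketch for the state-space GP model above. This is only a sketch: it assumes the class can be imported from GPy.models.state_space and that the default Matern32 kernel provides the sde() method the constructor requires.
import numpy as np
from GPy.models.state_space import StateSpace
# One-dimensional (time) inputs only, as asserted in the constructor.
t = np.linspace(0, 10, 200)[:, None]
y = np.sin(t) + 0.1 * np.random.randn(*t.shape)
m = StateSpace(t, y)                 # defaults to a Matern32 kernel
m.optimize()                         # maximises the Kalman-filter likelihood
mu, var, lower, upper = m.predict(np.linspace(0, 12, 50)[:, None])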

File diff suppressed because it is too large

View file

@ -0,0 +1,964 @@
# -*- coding: utf-8 -*-
"""
Contains some cython code for state space modelling.
"""
import numpy as np
cimport numpy as np
import scipy as sp
cimport cython
#from libc.math cimport isnan # for nan checking in kalman filter cycle
cdef extern from "numpy/npy_math.h":
bint npy_isnan(double x)
DTYPE = np.float64
DTYPE_int = np.int64
ctypedef np.float64_t DTYPE_t
ctypedef np.int64_t DTYPE_int_t
# Template class for dynamic callables
cdef class Dynamic_Callables_Cython:
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
raise NotImplementedError("(cython) f_a is not implemented!")
cpdef Ak(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] P): # returns state iteration matrix
raise NotImplementedError("(cython) Ak is not implemented!")
cpdef Qk(self, int k):
raise NotImplementedError("(cython) Qk is not implemented!")
cpdef Q_srk(self, int k):
raise NotImplementedError("(cython) Q_srk is not implemented!")
cpdef dAk(self, int k):
raise NotImplementedError("(cython) dAk is not implemented!")
cpdef dQk(self, int k):
raise NotImplementedError("(cython) dQk is not implemented!")
cpdef reset(self, bint compute_derivatives = False):
raise NotImplementedError("(cython) reset is not implemented!")
# Template class for measurement callables
cdef class Measurement_Callables_Cython:
cpdef f_h(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] Hk):
raise NotImplementedError("(cython) f_h is not implemented!")
cpdef Hk(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns measurement matrix
raise NotImplementedError("(cython) Hk is not implemented!")
cpdef Rk(self, int k):
raise NotImplementedError("(cython) Rk is not implemented!")
cpdef R_isrk(self, int k):
raise NotImplementedError("(cython) R_isrk is not implemented!")
cpdef dHk(self, int k):
raise NotImplementedError("(cython) dHk is not implemented!")
cpdef dRk(self, int k):
raise NotImplementedError("(cython) dRk is not implemented!")
cpdef reset(self,compute_derivatives = False):
raise NotImplementedError("(cython) reset is not implemented!")
cdef class R_handling_Cython(Measurement_Callables_Cython):
"""
This class handles the noise matrix R.
"""
cdef:
np.ndarray R
np.ndarray index
int R_time_var_index
np.ndarray dR
bint svd_each_time
dict R_square_root
def __init__(self, np.ndarray[DTYPE_t, ndim=3] R, np.ndarray[DTYPE_t, ndim=2] index,
int R_time_var_index, int p_unique_R_number, np.ndarray[DTYPE_t, ndim=3] dR = None):
"""
Input:
---------------
R - array with noise on various steps. The result of preprocessing
the noise input.
index - for each step of Kalman filter contains the corresponding index
in the array.
R_time_var_index - another index in the array R. Computed earlier and passed here.
unique_R_number - number of unique noise matrices below which square roots
are cached and above which they are computed each time.
dR: 3D array[:, :, param_num]
derivative of R. The derivative is supported only when R does not change over time
Output:
--------------
Object which has two necessary functions:
f_R(k)
inv_R_square_root(k)
"""
self.R = R
self.index = index
self.R_time_var_index = R_time_var_index
self.dR = dR
cdef int unique_len = len(np.unique(index))
if (unique_len > p_unique_R_number):
self.svd_each_time = True
else:
self.svd_each_time = False
self.R_square_root = {}
cpdef Rk(self, int k):
return self.R[:,:, <int>self.index[self.R_time_var_index, k]]
cpdef dRk(self,int k):
if self.dR is None:
raise ValueError("dR derivative is None")
return self.dR # the same derivative on each iteration
cpdef R_isrk(self, int k):
"""
Function returns the inverse square root of R matrix on step k.
"""
cdef int ind = <int>self.index[self.R_time_var_index, k]
cdef np.ndarray[DTYPE_t, ndim=2] R = self.R[:,:, ind ]
cdef np.ndarray[DTYPE_t, ndim=2] inv_square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if (R.shape[0] == 1): # the 1-D case is handled more simply. No storage
# of the result, just compute it each time.
inv_square_root = np.sqrt( 1.0/R )
else:
if self.svd_each_time:
U,S,Vh = sp.linalg.svd( R,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
inv_square_root = U * 1.0/np.sqrt(S)
else:
if ind in self.R_square_root:
inv_square_root = self.R_square_root[ind]
else:
U,S,Vh = sp.linalg.svd( R,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
inv_square_root = U * 1.0/np.sqrt(S)
self.R_square_root[ind] = inv_square_root
return inv_square_root
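The inverse square root above is built from the SVD of R: for a symmetric positive-definite R = U S U^T, the matrix B = U diag(1/sqrt(S)) satisfies B B^T = R^{-1}. A small NumPy/SciPy check of this identity on a toy matrix (independent of the Cython types above):
import numpy as np
import scipy.linalg as la
R = np.array([[2.0, 0.3],
              [0.3, 1.0]])
U, S, Vh = la.svd(R)            # for symmetric positive-definite R, Vh == U.T
B = U * (1.0 / np.sqrt(S))      # columns of U scaled by 1/sqrt(S)
assert np.allclose(B.dot(B.T), np.linalg.inv(R))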
cdef class Std_Measurement_Callables_Cython(R_handling_Cython):
cdef:
np.ndarray H
int H_time_var_index
np.ndarray dH
def __init__(self, np.ndarray[DTYPE_t, ndim=3] H, int H_time_var_index,
np.ndarray[DTYPE_t, ndim=3] R, np.ndarray[DTYPE_t, ndim=2] index, int R_time_var_index,
int unique_R_number, np.ndarray[DTYPE_t, ndim=3] dH = None,
np.ndarray[DTYPE_t, ndim=3] dR=None):
super(Std_Measurement_Callables_Cython,self).__init__(R, index, R_time_var_index, unique_R_number,dR)
self.H = H
self.H_time_var_index = H_time_var_index
self.dH = dH
cpdef f_h(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] H):
"""
function (k, x_{k}, H_{k}). Measurement function.
k (iteration number), starts at 0
x_{k} state
H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
"""
return np.dot(H, m)
cpdef Hk(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns state iteration matrix
"""
function (k, m, P): returns the Jacobian of the measurement function; it is
passed into p_h.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
"""
return self.H[:,:, <int>self.index[self.H_time_var_index, k]]
cpdef dHk(self,int k):
if self.dH is None:
raise ValueError("dH derivative is None")
return self.dH # the same derivative on each iteration
cdef class Q_handling_Cython(Dynamic_Callables_Cython):
cdef:
np.ndarray Q
np.ndarray index
int Q_time_var_index
np.ndarray dQ
dict Q_square_root
bint svd_each_time
def __init__(self, np.ndarray[DTYPE_t, ndim=3] Q, np.ndarray[DTYPE_t, ndim=2] index,
int Q_time_var_index, int p_unique_Q_number, np.ndarray[DTYPE_t, ndim=3] dQ = None):
"""
Input:
---------------
Q - array with noise on various steps. The result of preprocessing
the noise input.
index - for each step of Kalman filter contains the corresponding index
in the array.
Q_time_var_index - another index in the array R. Computed earlier and passed here.
unique_Q_number - number of unique noise matrices below which square roots
are cached and above which they are computed each time.
dQ: 3D array[:, :, param_num]
derivative of Q. The derivative is supported only when Q does not change over time
Output:
--------------
Object which has three necessary functions:
Qk(k)
dQk(k)
Q_srk(k)
"""
self.Q = Q
self.index = index
self.Q_time_var_index = Q_time_var_index
self.dQ = dQ
cdef int unique_len = len(np.unique(index))
if (unique_len > p_unique_Q_number):
self.svd_each_time = True
else:
self.svd_each_time = False
self.Q_square_root = {}
cpdef Qk(self, int k):
"""
function (k). Returns noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
"""
return self.Q[:,:, <int>self.index[self.Q_time_var_index, k]]
cpdef dQk(self, int k):
if self.dQ is None:
raise ValueError("dQ derivative is None")
return self.dQ # the same derivative on each iteration
cpdef Q_srk(self, int k):
"""
function (k). Returns the square root of noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
This function is implemented to use SVD prediction step.
"""
cdef int ind = <int>self.index[self.Q_time_var_index, k]
cdef np.ndarray[DTYPE_t, ndim=2] Q = self.Q[:,:, ind]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if (Q.shape[0] == 1): # the 1-D case is handled more simply. No storage
# of the result, just compute it each time.
square_root = np.sqrt( Q )
else:
if self.svd_each_time:
U,S,Vh = sp.linalg.svd( Q,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
square_root = U * np.sqrt(S)
else:
if ind in self.Q_square_root:
square_root = self.Q_square_root[ind]
else:
U,S,Vh = sp.linalg.svd( Q,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
square_root = U * np.sqrt(S)
self.Q_square_root[ind] = square_root
return square_root
cdef class Std_Dynamic_Callables_Cython(Q_handling_Cython):
cdef:
np.ndarray A
int A_time_var_index
np.ndarray dA
def __init__(self, np.ndarray[DTYPE_t, ndim=3] A, int A_time_var_index,
np.ndarray[DTYPE_t, ndim=3] Q,
np.ndarray[DTYPE_t, ndim=2] index,
int Q_time_var_index, int unique_Q_number,
np.ndarray[DTYPE_t, ndim=3] dA = None,
np.ndarray[DTYPE_t, ndim=3] dQ=None):
super(Std_Dynamic_Callables_Cython,self).__init__(Q, index, Q_time_var_index, unique_Q_number,dQ)
self.A = A
self.A_time_var_index = A_time_var_index
self.dA = dA
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
"""
f_a: function (k, x_{k-1}, A_{k}). Dynamic function.
k (iteration number), starts at 0
x_{k-1} State from the previous step
A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
"""
return np.dot(A,m)
cpdef Ak(self, int k, np.ndarray[DTYPE_t, ndim=2] m_pred, np.ndarray[DTYPE_t, ndim=2] P_pred): # returns state iteration matrix
"""
function (k, m, P): returns the Jacobian of the dynamic function; it is
passed into f_a.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
"""
return self.A[:,:, <int>self.index[self.A_time_var_index, k]]
cpdef dAk(self, int k):
if self.dA is None:
raise ValueError("dA derivative is None")
return self.dA # the same derivative on each iteration
cpdef reset(self, bint compute_derivatives=False):
"""
For reusing this object e.g. in smoother computation. It makes sense
because the necessary matrices have already been computed for all
time steps.
"""
return self
cdef class AQcompute_batch_Cython(Q_handling_Cython):
"""
Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter
from the matrices F, L, Qc, P_inf, dF, dQc, dP_inf of the continuous state
equation. dt - time steps.
It has the same interface as AQcompute_once.
It computes matrices for all time steps. This object is used when
there are not too many distinct time steps (controlled by an internal variable),
so storing all the matrices does not take too much memory.
Since all the matrices are computed together, this object can be used
in the smoother without repeating the computations.
"""
#def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
cdef:
np.ndarray As
np.ndarray Qs
np.ndarray dAs
np.ndarray dQs
np.ndarray reconstruct_indices
#long total_size_of_data
dict Q_svd_dict
int last_k
def __init__(self, np.ndarray[DTYPE_t, ndim=3] As, np.ndarray[DTYPE_t, ndim=3] Qs,
np.ndarray[DTYPE_int_t, ndim=1] reconstruct_indices,
np.ndarray[DTYPE_t, ndim=4] dAs=None,
np.ndarray[DTYPE_t, ndim=4] dQs=None):
"""
Constructor. All necessary parameters are passed here and stored
in the object.
Input:
-------------------
As, Qs: 3D arrays
Discrete-time transition and noise matrices, one slice per unique time step
reconstruct_indices: 1D array
For each Kalman filter step, the index of the corresponding slice in As and Qs
dAs, dQs: 4D arrays (optional)
Derivatives of As and Qs with respect to the parameters, if they are required
Output:
-------------------
"""
self.As = As
self.Qs = Qs
self.dAs = dAs
self.dQs = dQs
self.reconstruct_indices = reconstruct_indices
self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\
(self.dAs.nbytes if (self.dAs is not None) else 0) +\
(self.dQs.nbytes if (self.dQs is not None) else 0) +\
(self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
self.Q_svd_dict = {}
self.last_k = 0
# !!!Print statistics! Which object is created
# !!!Print statistics! Print sizes of matrices
cpdef f_a(self, int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] A):
"""
Dynamic model
"""
return np.dot(A, m) # default dynamic model
cpdef reset(self, bint compute_derivatives=False):
"""
For reusing this object e.g. in smoother computation. It makes sense
because the necessary matrices have already been computed for all
time steps.
"""
return self
cpdef Ak(self,int k, np.ndarray[DTYPE_t, ndim=2] m, np.ndarray[DTYPE_t, ndim=2] P):
self.last_k = k
return self.As[:,:, <int>self.reconstruct_indices[k]]
cpdef Qk(self,int k):
self.last_k = k
return self.Qs[:,:, <int>self.reconstruct_indices[k]]
cpdef dAk(self, int k):
self.last_k = k
return self.dAs[:,:, :, <int>self.reconstruct_indices[k]]
cpdef dQk(self, int k):
self.last_k = k
return self.dQs[:,:, :, <int>self.reconstruct_indices[k]]
cpdef Q_srk(self, int k):
"""
Square root of the noise matrix Q
"""
cdef int matrix_index = <int>self.reconstruct_indices[k]
cdef np.ndarray[DTYPE_t, ndim=2] square_root
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
if matrix_index in self.Q_svd_dict:
square_root = self.Q_svd_dict[matrix_index]
else:
U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index],
full_matrices=False, compute_uv=True,
overwrite_a=False, check_finite=False)
square_root = U * np.sqrt(S)
self.Q_svd_dict[matrix_index] = square_root
return square_root
# def return_last(self):
# """
# Function returns last available matrices.
# """
#
# if (self.last_k is None):
# raise ValueError("Matrices are not computed.")
# else:
# ind = self.reconstruct_indices[self.last_k]
# A = self.As[:,:, ind]
# Q = self.Qs[:,:, ind]
# dA = self.dAs[:,:, :, ind]
# dQ = self.dQs[:,:, :, ind]
#
# return self.last_k, A, Q, dA, dQ
@cython.boundscheck(False)
def _kalman_prediction_step_SVD_Cython(long k, np.ndarray[DTYPE_t, ndim=2] p_m , tuple p_P,
Dynamic_Callables_Cython p_dynamic_callables,
bint calc_grad_log_likelihood=False,
np.ndarray[DTYPE_t, ndim=3] p_dm = None,
np.ndarray[DTYPE_t, ndim=3] p_dP = None):
"""
Discrete-time prediction function
Input:
k:int
Iteration No. Starts at 0. Total number of iterations equal to the
number of measurements.
p_m: matrix of size (state_dim, time_series_no)
Mean value from the previous step. For "multiple time series mode"
it is a matrix whose second dimension corresponds to different
time series.
p_P: tuple (Prev_cov, S, V)
Covariance matrix from the previous step and its SVD decomposition.
Prev_cov = V * S * V.T The tuple is (Prev_cov, S, V)
p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
k (iteration number), starts at 0
x_{k-1} State from the previous step
A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
p_f_A: function (k, m, P) return Jacobian of dynamic function, it is
passed into p_a.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k.
k (iteration number). starts at 0
p_f_Qsr: function (k). Returns square root of noise matrix of the
dynamic model on iteration k. k (iteration number). starts at 0
calc_grad_log_likelihood: boolean
Whether to calculate gradient of the marginal likelihood
of the state-space model. If true then the next parameter must
provide the extra parameters for gradient calculation.
p_dm: 3D array (state_dim, time_series_no, parameters_no)
Mean derivatives from the previous step. For "multiple time series mode"
it is a 3D array whose second dimension corresponds to different
time series.
p_dP: 3D array (state_dim, state_dim, parameters_no)
Mean derivatives from the previous step
grad_calc_params_1: List or None
List with derivatives. The first component is 'f_dA' - function(k)
which returns the derivative of A. The second element is 'f_dQ'
- function(k). Function which returns the derivative of Q.
Output:
----------------------------
m_pred, P_pred, dm_pred, dP_pred: matrices, 3D objects
Results of the prediction step.
"""
# covariance from the previous step: p_prev_cov = V * S * V.T
cdef np.ndarray[DTYPE_t, ndim=2] Prev_cov = p_P[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_old = p_P[1]
cdef np.ndarray[DTYPE_t, ndim=2] V_old = p_P[2]
#p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step
# index correspond to values from previous iteration.
cdef np.ndarray[DTYPE_t, ndim=2] A = p_dynamic_callables.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian)
cdef np.ndarray[DTYPE_t, ndim=2] Q = p_dynamic_callables.Qk(k) # state noise matrix. This is necessary for the square root calculation (next step)
cdef np.ndarray[DTYPE_t, ndim=2] Q_sr = p_dynamic_callables.Q_srk(k)
# Prediction step ->
cdef np.ndarray[DTYPE_t, ndim=2] m_pred = p_dynamic_callables.f_a(k, p_m, A) # predicted mean
# covariance prediction has changed:
cdef np.ndarray[DTYPE_t, ndim=2] svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) )
res = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
# (U,S,Vh)
cdef np.ndarray[DTYPE_t, ndim=2] U = res[0]
cdef np.ndarray[DTYPE_t, ndim=1] S = res[1]
cdef np.ndarray[DTYPE_t, ndim=2] Vh = res[2]
# predicted variance computed by the regular method. For testing
#P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q
cdef np.ndarray[DTYPE_t, ndim=2] V_new = Vh.T
cdef np.ndarray[DTYPE_t, ndim=1] S_new = S**2
cdef np.ndarray[DTYPE_t, ndim=2] P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance
#tuple P_pred = (P_pred, S_new, Vh.T)
# Prediction step <-
# derivatives
cdef np.ndarray[DTYPE_t, ndim=3] dA_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dQ_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred
cdef np.ndarray[DTYPE_t, ndim=3] dP_pred
cdef int param_number
cdef int j
cdef tuple ret
cdef np.ndarray[DTYPE_t, ndim=2] dA
cdef np.ndarray[DTYPE_t, ndim=2] dQ
if calc_grad_log_likelihood:
dA_all_params = p_dynamic_callables.dAk(k) # derivatives of A wrt parameters
dQ_all_params = p_dynamic_callables.dQk(k) # derivatives of Q wrt parameters
param_number = p_dP.shape[2]
# p_dm, p_dP - derivatives from the previous step
dm_pred = np.empty((p_dm.shape[0], p_dm.shape[1], p_dm.shape[2]), dtype = DTYPE)
dP_pred = np.empty((p_dP.shape[0], p_dP.shape[1], p_dP.shape[2]), dtype = DTYPE)
for j in range(param_number):
dA = dA_all_params[:,:,j]
dQ = dQ_all_params[:,:,j]
dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j])
# prediction step derivatives for current parameter:
dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T))
dP_pred[:,:,j] += dP_pred[:,:,j].T
dP_pred[:,:,j] += np.dot( A ,np.dot( p_dP[:,:,j] , A.T)) + dQ
dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize
else:
dm_pred = None
dP_pred = None
ret = (P_pred, S_new, Vh.T)
return m_pred, ret, dm_pred, dP_pred
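The prediction step above propagates the covariance through its SVD rather than forming A P A^T + Q directly: stacking (sqrt(S) * (A V)).T on top of Q_sr.T and taking the SVD of the stack gives factors whose squared singular values reconstruct the predicted covariance. A small NumPy/SciPy sanity check of that identity on toy matrices (unrelated to the Cython types above):
import numpy as np
import scipy.linalg as la
rng = np.random.RandomState(0)
n = 3
A = rng.randn(n, n)                           # state transition matrix
P = np.array([[2.0, 0.3, 0.0],
              [0.3, 1.0, 0.1],
              [0.0, 0.1, 0.5]])               # previous covariance
S, V = np.linalg.eigh(P)                      # P = V * diag(S) * V.T
Q = np.diag([0.5, 0.2, 0.1])                  # process noise
Q_sr = np.sqrt(Q)                             # its square root (diagonal case)
stacked = np.vstack(((np.sqrt(S) * A.dot(V)).T, Q_sr.T))
_, s, Vh = la.svd(stacked, full_matrices=False)
P_pred_svd = (Vh.T * s**2).dot(Vh)            # V_new * diag(s**2) * V_new.T
assert np.allclose(P_pred_svd, A.dot(P).dot(A.T) + Q)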
@cython.boundscheck(False)
def _kalman_update_step_SVD_Cython(long k, np.ndarray[DTYPE_t, ndim=2] p_m, tuple p_P,
Measurement_Callables_Cython p_measurement_callables,
np.ndarray[DTYPE_t, ndim=2] measurement,
bint calc_log_likelihood= False,
bint calc_grad_log_likelihood=False,
np.ndarray[DTYPE_t, ndim=3] p_dm = None,
np.ndarray[DTYPE_t, ndim=3] p_dP = None):
"""
Input:
k: int
Iteration No. Starts at 0. Total number of iterations equal to the
number of measurements.
m_P: matrix of size (state_dim, time_series_no)
Mean value from the previous step. For "multiple time series mode"
it is a matrix whose second dimension corresponds to different
time series.
p_P: tuple (P_pred, S, V)
Covariance matrix from the prediction step and its SVD decomposition.
P_pred = V * S * V.T The tuple is (P_pred, S, V)
p_h: function (k, x_{k}, H_{k}). Measurement function.
k (iteration number), starts at 0
x_{k} state
H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
p_f_H: function (k, m, P) return Jacobian of dynamic function, it is
passed into p_h.
k (iteration number), starts at 0
m: point where Jacobian is evaluated
P: parameter for Jacobian, usually covariance matrix.
p_f_R: function (k). Returns noise matrix of measurement equation
on iteration k.
k (iteration number). starts at 0
p_f_iRsr: function (k). Returns the square root of the noise matrix of
measurement equation on iteration k.
k (iteration number). starts at 0
measurement: (measurement_dim, time_series_no) matrix
One measurement used on the current update step. For
"multiple time series mode" it is matrix, second dimension of
which correspond to different time series.
calc_log_likelihood: boolean
Whether to calculate marginal likelihood of the state-space model.
calc_grad_log_likelihood: boolean
Whether to calculate gradient of the marginal likelihood
of the state-space model. If true then the next parameter must
provide the extra parameters for gradient calculation.
p_dm: 3D array (state_dim, time_series_no, parameters_no)
Mean derivatives from the prediction step. For "multiple time series mode"
it is a 3D array whose second dimension corresponds to different
time series.
p_dP: array
Covariance derivatives from the prediction step.
grad_calc_params_2: List or None
List with derivatives. The first component is 'f_dH' - function(k)
which returns the derivative of H. The second element is 'f_dR'
- function(k). Function which returns the derivative of R.
Output:
----------------------------
m_upd, P_upd, dm_upd, dP_upd: matrices, 3D objects
Results of the update step.
log_likelihood_update: double or 1D array
Update to the log_likelihood from this step
d_log_likelihood_update: (grad_params_no, time_series_no) matrix
Update to the gradient of log_likelihood, "multiple time series mode"
adds extra columns to the gradient.
"""
cdef np.ndarray[DTYPE_t, ndim=2] m_pred = p_m # from prediction step
#P_pred,S_pred,V_pred = p_P # from prediction step
cdef np.ndarray[DTYPE_t, ndim=2] P_pred = p_P[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_pred = p_P[1]
cdef np.ndarray[DTYPE_t, ndim=2] V_pred = p_P[2]
cdef np.ndarray[DTYPE_t, ndim=2] H = p_measurement_callables.Hk(k, m_pred, P_pred)
cdef np.ndarray[DTYPE_t, ndim=2] R = p_measurement_callables.Rk(k)
cdef np.ndarray[DTYPE_t, ndim=2] R_isr =p_measurement_callables.R_isrk(k) # square root of the inverse of R matrix
cdef int time_series_no = p_m.shape[1] # number of time series
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood_update # log_likelihood_update=None;
# Update step (only if there is data)
#if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
cdef np.ndarray[DTYPE_t, ndim=2] v = measurement-p_measurement_callables.f_h(k, m_pred, H)
cdef np.ndarray[DTYPE_t, ndim=2] svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) )
res = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
#(U,S,Vh)
cdef np.ndarray[DTYPE_t, ndim=2] U = res[0]
cdef np.ndarray[DTYPE_t, ndim=1] S_svd = res[1]
cdef np.ndarray[DTYPE_t, ndim=2] Vh = res[2]
# P_upd = U_upd S_upd**2 U_upd.T
cdef np.ndarray[DTYPE_t, ndim=2] U_upd = np.dot(V_pred, Vh.T)
cdef np.ndarray[DTYPE_t, ndim=1] S_upd = (1.0/S_svd)**2
cdef np.ndarray[DTYPE_t, ndim=2] P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance
#P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step
# still need to compute S and K for derivative computation
cdef np.ndarray[DTYPE_t, ndim=2] S = H.dot(P_pred).dot(H.T) + R
cdef np.ndarray[DTYPE_t, ndim=2] K
cdef bint measurement_dim_gt_one = False
if measurement.shape[0]==1: # measurements are one dimensional
if (S < 0):
raise ValueError("Kalman Filter Update SVD: S is negative step %i" % k )
#import pdb; pdb.set_trace()
K = P_pred.dot(H.T) / S
if calc_log_likelihood:
log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
v*v / S)
#log_likelihood_update = log_likelihood_update[0,0] # to make int
if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None.
raise ValueError("Nan values in likelihood update!")
else:
log_likelihood_update = None
#LL = None; islower = None
else:
measurement_dim_gt_one = True
raise ValueError("""Measurement dimension larger then 1 is currently not supported""")
# Old method of computing updated covariance (for testing) ->
#P_upd_tst = K.dot(S).dot(K.T)
#P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T)
#P_upd_tst = P_pred - P_upd_tst# this update matrix is symmetric
# Old method of computing updated covariance (for testing) <-
cdef np.ndarray[DTYPE_t, ndim=3] dm_upd # dm_upd=None;
cdef np.ndarray[DTYPE_t, ndim=3] dP_upd # dP_upd=None;
cdef np.ndarray[DTYPE_t, ndim=2] d_log_likelihood_update # d_log_likelihood_update=None
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dP_pred_all_params
cdef int param_number
cdef np.ndarray[DTYPE_t, ndim=3] dH_all_params
cdef np.ndarray[DTYPE_t, ndim=3] dR_all_params
cdef int param
cdef np.ndarray[DTYPE_t, ndim=2] dH, dR, dm_pred, dP_pred, dv, dS, tmp1, tmp2, tmp3, dK, tmp5
cdef tuple ret
if calc_grad_log_likelihood:
dm_pred_all_params = p_dm # derivativas of the prediction phase
dP_pred_all_params = p_dP
param_number = p_dP.shape[2]
dH_all_params = p_measurement_callables.dHk(k)
dR_all_params = p_measurement_callables.dRk(k)
dm_upd = np.empty((dm_pred_all_params.shape[0], dm_pred_all_params.shape[1], dm_pred_all_params.shape[2]), dtype = DTYPE)
dP_upd = np.empty((dP_pred_all_params.shape[0], dP_pred_all_params.shape[1], dP_pred_all_params.shape[2]), dtype = DTYPE)
# first dimension: parameter index, second: time series index
d_log_likelihood_update = np.empty((param_number,time_series_no), dtype = DTYPE)
for param in range(param_number):
dH = dH_all_params[:,:,param]
dR = dR_all_params[:,:,param]
dm_pred = dm_pred_all_params[:,:,param]
dP_pred = dP_pred_all_params[:,:,param]
# Terms in the likelihood derivatives
dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred)
dS = np.dot(dH, np.dot( P_pred, H.T))
dS += dS.T
dS += np.dot(H, np.dot( dP_pred, H.T)) + dR
# TODO: maybe symmetrize dS
tmp1 = H.T / S
tmp2 = dH.T / S
tmp3 = dS.T / S
dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \
np.dot( P_pred, np.dot( tmp1, tmp3 ) )
# terms required for the next step, save this for each parameter
dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))
dP_upd[:,:,param] += dP_upd[:,:,param].T
dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize
# computing the likelihood change for each parameter:
tmp5 = v / S
d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) )
# Compute the actual updates for mean of the states. Variance update
# is computed earlier.
else:
dm_upd = None
dP_upd = None
d_log_likelihood_update = None
m_upd = m_pred + K.dot( v )
ret = (P_upd,S_upd,U_upd)
return m_upd, ret, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
@cython.boundscheck(False)
def _cont_discr_kalman_filter_raw_Cython(int state_dim, Dynamic_Callables_Cython p_dynamic_callables,
Measurement_Callables_Cython p_measurement_callables, X, Y,
np.ndarray[DTYPE_t, ndim=2] m_init=None, np.ndarray[DTYPE_t, ndim=2] P_init=None,
p_kalman_filter_type='regular',
bint calc_log_likelihood=False,
bint calc_grad_log_likelihood=False,
int grad_params_no=0,
np.ndarray[DTYPE_t, ndim=3] dm_init=None,
np.ndarray[DTYPE_t, ndim=3] dP_init=None):
cdef int steps_no = Y.shape[0] # number of steps in the Kalman Filter
cdef int time_series_no = Y.shape[2] # multiple time series mode
# Allocate space for results
# Mean estimations. Initial values will be included
cdef np.ndarray[DTYPE_t, ndim=3] M = np.empty(((steps_no+1),state_dim,time_series_no), dtype=DTYPE)
M[0,:,:] = m_init # Initialize mean values
# Variance estimations. Initial values will be included
cdef np.ndarray[DTYPE_t, ndim=3] P = np.empty(((steps_no+1),state_dim,state_dim))
P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some unstable cases this is useful
P[0,:,:] = P_init # Initialize initial covariance matrix
cdef np.ndarray[DTYPE_t, ndim=2] U
cdef np.ndarray[DTYPE_t, ndim=1] S
cdef np.ndarray[DTYPE_t, ndim=2] Vh
U,S,Vh = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True,
overwrite_a=False,check_finite=True)
S[ (S==0) ] = 1e-17 # allows the algorithm to run with a singular initial covariance
cdef tuple P_upd = (P_init, S,U)
#log_likelihood = 0
#grad_log_likelihood = np.zeros((grad_params_no,1))
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood = np.zeros((1, time_series_no), dtype = DTYPE) #if calc_log_likelihood else None
cdef np.ndarray[DTYPE_t, ndim=2] grad_log_likelihood = np.zeros((grad_params_no, time_series_no), dtype = DTYPE) #if calc_grad_log_likelihood else None
#setting initial values for derivatives update
cdef np.ndarray[DTYPE_t, ndim=3] dm_upd = dm_init
cdef np.ndarray[DTYPE_t, ndim=3] dP_upd = dP_init
# Main loop of the Kalman filter
cdef np.ndarray[DTYPE_t, ndim=2] prev_mean, k_measurment
cdef np.ndarray[DTYPE_t, ndim=2] m_pred, m_upd
cdef tuple P_pred
cdef np.ndarray[DTYPE_t, ndim=3] dm_pred, dP_pred
cdef np.ndarray[DTYPE_t, ndim=2] log_likelihood_update, d_log_likelihood_update
cdef int k
#print "Hi I am cython"
for k in range(0,steps_no):
# In this loop index for new estimations is (k+1), old - (k)
# This happens because the initial values are stored at the 0-th index.
#import pdb; pdb.set_trace()
prev_mean = M[k,:,:] # mean from the previous step
m_pred, P_pred, dm_pred, dP_pred = \
_kalman_prediction_step_SVD_Cython(k, prev_mean ,P_upd, p_dynamic_callables,
calc_grad_log_likelihood, dm_upd, dP_upd)
k_measurment = Y[k,:,:]
if (np.any(np.isnan(k_measurment)) == False):
# if np.any(np.isnan(k_measurment)):
# raise ValueError("Nan measurements are currently not supported")
m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
_kalman_update_step_SVD_Cython(k, m_pred , P_pred, p_measurement_callables,
k_measurment, calc_log_likelihood=calc_log_likelihood,
calc_grad_log_likelihood=calc_grad_log_likelihood,
p_dm = dm_pred, p_dP = dP_pred)
else:
if not np.all(np.isnan(k_measurment)):
raise ValueError("""Nan measurements are currently not supported if
they are intermixed with not NaN measurements""")
else:
m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
if calc_log_likelihood:
log_likelihood_update = np.zeros((1,time_series_no))
if calc_grad_log_likelihood:
d_log_likelihood_update = np.zeros((grad_params_no,time_series_no))
if calc_log_likelihood:
log_likelihood += log_likelihood_update
if calc_grad_log_likelihood:
grad_log_likelihood += d_log_likelihood_update
M[k+1,:,:] = m_upd # separate mean value for each time series
P[k+1,:,:] = P_upd[0]
return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False))

File diff suppressed because it is too large

View file

@ -0,0 +1,424 @@
# Copyright (c) 2013, Arno Solin.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# This implementation of converting GPs to state space models is based on the article:
#
# @article{Sarkka+Solin+Hartikainen:2013,
# author = {Simo S\"arkk\"a and Arno Solin and Jouni Hartikainen},
# year = {2013},
# title = {Spatiotemporal learning via infinite-dimensional {B}ayesian filtering and smoothing},
# journal = {IEEE Signal Processing Magazine},
# volume = {30},
# number = {4},
# pages = {51--61}
# }
#
import numpy as np
from scipy import stats
from .. import likelihoods
#from . import state_space_setup as ss_setup
from ..core import Model
from . import state_space_main as ssm
from . import state_space_setup as ss_setup
class StateSpace(Model):
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
super(StateSpace, self).__init__(name=name)
if len(X.shape) == 1:
X = np.atleast_2d(X).T
self.num_data, self.input_dim = X.shape
if len(Y.shape) == 1:
Y = np.atleast_2d(Y).T
assert self.input_dim==1, "State space methods are only for 1D data"
if len(Y.shape)==2:
num_data_Y, self.output_dim = Y.shape
ts_number = None
elif len(Y.shape)==3:
num_data_Y, self.output_dim, ts_number = Y.shape
self.ts_number = ts_number
assert num_data_Y == self.num_data, "X and Y data don't match"
assert self.output_dim == 1, "State space methods are for single outputs only"
self.kalman_filter_type = kalman_filter_type
#self.kalman_filter_type = 'svd' # temp test
ss_setup.use_cython = use_cython
#import pdb; pdb.set_trace()
global ssm
#from . import state_space_main as ssm
if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
reload(ssm)
# Make sure the observations are ordered in time
sort_index = np.argsort(X[:,0])
self.X = X[sort_index]
self.Y = Y[sort_index]
# Noise variance
self.likelihood = likelihoods.Gaussian(variance=noise_var)
# Default kernel
if kernel is None:
raise ValueError("State-Space Model: the kernel must be provided.")
else:
self.kern = kernel
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
self.posterior = None
# Assert that the kernel is supported
if not hasattr(self.kern, 'sde'):
raise NotImplementedError('SDE must be implemented for the kernel being used')
#assert self.kern.sde() not False, "This kernel is not supported for state space estimation"
def parameters_changed(self):
"""
Parameters have now changed
"""
#np.set_printoptions(16)
#print(self.param_array)
#import pdb; pdb.set_trace()
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
# necessary parameters
measurement_dim = self.output_dim
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
# add measurement noise as a parameter and get the gradient matrices
dF = np.zeros([dFt.shape[0],dFt.shape[1],grad_params_no])
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
# Assign the values for the kernel function
dF[:,:,:-1] = dFt
dQc[:,:,:-1] = dQct
dP_inf[:,:,:-1] = dP_inft
dP0[:,:,:-1] = dP0t
# The sigma2 derivative
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
dR[:,:,-1] = np.eye(measurement_dim)
# Balancing
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
# Use the Kalman filter to evaluate the likelihood
grad_calc_params = {}
grad_calc_params['dP_inf'] = dP_inf
grad_calc_params['dF'] = dF
grad_calc_params['dQc'] = dQc
grad_calc_params['dR'] = dR
grad_calc_params['dP_init'] = dP0
kalman_filter_type = self.kalman_filter_type
# The following code is required because sometimes the shape of self.Y
# becomes 3D even though it must be 2D. The reason is undiscovered.
Y = self.Y
if self.ts_number is None:
Y.shape = (self.num_data,1)
else:
Y.shape = (self.num_data,1,self.ts_number)
(filter_means, filter_covs, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
calc_grad_log_likelihood=True,
grad_params_no=grad_params_no,
grad_calc_params=grad_calc_params)
if not np.all(np.isfinite(log_likelihood)):
#import pdb; pdb.set_trace()
print("State-Space: NaN values in the log_likelihood")
if not np.all(np.isfinite(grad_log_likelihood)):
#import pdb; pdb.set_trace()
print("State-Space: NaN values in the grad_log_likelihood")
#print(grad_log_likelihood)
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
def log_likelihood(self):
return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
"""
Performs the actual prediction for new X points.
Internal function; it is called only from inside this class.
Input:
---------------------
Xnew: vector or (n_points,1) matrix
New time points where to evaluate predictions.
Ynew: (n_train_points, ts_no) matrix
This matrix can substitute the original training points (in order
to use only the parameters of the model).
filteronly: bool
Use only the Kalman filter for prediction. In this case the output does
not coincide with the corresponding Gaussian process.
Output:
--------------------
m: vector
Mean prediction
V: vector
Variance at every point
"""
# Set defaults
if Ynew is None:
Ynew = self.Y
# Make a single matrix containing training and testing points
if Xnew is not None:
X = np.vstack((self.X, Xnew))
Y = np.vstack((Ynew, np.nan*np.zeros(Xnew.shape)))
predict_only_training = False
else:
X = self.X
Y = Ynew
predict_only_training = True
# Sort the matrix (save the order)
_, return_index, return_inverse = np.unique(X,True,True)
X = X[return_index] # TODO they are not used
Y = Y[return_index]
# Get the model matrices from the kernel
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
state_dim = F.shape[0]
#Y = self.Y[:, 0,0]
# Run the Kalman filter
#import pdb; pdb.set_trace()
kalman_filter_type = self.kalman_filter_type
(M, P, log_likelihood,
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,X,Y,m_init=None,
P_init=P0, p_kalman_filter_type = kalman_filter_type,
calc_log_likelihood=False,
calc_grad_log_likelihood=False)
# (filter_means, filter_covs, log_likelihood,
# grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
# float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
# P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
# calc_grad_log_likelihood=True,
# grad_params_no=grad_params_no,
# grad_calc_params=grad_calc_params)
# Run the Rauch-Tung-Striebel smoother
if not filteronly:
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
p_dynamic_callables=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
# remove initial values
M = M[1:,:,:]
P = P[1:,:,:]
# Put the data back in the original order
M = M[return_inverse,:,:]
P = P[return_inverse,:,:]
# Only return the values for Xnew
if not predict_only_training:
M = M[self.num_data:,:,:]
P = P[self.num_data:,:,:]
# Calculate the mean and variance
# after einsum m has dimension in 3D (sample_num, dim_no,time_series_no)
m = np.einsum('ijl,kj', M, H)# np.dot(M,H.T)
m.shape = (m.shape[0], m.shape[1]) # remove the third dimension
V = np.einsum('ij,ajk,kl', H, P, H.T)
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
# Return the posterior of the state
return (m, V)
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
# Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance
if include_likelihood:
V += float(self.likelihood.variance)
# Lower and upper bounds
#lower = m - 2*np.sqrt(V)
#upper = m + 2*np.sqrt(V)
# Return mean and variance
return m, V
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
mu, var = self._raw_predict(Xnew)
#import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
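Continuing the construction sketch above (names assumed from that sketch), prediction and quantiles could be used along these lines:

Xnew = np.linspace(10, 12, 50)[:, None]
mean, var = m.predict(Xnew)                        # RTS-smoothed prediction plus noise variance
mean_f, var_f = m.predict(Xnew, filteronly=True)   # forward Kalman filter only
lower, upper = m.predict_quantiles(Xnew, quantiles=(2.5, 97.5))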
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
# ax=None, resolution=None, plot_raw=False, plot_filter=False,
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
#
# # Deal with optional parameters
# if ax is None:
# fig = pb.figure(num=fignum)
# ax = fig.add_subplot(111)
#
# # Define the frame on which to plot
# resolution = resolution or 200
# Xgrid, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
#
# # Make a prediction on the frame and plot it
# if plot_raw:
# m, v = self.predict_raw(Xgrid,filteronly=plot_filter)
# lower = m - 2*np.sqrt(v)
# upper = m + 2*np.sqrt(v)
# Y = self.Y
# else:
# m, v, lower, upper = self.predict(Xgrid,filteronly=plot_filter)
# Y = self.Y
#
# # Plot the values
# gpplot(Xgrid, m, lower, upper, axes=ax, edgecol=linecol, fillcol=fillcol)
# ax.plot(self.X, self.Y, 'kx', mew=1.5)
#
# # Optionally plot some samples
# if samples:
# if plot_raw:
# Ysim = self.posterior_samples_f(Xgrid, samples)
# else:
# Ysim = self.posterior_samples(Xgrid, samples)
# for yi in Ysim.T:
# ax.plot(Xgrid, yi, Tango.colorsHex['darkBlue'], linewidth=0.25)
#
# # Set the limits of the plot to some sensible values
# ymin, ymax = min(np.append(Y.flatten(), lower.flatten())), max(np.append(Y.flatten(), upper.flatten()))
# ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
#
# def prior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Allocate space for results
# Y = np.empty((size,X.shape[0]))
#
# # Simulate random draws
# #for j in range(0,size):
# # Y[j,:] = H.dot(self.simulate(F,L,Qc,Pinf,X.T))
# Y = self.simulate(F,L,Qc,Pinf,X.T,size)
#
# # Only observations
# Y = np.tensordot(H[0],Y,(0,0))
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return trajectory
# return Y.T
#
# def posterior_samples_f(self,X,size=10):
#
# # Sort the matrix (save the order)
# (_, return_index, return_inverse) = np.unique(X,True,True)
# X = X[return_index]
#
# # Get the model matrices from the kernel
# (F,L,Qc,H,Pinf,dF,dQc,dPinf) = self.kern.sde()
#
# # Run smoother on original data
# (m,V) = self.predict_raw(X)
#
# # Simulate random draws from the GP prior
# y = self.prior_samples_f(np.vstack((self.X, X)),size)
#
# # Allocate space for sample trajectories
# Y = np.empty((size,X.shape[0]))
#
# # Run the RTS smoother on each of these values
# for j in range(0,size):
# yobs = y[0:self.num_data,j:j+1] + np.sqrt(self.sigma2)*np.random.randn(self.num_data,1)
# (m2,V2) = self.predict_raw(X,Ynew=yobs)
# Y[j,:] = m.T + y[self.num_data:,j].T - m2.T
#
# # Reorder simulated values
# Y = Y[:,return_inverse]
#
# # Return posterior sample trajectories
# return Y.T
#
# def posterior_samples(self, X, size=10):
#
# # Make samples of f
# Y = self.posterior_samples_f(X,size)
#
# # Add noise
# Y += np.sqrt(self.sigma2)*np.random.randn(Y.shape[0],Y.shape[1])
#
# # Return trajectory
# return Y
#
#
# def simulate(self,F,L,Qc,Pinf,X,size=1):
# # Simulate a trajectory using the state space model
#
# # Allocate space for results
# f = np.zeros((F.shape[0],size,X.shape[1]))
#
# # Initial state
# f[:,:,1] = np.linalg.cholesky(Pinf).dot(np.random.randn(F.shape[0],size))
#
# # Time step lengths
# dt = np.empty(X.shape)
# dt[:,0] = X[:,1]-X[:,0]
# dt[:,1:] = np.diff(X)
#
# # Solve the LTI SDE for these time steps
# As, Qs, index = ssm.ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt)
#
# # Sweep through remaining time points
# for k in range(1,X.shape[1]):
#
# # Form discrete-time model
# A = As[:,:,index[1-k]]
# Q = Qs[:,:,index[1-k]]
#
# # Draw the state
# f[:,:,k] = A.dot(f[:,:,k-1]) + np.dot(np.linalg.cholesky(Q),np.random.randn(A.shape[0],size))
#
# # Return values
# return f

View file

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2015, Alex Grigorevskiy
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
This module is intended for the setup of the state_space_main module.
It exists because of the way the state_space_main module is
connected with the Cython code.
"""
use_cython = False
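A sketch of how this flag is consumed, mirroring the logic in StateSpace.__init__ above (module paths are assumptions):

from importlib import reload
import GPy.models.state_space_setup as ss_setup
import GPy.models.state_space_main as ssm

ss_setup.use_cython = True   # request the Cython back-end before building models
if ssm.cython_code_available and ssm.use_cython != ss_setup.use_cython:
    reload(ssm)              # re-import so state_space_main picks up the new flag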

View file

@ -2,65 +2,56 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.warping_functions import *
#from ..util.warping_functions import *
from ..core import GP
from .. import likelihoods
from GPy.util.warping_functions import TanhWarpingFunction_d
from paramz import ObsAr
#from GPy.util.warping_functions import TanhFunction
from ..util.warping_functions import TanhFunction
from GPy import kern
class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
"""
This defines a GP Regression model that applies a
warping function to the output.
"""
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalizer=False):
if kernel is None:
kernel = kern.RBF(X.shape[1])
if warping_function == None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_function = TanhFunction(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1)
else:
self.warping_function = warping_function
self.scale_data = False
if self.scale_data:
Y = self._scale_data(Y)
#self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = True
likelihood = likelihoods.Gaussian()
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
super(WarpedGP, self).__init__(X, Y.copy(), likelihood=likelihood, kernel=kernel, normalizer=normalizer)
self.Y_normalized = self.Y_normalized.copy()
self.Y_untransformed = self.Y_normalized.copy()
self.predict_in_warped_space = True
self.link_parameter(self.warping_function)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def set_XY(self, X=None, Y=None):
super(WarpedGP, self).set_XY(X, Y)
self.Y_untransformed = self.Y_normalized.copy()
self.update_model(True)
def parameters_changed(self):
self.Y[:] = self.transform_data()
"""
Notice that we update the warping function gradients here.
"""
self.Y_normalized[:] = self.transform_data()
super(WarpedGP, self).parameters_changed()
Kiy = self.posterior.woodbury_vector.flatten()
grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
warping_grads = -dquad_dpsi + djac_dpsi
self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
self.warping_function.d.gradient[:] = warping_grads[0, -1]
self.warping_function.update_grads(self.Y_untransformed, Kiy)
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
return Y
def log_likelihood(self):
"""
Notice that we add the Jacobian of the warping function here.
"""
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
return ll + np.log(jacobian).sum()
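In symbols (notation assumed), with warping function f applied to the raw targets, the value returned above is

.. math::
    \log p(y | X, \theta) = \log \mathcal{N}\left(f(y) \,|\, 0,\; K + \sigma^2 I\right) + \sum_i \log f'(y_i)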
@ -73,36 +64,42 @@ class WarpedGP(GP):
arg2 = np.ones(shape=gh_samples.shape).dot(mean.T)
return self.warping_function.f_inv(arg1 + arg2, y=pred_init)
def _get_warped_mean(self, mean, std, pred_init=None, deg_gauss_hermite=100):
def _get_warped_mean(self, mean, std, pred_init=None, deg_gauss_hermite=20):
"""
Calculate the warped mean by using Gauss-Hermite quadrature.
"""
gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite)
gh_samples = gh_samples[:,None]
gh_weights = gh_weights[None,:]
gh_samples = gh_samples[:, None]
gh_weights = gh_weights[None, :]
return gh_weights.dot(self._get_warped_term(mean, std, gh_samples)) / np.sqrt(np.pi)
def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=100):
def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=20):
"""
Calculate the warped variance by using Gauss-Hermite quadrature.
"""
gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite)
gh_samples = gh_samples[:,None]
gh_weights = gh_weights[None,:]
gh_samples = gh_samples[:, None]
gh_weights = gh_weights[None, :]
arg1 = gh_weights.dot(self._get_warped_term(mean, std, gh_samples,
pred_init=pred_init) ** 2) / np.sqrt(np.pi)
arg2 = self._get_warped_mean(mean, std, pred_init=pred_init,
deg_gauss_hermite=deg_gauss_hermite)
return arg1 - (arg2 ** 2)
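For reference, a self-contained sketch of the Gauss-Hermite approximation used by the two helpers above, for E[g(f)] with f ~ N(mu, std^2); the helper name and the function g are placeholders:

import numpy as np

def gauss_hermite_expectation(g, mu, std, degree=20):
    # E[g(f)] for f ~ N(mu, std**2): substitute f = mu + sqrt(2)*std*x so the
    # Gauss-Hermite weight exp(-x**2) appears, then sum weights * g(nodes).
    x, w = np.polynomial.hermite.hermgauss(degree)
    return w.dot(g(mu + np.sqrt(2.0) * std * x)) / np.sqrt(np.pi)

# e.g. the warped mean and second moment follow by passing
# g = warping_function.f_inv and g = lambda z: warping_function.f_inv(z)**2.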
def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None,
median=False, deg_gauss_hermite=100):
# normalize X values
# Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew)
def predict(self, Xnew, kern=None, pred_init=None, Y_metadata=None,
median=False, deg_gauss_hermite=20, likelihood=None):
"""
Prediction results depend on:
- The value of the self.predict_in_warped_space flag
- The median flag passed as an argument
The likelihood keyword is never used; it is only there to follow the plotting API.
"""
#mu, var = GP._raw_predict(self, Xnew)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var)
#mean, var = self.likelihood.predictive_values(mu, var)
mean, var = super(WarpedGP, self).predict(Xnew, kern=kern, full_cov=False, likelihood=likelihood)
if self.predict_in_warped_space:
std = np.sqrt(var)
@ -116,13 +113,9 @@ class WarpedGP(GP):
else:
wmean = mean
wvar = var
if self.scale_data:
pred = self._unscale_data(pred)
return wmean, wvar
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, kern=None):
"""
Get the predictive quantiles around the prediction at X
@ -133,19 +126,38 @@ class WarpedGP(GP):
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
#return [a, b]
if not self.predict_in_warped_space:
return [a, b]
#print a.shape
new_a = self.warping_function.f_inv(a)
new_b = self.warping_function.f_inv(b)
qs = super(WarpedGP, self).predict_quantiles(X, quantiles, Y_metadata=Y_metadata, likelihood=likelihood, kern=kern)
if self.predict_in_warped_space:
return [self.warping_function.f_inv(q) for q in qs]
return qs
#m, v = self._raw_predict(X, full_cov=False)
#if self.normalizer is not None:
# m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
#a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
#if not self.predict_in_warped_space:
# return [a, b]
#new_a = self.warping_function.f_inv(a)
#new_b = self.warping_function.f_inv(b)
#return [new_a, new_b]
return [new_a, new_b]
#return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
"""
Calculation of the log predictive density. Notice that we add
the Jacobian of the warping function here.
.. math::
p(y_{*}|D) = \int p(y_{*}|f_{*})\,p(f_{*}|\mu_{*},\sigma^{2}_{*})\,df_{*}
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param Y_metadata: metadata associated with the test points
"""
mu_star, var_star = self._raw_predict(x_test)
fy = self.warping_function.f(y_test)
ll_lpd = self.likelihood.log_predictive_density(fy, mu_star, var_star, Y_metadata=Y_metadata)
return ll_lpd + np.log(self.warping_function.fgrad_y(y_test))
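A minimal end-to-end usage sketch (data, warping terms and shapes are illustrative assumptions):

import numpy as np
import GPy

X = np.random.uniform(0, 10, (100, 1))
Y = np.exp(np.sin(X) + 0.1 * np.random.randn(100, 1))   # positive, skewed targets
m = GPy.models.WarpedGP(X, Y, warping_terms=3)
m.optimize()
mean, var = m.predict(X)                                 # back in the original Y space
q5, q95 = m.predict_quantiles(X, quantiles=(5., 95.))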
if __name__ == '__main__':

View file

@ -50,8 +50,23 @@ def inject_plotting():
GP.plot_samples = gpy_plot.gp_plots.plot_samples
GP.plot = gpy_plot.gp_plots.plot
GP.plot_f = gpy_plot.gp_plots.plot_f
GP.plot_latent = gpy_plot.gp_plots.plot_f
GP.plot_noiseless = gpy_plot.gp_plots.plot_f
GP.plot_magnification = gpy_plot.latent_plots.plot_magnification
from ..models import StateSpace
StateSpace.plot_data = gpy_plot.data_plots.plot_data
StateSpace.plot_data_error = gpy_plot.data_plots.plot_data_error
StateSpace.plot_errorbars_trainset = gpy_plot.data_plots.plot_errorbars_trainset
StateSpace.plot_mean = gpy_plot.gp_plots.plot_mean
StateSpace.plot_confidence = gpy_plot.gp_plots.plot_confidence
StateSpace.plot_density = gpy_plot.gp_plots.plot_density
StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples
StateSpace.plot = gpy_plot.gp_plots.plot
StateSpace.plot_f = gpy_plot.gp_plots.plot_f
StateSpace.plot_latent = gpy_plot.gp_plots.plot_f
StateSpace.plot_noiseless = gpy_plot.gp_plots.plot_f
from ..core import SparseGP
SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing
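The effect of this injection, sketched (the backend call and toy data are assumptions, not taken from this diff):

import numpy as np
import GPy

GPy.plotting.change_plotting_library('matplotlib')
m = GPy.models.GPRegression(np.random.rand(20, 1), np.random.rand(20, 1))
m.plot()           # attached above as gpy_plot.gp_plots.plot
m.plot_latent()    # alias for plot_f, as wired above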

View file

@ -61,6 +61,8 @@ class AbstractPlottingLibrary(object):
"""
Get a new figure with nrows and ncolumns subplots.
Does not initialize the canvases yet.
There are individual kwargs for each plotting library to use.
"""
raise NotImplementedError("Implement all plot functions in AbstractPlottingLibrary in order to use your own plotting library")

View file

@ -158,7 +158,7 @@ def _plot_data_error(self, canvas, which_data_rows='all',
return plots
def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **plot_kwargs):
def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', legend=True, **plot_kwargs):
"""
Plot the inducing inputs of a sparse gp model
@ -167,7 +167,7 @@ def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **
"""
canvas, kwargs = pl().new_canvas(projection=projection, **plot_kwargs)
plots = _plot_inducing(self, canvas, visible_dims, projection, label, **kwargs)
return pl().add_to_canvas(canvas, plots, legend=label is not None)
return pl().add_to_canvas(canvas, plots, legend=legend)
def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs):
if visible_dims is None:
@ -175,7 +175,7 @@ def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs)
visible_dims = [i for i in sig_dims if i is not None]
free_dims = get_free_dims(self, visible_dims, None)
Z = self.Z[:, free_dims]
Z = self.Z.values
plots = {}
#one dimensional plotting

View file

@ -112,26 +112,29 @@ def plot_latent_inducing(self,
which_indices=None,
legend=False,
plot_limits=None,
marker='^',
num_samples=1000,
marker=None,
projection='2d',
**kwargs):
"""
Plot a scatter plot of the inducing inputs.
:param array-like labels: a label for each data point (row) of the inputs
:param (int, int) which_indices: which input dimensions to plot against each other
:param [int] which_indices: which input dimensions to plot against each other
:param bool legend: whether to plot the legend on the figure
:param plot_limits: the plot limits for the plot
:type plot_limits: (xmin, xmax, ymin, ymax) or ((xmin, xmax), (ymin, ymax))
:param str marker: markers to use - cycle if more labels then markers are given
:param str marker: marker to use [default is custom arrow like]
:param kwargs: the kwargs for the scatter plots
:param str projection: for now 2d or 3d projection (other projections can be implemented, see developer documentation)
"""
canvas, projection, kwargs, sig_dims = _new_canvas(self, projection, kwargs, which_indices)
Z = self.Z.values
labels = np.array(['inducing'] * Z.shape[0])
scatters = _plot_latent_scatter(canvas, Z, sig_dims, labels, marker, num_samples, projection=projection, **kwargs)
if legend: label = 'inducing'
else: label = None
if marker is not None:
kwargs['marker'] = marker
update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable
from .data_plots import _plot_inducing
scatters = _plot_inducing(self, canvas, sig_dims[:2], projection, label, **kwargs)
return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend)

View file

@ -31,6 +31,7 @@
import numpy as np
from scipy import sparse
import itertools
from ...models import WarpedGP
def in_ipynb():
try:
@ -190,6 +191,7 @@ def scatter_label_generator(labels, X, visible_dims, marker=None):
x = X[index, input_1]
y = X[index, input_2]
z = X[index, input_3]
yield x, y, z, this_label, index, m
def subsample_X(X, labels, num_samples=1000):
@ -294,6 +296,10 @@ def get_x_y_var(model):
Y = model.Y.values
except AttributeError:
Y = model.Y
if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
Y = model.Y_normalized
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
return X, X_variance, Y
@ -379,5 +385,5 @@ def x_frame2D(X,plot_limits=None,resolution=None):
resolution = resolution or 50
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
Xnew = np.c_[xx.flat, yy.flat]
return Xnew, xx, yy, xmin, xmax

View file

@ -43,11 +43,11 @@ it gives back an empty default, when defaults are not defined.
'''
# Data plots:
data_1d = dict(lw=1.5, marker='x', edgecolor='k')
data_1d = dict(lw=1.5, marker='x', color='k')
data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5)
inducing_1d = dict(lw=0, s=500, facecolors=Tango.colorsHex['darkRed'])
inducing_2d = dict(s=14, edgecolors='k', linewidth=.4, facecolors='white', alpha=.5, marker='^')
inducing_3d = dict(lw=.3, s=500, facecolors='white', edgecolors='k')
inducing_1d = dict(lw=0, s=500, color=Tango.colorsHex['darkRed'])
inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^')
inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k')
xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5)
yerrorbar = dict(color=Tango.colorsHex['darkRed'], fmt='none', elinewidth=.5, alpha=.5)
@ -71,5 +71,5 @@ ard = dict(edgecolor='k', linewidth=1.2)
latent = dict(aspect='auto', cmap='Greys', interpolation='bicubic')
gradient = dict(aspect='auto', cmap='RdBu', interpolation='nearest', alpha=.7)
magnification = dict(aspect='auto', cmap='Greys', interpolation='bicubic')
latent_scatter = dict(s=40, linewidth=.2, edgecolor='k', alpha=.9)
latent_scatter = dict(s=20, linewidth=.2, edgecolor='k', alpha=.9)
annotation = dict(fontdict=dict(family='sans-serif', weight='light', fontsize=9), zorder=.3, alpha=.7)

View file

@ -106,7 +106,7 @@ class MatplotlibPlots(AbstractPlottingLibrary):
return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs)
return ax.plot(X, Y, color=color, label=label, **kwargs)
def plot_axis_lines(self, ax, X, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
def plot_axis_lines(self, ax, X, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
from matplotlib import transforms
from matplotlib.path import Path
if 'marker' not in kwargs:
@ -126,14 +126,14 @@ class MatplotlibPlots(AbstractPlottingLibrary):
bottom=bottom, label=label, color=color,
**kwargs)
def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
if not('linestyle' in kwargs or 'ls' in kwargs):
kwargs['ls'] = 'none'
#if Z is not None:
# return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs)
return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs)
def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
if not('linestyle' in kwargs or 'ls' in kwargs):
kwargs['ls'] = 'none'
#if Z is not None:

View file

@ -15,7 +15,9 @@ def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
if ax is None:
fig = pb.figure(num=fignum, figsize=figsize)
if colors is None:
colors = pb.gca()._get_lines.color_cycle
from ..Tango import mediumList
from itertools import cycle
colors = cycle(mediumList)
pb.clf()
else:
colors = iter(colors)
@ -32,7 +34,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
else:
raise ValueError("Need one ax per latent dimension input_dim")
bg_lines.append(a.plot(means, c='k', alpha=.3))
lines.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
lines.extend(a.plot(x, means.T[i], c=next(colors), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
fills.append(a.fill_between(x,
means.T[i] - 2 * np.sqrt(variances.T[i]),
means.T[i] + 2 * np.sqrt(variances.T[i]),
@ -64,7 +66,9 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid
else:
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
if colors is None:
colors = pb.gca()._get_lines.color_cycle
from ..Tango import mediumList
from itertools import cycle
colors = cycle(mediumList)
pb.clf()
else:
colors = iter(colors)
@ -82,7 +86,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid
# mean and variance plot
a = fig.add_subplot(*sub1)
a.plot(means, c='k', alpha=.3)
plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
plots.extend(a.plot(x, means.T[i], c=next(colors), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
a.fill_between(x,
means.T[i] - 2 * np.sqrt(variances.T[i]),
means.T[i] + 2 * np.sqrt(variances.T[i]),

View file

@ -131,14 +131,15 @@ class PlotlyPlots(AbstractPlottingLibrary):
#not matplotlib marker
pass
marker_kwargs = marker_kwargs or {}
marker_kwargs.setdefault('symbol', marker)
if 'symbol' not in marker_kwargs:
marker_kwargs['symbol'] = marker
if Z is not None:
return Scatter3d(x=X, y=Y, z=Z, mode='markers',
showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs)
return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs or {}),
marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs)
def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs):
