Merged __init__
|
|
@ -9,17 +9,16 @@ omit = ./GPy/testing/*.py, travis_tests.py, setup.py, ./GPy/__version__.py
|
||||||
exclude_lines =
|
exclude_lines =
|
||||||
# Have to re-enable the standard pragma
|
# Have to re-enable the standard pragma
|
||||||
pragma: no cover
|
pragma: no cover
|
||||||
|
verbose
|
||||||
|
|
||||||
# Don't complain about missing debug-only code:
|
# Don't complain about missing debug-only code:
|
||||||
if self\.debug
|
if self\.debug
|
||||||
|
|
||||||
# Don't complain if tests don't hit defensive assertion code:
|
# Don't complain if tests don't hit defensive assertion code:
|
||||||
raise AssertionError
|
raise
|
||||||
raise NotImplementedError
|
|
||||||
raise NotImplemented
|
|
||||||
except
|
except
|
||||||
pass
|
pass
|
||||||
|
Not implemented
|
||||||
|
|
||||||
# Don't complain if non-runnable code isn't run:
|
# Don't complain if non-runnable code isn't run:
|
||||||
if 0:
|
if 0:
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ install:
|
||||||
- pip install pypandoc
|
- pip install pypandoc
|
||||||
- pip install git+git://github.com/BRML/climin.git
|
- pip install git+git://github.com/BRML/climin.git
|
||||||
- pip install autograd
|
- pip install autograd
|
||||||
|
- pip install nose-show-skipped
|
||||||
- python setup.py develop
|
- python setup.py develop
|
||||||
|
|
||||||
script:
|
script:
|
||||||
|
|
@ -47,9 +48,11 @@ before_deploy:
|
||||||
- make html
|
- make html
|
||||||
- cd ../
|
- cd ../
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]];
|
- if [[ "$TRAVIS_OS_NAME" == "linux" ]];
|
||||||
then export DIST='sdist';
|
then
|
||||||
|
export DIST='sdist';
|
||||||
elif [[ "$TRAVIS_OS_NAME" == "osx" ]];
|
elif [[ "$TRAVIS_OS_NAME" == "osx" ]];
|
||||||
then export DIST='bdist_wheel';
|
then
|
||||||
|
export DIST='bdist_wheel';
|
||||||
fi;
|
fi;
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
|
|
@ -60,5 +63,6 @@ deploy:
|
||||||
on:
|
on:
|
||||||
tags: true
|
tags: true
|
||||||
branch: deploy
|
branch: deploy
|
||||||
|
#condition: "$TRAVIS_OS_NAME" == "osx" || ( "$TRAVIS_OS_NAME" == "linux" && "$PYTHON_VERSION" == "2.7" )
|
||||||
distributions: $DIST
|
distributions: $DIST
|
||||||
skip_cleanup: true
|
skip_cleanup: true
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
[GPy Authors](https://github.com/SheffieldML/GPy/graphs/contributors)
|
GPy Authors: https://github.com/SheffieldML/GPy/graphs/contributors
|
||||||
|
|
@ -1 +1 @@
|
||||||
__version__ = "1.0.2"
|
__version__ = "1.0.9"
|
||||||
|
|
|
||||||
|
|
@ -148,14 +148,16 @@ class GP(Model):
|
||||||
# LVM models
|
# LVM models
|
||||||
if isinstance(self.X, VariationalPosterior):
|
if isinstance(self.X, VariationalPosterior):
|
||||||
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
|
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
|
||||||
|
index = self.X._parent_index_
|
||||||
self.unlink_parameter(self.X)
|
self.unlink_parameter(self.X)
|
||||||
self.X = X
|
self.X = X
|
||||||
self.link_parameter(self.X)
|
self.link_parameter(self.X, index=index)
|
||||||
else:
|
else:
|
||||||
|
index = self.X._parent_index_
|
||||||
self.unlink_parameter(self.X)
|
self.unlink_parameter(self.X)
|
||||||
from ..core import Param
|
from ..core import Param
|
||||||
self.X = Param('latent mean',X)
|
self.X = Param('latent mean', X)
|
||||||
self.link_parameter(self.X)
|
self.link_parameter(self.X, index=index)
|
||||||
else:
|
else:
|
||||||
self.X = ObsAr(X)
|
self.X = ObsAr(X)
|
||||||
self.update_model(True)
|
self.update_model(True)
|
||||||
|
|
@ -437,15 +439,22 @@ class GP(Model):
|
||||||
warnings.warn("Wrong naming, use predict_wishart_embedding instead. Will be removed in future versions!", DeprecationWarning)
|
warnings.warn("Wrong naming, use predict_wishart_embedding instead. Will be removed in future versions!", DeprecationWarning)
|
||||||
return self.predict_wishart_embedding(Xnew, kern, mean, covariance)
|
return self.predict_wishart_embedding(Xnew, kern, mean, covariance)
|
||||||
|
|
||||||
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
|
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True, dimensions=None):
|
||||||
"""
|
"""
|
||||||
Predict the magnification factor as
|
Predict the magnification factor as
|
||||||
|
|
||||||
sqrt(det(G))
|
sqrt(det(G))
|
||||||
|
|
||||||
for each point N in Xnew
|
for each point N in Xnew.
|
||||||
|
|
||||||
|
:param bool mean: whether to include the mean of the wishart embedding.
|
||||||
|
:param bool covariance: whether to include the covariance of the wishart embedding.
|
||||||
|
:param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
|
||||||
"""
|
"""
|
||||||
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
|
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
|
||||||
|
if dimensions is None:
|
||||||
|
dimensions = self.get_most_significant_input_dimensions()[:2]
|
||||||
|
G = G[:, dimensions][:,:,dimensions]
|
||||||
from ..util.linalg import jitchol
|
from ..util.linalg import jitchol
|
||||||
mag = np.empty(Xnew.shape[0])
|
mag = np.empty(Xnew.shape[0])
|
||||||
for n in range(Xnew.shape[0]):
|
for n in range(Xnew.shape[0]):
|
||||||
|
|
@ -525,21 +534,23 @@ class GP(Model):
|
||||||
def get_most_significant_input_dimensions(self, which_indices=None):
|
def get_most_significant_input_dimensions(self, which_indices=None):
|
||||||
return self.kern.get_most_significant_input_dimensions(which_indices)
|
return self.kern.get_most_significant_input_dimensions(which_indices)
|
||||||
|
|
||||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
|
||||||
"""
|
"""
|
||||||
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
|
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
|
||||||
kwargs are passed to the optimizer. They can be:
|
kwargs are passed to the optimizer. They can be:
|
||||||
|
|
||||||
:param max_f_eval: maximum number of function evaluations
|
:param max_iters: maximum number of function evaluations
|
||||||
:type max_f_eval: int
|
:type max_iters: int
|
||||||
:messages: whether to display during optimisation
|
:messages: whether to display during optimisation
|
||||||
:type messages: bool
|
:type messages: bool
|
||||||
:param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
|
:param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
|
||||||
:type optimizer: string
|
:type optimizer: string
|
||||||
|
:param bool ipython_notebook: whether to use ipython notebook widgets or not.
|
||||||
|
:param bool clear_after_finish: if in ipython notebook, we can clear the widgets after optimization.
|
||||||
"""
|
"""
|
||||||
self.inference_method.on_optimization_start()
|
self.inference_method.on_optimization_start()
|
||||||
try:
|
try:
|
||||||
super(GP, self).optimize(optimizer, start, **kwargs)
|
super(GP, self).optimize(optimizer, start, messages, max_iters, ipython_notebook, clear_after_finish, **kwargs)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
|
print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
|
||||||
self.inference_method.on_optimization_end()
|
self.inference_method.on_optimization_end()
|
||||||
|
|
|
||||||
|
|
@ -340,7 +340,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
|
||||||
gtol=.05)
|
gtol=.05)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("BGPLVM Latent Space 1D")
|
m.X.plot("BGPLVM Latent Space 1D")
|
||||||
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
|
m.kern.plot_ARD()
|
||||||
return m
|
return m
|
||||||
|
|
||||||
def gplvm_simulation(optimize=True, verbose=1,
|
def gplvm_simulation(optimize=True, verbose=1,
|
||||||
|
|
@ -364,7 +364,7 @@ def gplvm_simulation(optimize=True, verbose=1,
|
||||||
gtol=.05)
|
gtol=.05)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("BGPLVM Latent Space 1D")
|
m.X.plot("BGPLVM Latent Space 1D")
|
||||||
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
|
m.kern.plot_ARD()
|
||||||
return m
|
return m
|
||||||
def ssgplvm_simulation(optimize=True, verbose=1,
|
def ssgplvm_simulation(optimize=True, verbose=1,
|
||||||
plot=True, plot_sim=False,
|
plot=True, plot_sim=False,
|
||||||
|
|
@ -388,7 +388,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
|
||||||
gtol=.05)
|
gtol=.05)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("SSGPLVM Latent Space 1D")
|
m.X.plot("SSGPLVM Latent Space 1D")
|
||||||
m.kern.plot_ARD('SSGPLVM Simulation ARD Parameters')
|
m.kern.plot_ARD()
|
||||||
return m
|
return m
|
||||||
|
|
||||||
def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
|
|
@ -418,7 +418,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
gtol=.05)
|
gtol=.05)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("BGPLVM Latent Space 1D")
|
m.X.plot("BGPLVM Latent Space 1D")
|
||||||
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
|
m.kern.plot_ARD()
|
||||||
return m
|
return m
|
||||||
|
|
||||||
def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
|
def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
|
||||||
|
|
@ -448,7 +448,7 @@ def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
|
||||||
gtol=.05)
|
gtol=.05)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("BGPLVM Latent Space 1D")
|
m.X.plot("BGPLVM Latent Space 1D")
|
||||||
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
|
m.kern.plot_ARD()
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -469,7 +469,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||||
m.optimize(messages=verbose, max_iters=8e3)
|
m.optimize(messages=verbose, max_iters=8e3)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("MRD Latent Space 1D")
|
m.X.plot("MRD Latent Space 1D")
|
||||||
m.plot_scales("MRD Scales")
|
m.plot_scales()
|
||||||
return m
|
return m
|
||||||
|
|
||||||
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
||||||
|
|
@ -496,7 +496,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
|
||||||
m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
|
m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("MRD Latent Space 1D")
|
m.X.plot("MRD Latent Space 1D")
|
||||||
m.plot_scales("MRD Scales")
|
m.plot_scales()
|
||||||
return m
|
return m
|
||||||
|
|
||||||
def brendan_faces(optimize=True, verbose=True, plot=True):
|
def brendan_faces(optimize=True, verbose=True, plot=True):
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,14 @@ class EPBase(object):
|
||||||
# TODO: update approximation in the end as well? Maybe even with a switch?
|
# TODO: update approximation in the end as well? Maybe even with a switch?
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
super(EPBase, self).__setstate__(state[0])
|
||||||
|
self.epsilon, self.eta, self.delta = state[1]
|
||||||
|
self.reset()
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]
|
||||||
|
|
||||||
class EP(EPBase, ExactGaussianInference):
|
class EP(EPBase, ExactGaussianInference):
|
||||||
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
|
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
|
||||||
if self.always_reset:
|
if self.always_reset:
|
||||||
|
|
@ -51,7 +59,7 @@ class EP(EPBase, ExactGaussianInference):
|
||||||
if K is None:
|
if K is None:
|
||||||
K = kern.K(X)
|
K = kern.K(X)
|
||||||
|
|
||||||
if self._ep_approximation is None:
|
if getattr(self, '_ep_approximation', None) is None:
|
||||||
#if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
|
#if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
|
||||||
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
||||||
else:
|
else:
|
||||||
|
|
@ -159,7 +167,7 @@ class EPDTC(EPBase, VarDTC):
|
||||||
else:
|
else:
|
||||||
Kmn = psi1.T
|
Kmn = psi1.T
|
||||||
|
|
||||||
if self._ep_approximation is None:
|
if getattr(self, '_ep_approximation', None) is None:
|
||||||
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
|
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
|
||||||
else:
|
else:
|
||||||
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
|
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from .src.add import Add
|
||||||
from .src.prod import Prod
|
from .src.prod import Prod
|
||||||
from .src.rbf import RBF
|
from .src.rbf import RBF
|
||||||
from .src.linear import Linear, LinearFull
|
from .src.linear import Linear, LinearFull
|
||||||
from .src.static import Bias, White, Fixed, WhiteHeteroscedastic
|
from .src.static import Bias, White, Fixed, WhiteHeteroscedastic, Precomputed
|
||||||
from .src.brownian import Brownian
|
from .src.brownian import Brownian
|
||||||
from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||||
from .src.mlp import MLP
|
from .src.mlp import MLP
|
||||||
|
|
@ -27,6 +27,7 @@ from .src.eq_ode2 import EQ_ODE2
|
||||||
from .src.integral import Integral
|
from .src.integral import Integral
|
||||||
from .src.integral_limits import Integral_Limits
|
from .src.integral_limits import Integral_Limits
|
||||||
from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
|
from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
|
||||||
|
from .src.eq_ode1 import EQ_ODE1
|
||||||
from .src.trunclinear import TruncLinear,TruncLinear_inf
|
from .src.trunclinear import TruncLinear,TruncLinear_inf
|
||||||
from .src.splitKern import SplitKern,DEtime
|
from .src.splitKern import SplitKern,DEtime
|
||||||
from .src.splitKern import DEtime as DiffGenomeKern
|
from .src.splitKern import DEtime as DiffGenomeKern
|
||||||
|
|
|
||||||
649
GPy/kern/src/eq_ode1.py
Normal file
|
|
@ -0,0 +1,649 @@
|
||||||
|
# Copyright (c) 2014, Cristian Guarnizo.
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy.special import erf, erfcx
|
||||||
|
from .kern import Kern
|
||||||
|
from ...core.parameterization import Param
|
||||||
|
from paramz.transformations import Logexp
|
||||||
|
from paramz.caching import Cache_this
|
||||||
|
|
||||||
|
class EQ_ODE1(Kern):
|
||||||
|
"""
|
||||||
|
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
|
||||||
|
|
||||||
|
This outputs of this kernel have the form
|
||||||
|
.. math::
|
||||||
|
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
|
||||||
|
|
||||||
|
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
|
||||||
|
|
||||||
|
:param output_dim: number of outputs driven by latent function.
|
||||||
|
:type output_dim: int
|
||||||
|
:param W: sensitivities of each output to the latent driving function.
|
||||||
|
:type W: ndarray (output_dim x rank).
|
||||||
|
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
|
||||||
|
:type rank: int
|
||||||
|
:param decay: decay rates for the first order system.
|
||||||
|
:type decay: array of length output_dim.
|
||||||
|
:param delay: delay between latent force and output response.
|
||||||
|
:type delay: array of length output_dim.
|
||||||
|
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
|
||||||
|
:type kappa: array of length output_dim.
|
||||||
|
|
||||||
|
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
|
||||||
|
"""
|
||||||
|
def __init__(self, input_dim=2, output_dim=1, rank=1, W = None, lengthscale=None, decay=None, active_dims=None, name='eq_ode1'):
|
||||||
|
assert input_dim == 2, "only defined for 1 input dims"
|
||||||
|
super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name)
|
||||||
|
|
||||||
|
self.rank = rank
|
||||||
|
self.output_dim = output_dim
|
||||||
|
|
||||||
|
if lengthscale is None:
|
||||||
|
lengthscale = .5 + np.random.rand(self.rank)
|
||||||
|
else:
|
||||||
|
lengthscale = np.asarray(lengthscale)
|
||||||
|
assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
|
||||||
|
if lengthscale.size != self.rank:
|
||||||
|
lengthscale = np.ones(self.rank)*lengthscale
|
||||||
|
|
||||||
|
if W is None:
|
||||||
|
W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
|
||||||
|
else:
|
||||||
|
assert W.shape == (self.output_dim, self.rank)
|
||||||
|
|
||||||
|
if decay is None:
|
||||||
|
decay = np.ones(self.output_dim)
|
||||||
|
else:
|
||||||
|
decay = np.asarray(decay)
|
||||||
|
assert decay.size in [1, self.output_dim], "Bad number of decay"
|
||||||
|
if decay.size != self.output_dim:
|
||||||
|
decay = np.ones(self.output_dim)*decay
|
||||||
|
|
||||||
|
# if kappa is None:
|
||||||
|
# self.kappa = np.ones(self.output_dim)
|
||||||
|
# else:
|
||||||
|
# kappa = np.asarray(kappa)
|
||||||
|
# assert kappa.size in [1, self.output_dim], "Bad number of kappa"
|
||||||
|
# if decay.size != self.output_dim:
|
||||||
|
# decay = np.ones(self.output_dim)*kappa
|
||||||
|
|
||||||
|
#self.kappa = Param('kappa', kappa, Logexp())
|
||||||
|
#self.delay = Param('delay', delay, Logexp())
|
||||||
|
#self.is_normalized = True
|
||||||
|
#self.is_stationary = False
|
||||||
|
#self.gaussian_initial = False
|
||||||
|
|
||||||
|
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
|
||||||
|
self.decay = Param('decay', decay, Logexp())
|
||||||
|
self.W = Param('W', W)
|
||||||
|
self.link_parameters(self.lengthscale, self.decay, self.W)
|
||||||
|
|
||||||
|
@Cache_this(limit=3)
|
||||||
|
def K(self, X, X2=None):
|
||||||
|
#This way is not working, indexes are lost after using k._slice_X
|
||||||
|
#index = np.asarray(X, dtype=np.int)
|
||||||
|
#index = index.reshape(index.size,)
|
||||||
|
if hasattr(X, 'values'):
|
||||||
|
X = X.values
|
||||||
|
index = np.int_(np.round(X[:, 1]))
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
X_flag = index[0] >= self.output_dim
|
||||||
|
if X2 is None:
|
||||||
|
if X_flag:
|
||||||
|
#Calculate covariance function for the latent functions
|
||||||
|
index -= self.output_dim
|
||||||
|
return self._Kuu(X, index)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
else:
|
||||||
|
#This way is not working, indexes are lost after using k._slice_X
|
||||||
|
#index2 = np.asarray(X2, dtype=np.int)
|
||||||
|
#index2 = index2.reshape(index2.size,)
|
||||||
|
if hasattr(X2, 'values'):
|
||||||
|
X2 = X2.values
|
||||||
|
index2 = np.int_(np.round(X2[:, 1]))
|
||||||
|
index2 = index2.reshape(index2.size,)
|
||||||
|
X2_flag = index2[0] >= self.output_dim
|
||||||
|
#Calculate cross-covariance function
|
||||||
|
if not X_flag and X2_flag:
|
||||||
|
index2 -= self.output_dim
|
||||||
|
return self._Kfu(X, index, X2, index2) #Kfu
|
||||||
|
elif X_flag and not X2_flag:
|
||||||
|
index -= self.output_dim
|
||||||
|
return self._Kfu(X2, index2, X, index).T #Kuf
|
||||||
|
elif X_flag and X2_flag:
|
||||||
|
index -= self.output_dim
|
||||||
|
index2 -= self.output_dim
|
||||||
|
return self._Kusu(X, index, X2, index2) #Ku_s u
|
||||||
|
else:
|
||||||
|
raise NotImplementedError #Kf_s f
|
||||||
|
|
||||||
|
#Calculate the covariance function for diag(Kff(X,X))
|
||||||
|
def Kdiag(self, X):
|
||||||
|
if hasattr(X, 'values'):
|
||||||
|
index = np.int_(np.round(X[:, 1].values))
|
||||||
|
else:
|
||||||
|
index = np.int_(np.round(X[:, 1]))
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
X_flag = index[0] >= self.output_dim
|
||||||
|
|
||||||
|
if X_flag: #Kuudiag
|
||||||
|
return np.ones(X[:,0].shape)
|
||||||
|
else: #Kffdiag
|
||||||
|
kdiag = self._Kdiag(X)
|
||||||
|
return np.sum(kdiag, axis=1)
|
||||||
|
|
||||||
|
def _Kdiag(self, X):
|
||||||
|
#This way is not working, indexes are lost after using k._slice_X
|
||||||
|
#index = np.asarray(X, dtype=np.int)
|
||||||
|
#index = index.reshape(index.size,)
|
||||||
|
if hasattr(X, 'values'):
|
||||||
|
X = X.values
|
||||||
|
index = np.int_(X[:, 1])
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
|
||||||
|
#terms that move along t
|
||||||
|
t = X[:, 0].reshape(X.shape[0], 1)
|
||||||
|
d = np.unique(index) #Output Indexes
|
||||||
|
B = self.decay.values[d]
|
||||||
|
S = self.W.values[d, :]
|
||||||
|
#Index transformation
|
||||||
|
indd = np.arange(self.output_dim)
|
||||||
|
indd[d] = np.arange(d.size)
|
||||||
|
index = indd[index]
|
||||||
|
|
||||||
|
B = B.reshape(B.size, 1)
|
||||||
|
#Terms that move along q
|
||||||
|
lq = self.lengthscale.values.reshape(1, self.rank)
|
||||||
|
S2 = S*S
|
||||||
|
kdiag = np.empty((t.size, ))
|
||||||
|
|
||||||
|
#Dx1 terms
|
||||||
|
c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq)
|
||||||
|
|
||||||
|
#DxQ terms
|
||||||
|
nu = lq*(B*.5)
|
||||||
|
nu2 = nu*nu
|
||||||
|
#Nx1 terms
|
||||||
|
gamt = -2.*B
|
||||||
|
gamt = gamt[index]*t
|
||||||
|
|
||||||
|
#NxQ terms
|
||||||
|
t_lq = t/lq
|
||||||
|
|
||||||
|
# Upsilon Calculations
|
||||||
|
# Using wofz
|
||||||
|
#erfnu = erf(nu)
|
||||||
|
|
||||||
|
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :] ,t_lq+nu[index,:] ))
|
||||||
|
upm[t[:, 0] == 0, :] = 0.
|
||||||
|
|
||||||
|
|
||||||
|
upv = np.exp(nu2[index, :] + gamt + lnDifErf( -t_lq+nu[index,:], nu[index, :] ) )
|
||||||
|
upv[t[:, 0] == 0, :] = 0.
|
||||||
|
|
||||||
|
#Covariance calculation
|
||||||
|
#kdiag = np.sum(c0[index, :]*(upm-upv), axis=1)
|
||||||
|
kdiag = c0[index, :]*(upm-upv)
|
||||||
|
return kdiag
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2 = None):
|
||||||
|
#index = np.asarray(X, dtype=np.int)
|
||||||
|
#index = index.reshape(index.size,)
|
||||||
|
if hasattr(X, 'values'):
|
||||||
|
X = X.values
|
||||||
|
self.decay.gradient = np.zeros(self.decay.shape)
|
||||||
|
self.W.gradient = np.zeros(self.W.shape)
|
||||||
|
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
|
||||||
|
index = np.int_(np.round(X[:, 1]))
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
X_flag = index[0] >= self.output_dim
|
||||||
|
if X2 is None:
|
||||||
|
if X_flag: #Kuu or Kmm
|
||||||
|
index -= self.output_dim
|
||||||
|
tmp = dL_dK*self._gkuu_lq(X, index)
|
||||||
|
for q in np.unique(index):
|
||||||
|
ind = np.where(index == q)
|
||||||
|
self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum()
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
else: #Kfu or Knm
|
||||||
|
#index2 = np.asarray(X2, dtype=np.int)
|
||||||
|
#index2 = index2.reshape(index2.size,)
|
||||||
|
if hasattr(X2, 'values'):
|
||||||
|
X2 = X2.values
|
||||||
|
index2 = np.int_(np.round(X2[:, 1]))
|
||||||
|
index2 = index2.reshape(index2.size,)
|
||||||
|
X2_flag = index2[0] >= self.output_dim
|
||||||
|
if not X_flag and X2_flag: #Kfu
|
||||||
|
index2 -= self.output_dim
|
||||||
|
else: #Kuf
|
||||||
|
dL_dK = dL_dK.T #so we obtaing dL_Kfu
|
||||||
|
indtemp = index - self.output_dim
|
||||||
|
Xtemp = X
|
||||||
|
X = X2
|
||||||
|
X2 = Xtemp
|
||||||
|
index = index2
|
||||||
|
index2 = indtemp
|
||||||
|
glq, gSdq, gB = self._gkfu(X, index, X2, index2)
|
||||||
|
tmp = dL_dK*glq
|
||||||
|
for q in np.unique(index2):
|
||||||
|
ind = np.where(index2 == q)
|
||||||
|
self.lengthscale.gradient[q] = tmp[:, ind].sum()
|
||||||
|
tmpB = dL_dK*gB
|
||||||
|
tmp = dL_dK*gSdq
|
||||||
|
for d in np.unique(index):
|
||||||
|
ind = np.where(index == d)
|
||||||
|
self.decay.gradient[d] = tmpB[ind, :].sum()
|
||||||
|
for q in np.unique(index2):
|
||||||
|
ind2 = np.where(index2 == q)
|
||||||
|
self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum()
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
|
#index = np.asarray(X, dtype=np.int)
|
||||||
|
#index = index.reshape(index.size,)
|
||||||
|
if hasattr(X, 'values'):
|
||||||
|
X = X.values
|
||||||
|
self.decay.gradient = np.zeros(self.decay.shape)
|
||||||
|
self.W.gradient = np.zeros(self.W.shape)
|
||||||
|
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
|
||||||
|
index = np.int_(X[:, 1])
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
|
||||||
|
glq, gS, gB = self._gkdiag(X, index)
|
||||||
|
if dL_dKdiag.size == X.shape[0]:
|
||||||
|
dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
|
||||||
|
tmp = dL_dKdiag*glq
|
||||||
|
self.lengthscale.gradient = tmp.sum(0)
|
||||||
|
tmpB = dL_dKdiag*gB
|
||||||
|
tmp = dL_dKdiag*gS
|
||||||
|
for d in np.unique(index):
|
||||||
|
ind = np.where(index == d)
|
||||||
|
self.decay.gradient[d] = tmpB[ind, :].sum()
|
||||||
|
self.W.gradient[d, :] = tmp[ind].sum(0)
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
|
#index = np.asarray(X, dtype=np.int)
|
||||||
|
#index = index.reshape(index.size,)
|
||||||
|
if hasattr(X, 'values'):
|
||||||
|
X = X.values
|
||||||
|
index = np.int_(np.round(X[:, 1]))
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
X_flag = index[0] >= self.output_dim
|
||||||
|
#If input_dim == 1, use this
|
||||||
|
#gX = np.zeros((X.shape[0], 1))
|
||||||
|
#Cheat to allow gradient for input_dim==2
|
||||||
|
gX = np.zeros(X.shape)
|
||||||
|
if X2 is None: #Kuu or Kmm
|
||||||
|
if X_flag:
|
||||||
|
index -= self.output_dim
|
||||||
|
gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0)
|
||||||
|
return gX
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
else: #Kuf or Kmn
|
||||||
|
#index2 = np.asarray(X2, dtype=np.int)
|
||||||
|
#index2 = index2.reshape(index2.size,)
|
||||||
|
if hasattr(X2, 'values'):
|
||||||
|
X2 = X2.values
|
||||||
|
index2 = np.int_(np.round(X2[:, 1]))
|
||||||
|
index2 = index2.reshape(index2.size,)
|
||||||
|
X2_flag = index2[0] >= self.output_dim
|
||||||
|
if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z
|
||||||
|
index -= self.output_dim
|
||||||
|
gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1)
|
||||||
|
return gX
|
||||||
|
else:
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
#---------------------------------------#
|
||||||
|
# Helper functions #
|
||||||
|
#---------------------------------------#
|
||||||
|
|
||||||
|
#Evaluation of squared exponential for LFM
|
||||||
|
def _Kuu(self, X, index):
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
t = X[:, 0].reshape(X.shape[0],)
|
||||||
|
lq = self.lengthscale.values.reshape(self.rank,)
|
||||||
|
lq2 = lq*lq
|
||||||
|
#Covariance matrix initialization
|
||||||
|
kuu = np.zeros((t.size, t.size))
|
||||||
|
#Assign 1. to diagonal terms
|
||||||
|
kuu[np.diag_indices(t.size)] = 1.
|
||||||
|
#Upper triangular indices
|
||||||
|
indtri1, indtri2 = np.triu_indices(t.size, 1)
|
||||||
|
#Block Diagonal indices among Upper Triangular indices
|
||||||
|
ind = np.where(index[indtri1] == index[indtri2])
|
||||||
|
indr = indtri1[ind]
|
||||||
|
indc = indtri2[ind]
|
||||||
|
r = t[indr] - t[indc]
|
||||||
|
r2 = r*r
|
||||||
|
#Calculation of covariance function
|
||||||
|
kuu[indr, indc] = np.exp(-r2/lq2[index[indr]])
|
||||||
|
#Completion of lower triangular part
|
||||||
|
kuu[indc, indr] = kuu[indr, indc]
|
||||||
|
return kuu
|
||||||
|
|
||||||
|
def _Kusu(self, X, index, X2, index2):
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
index2 = index2.reshape(index2.size,)
|
||||||
|
t = X[:, 0].reshape(X.shape[0],1)
|
||||||
|
t2 = X2[:, 0].reshape(1,X2.shape[0])
|
||||||
|
lq = self.lengthscale.values.reshape(self.rank,)
|
||||||
|
#Covariance matrix initialization
|
||||||
|
kuu = np.zeros((t.size, t2.size))
|
||||||
|
for q in range(self.rank):
|
||||||
|
ind1 = index == q
|
||||||
|
ind2 = index2 == q
|
||||||
|
r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
|
||||||
|
r2 = r*r
|
||||||
|
#Calculation of covariance function
|
||||||
|
kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
|
||||||
|
return kuu
|
||||||
|
|
||||||
|
#Evaluation of cross-covariance function
|
||||||
|
def _Kfu(self, X, index, X2, index2):
|
||||||
|
#terms that move along t
|
||||||
|
t = X[:, 0].reshape(X.shape[0], 1)
|
||||||
|
d = np.unique(index) #Output Indexes
|
||||||
|
B = self.decay.values[d]
|
||||||
|
S = self.W.values[d, :]
|
||||||
|
#Index transformation
|
||||||
|
indd = np.arange(self.output_dim)
|
||||||
|
indd[d] = np.arange(d.size)
|
||||||
|
index = indd[index]
|
||||||
|
#Output related variables must be column-wise
|
||||||
|
B = B.reshape(B.size, 1)
|
||||||
|
#Input related variables must be row-wise
|
||||||
|
z = X2[:, 0].reshape(1, X2.shape[0])
|
||||||
|
lq = self.lengthscale.values.reshape((1, self.rank))
|
||||||
|
|
||||||
|
kfu = np.empty((t.size, z.size))
|
||||||
|
|
||||||
|
#DxQ terms
|
||||||
|
c0 = S*((.5*np.sqrt(np.pi))*lq)
|
||||||
|
nu = B*(.5*lq)
|
||||||
|
nu2 = nu**2
|
||||||
|
#1xM terms
|
||||||
|
z_lq = z/lq[0, index2]
|
||||||
|
#NxM terms
|
||||||
|
tz = t-z
|
||||||
|
tz_lq = tz/lq[0, index2]
|
||||||
|
|
||||||
|
# Upsilon Calculations
|
||||||
|
fullind = np.ix_(index, index2)
|
||||||
|
|
||||||
|
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind]))
|
||||||
|
upsi[t[:, 0] == 0, :] = 0.
|
||||||
|
#Covariance calculation
|
||||||
|
kfu = c0[fullind]*upsi
|
||||||
|
|
||||||
|
return kfu
|
||||||
|
|
||||||
|
#Gradient of Kuu wrt lengthscale
|
||||||
|
def _gkuu_lq(self, X, index):
|
||||||
|
t = X[:, 0].reshape(X.shape[0],)
|
||||||
|
index = index.reshape(X.shape[0],)
|
||||||
|
lq = self.lengthscale.values.reshape(self.rank,)
|
||||||
|
lq2 = lq*lq
|
||||||
|
#Covariance matrix initialization
|
||||||
|
glq = np.zeros((t.size, t.size))
|
||||||
|
#Upper triangular indices
|
||||||
|
indtri1, indtri2 = np.triu_indices(t.size, 1)
|
||||||
|
#Block Diagonal indices among Upper Triangular indices
|
||||||
|
ind = np.where(index[indtri1] == index[indtri2])
|
||||||
|
indr = indtri1[ind]
|
||||||
|
indc = indtri2[ind]
|
||||||
|
r = t[indr] - t[indc]
|
||||||
|
r2 = r*r
|
||||||
|
r2_lq2 = r2/lq2[index[indr]]
|
||||||
|
#Calculation of covariance function
|
||||||
|
er2_lq2 = np.exp(-r2_lq2)
|
||||||
|
#Gradient wrt lq
|
||||||
|
c = 2.*r2_lq2/lq[index[indr]]
|
||||||
|
glq[indr, indc] = er2_lq2*c
|
||||||
|
#Complete the lower triangular
|
||||||
|
glq[indc, indr] = glq[indr, indc]
|
||||||
|
return glq
|
||||||
|
|
||||||
|
#Be careful this derivative should be transpose it
|
||||||
|
def _gkuu_X(self, X, index): #Diagonal terms are always zero
|
||||||
|
t = X[:, 0].reshape(X.shape[0],)
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
lq = self.lengthscale.values.reshape(self.rank,)
|
||||||
|
lq2 = lq*lq
|
||||||
|
#Covariance matrix initialization
|
||||||
|
gt = np.zeros((t.size, t.size))
|
||||||
|
#Upper triangular indices
|
||||||
|
indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal
|
||||||
|
#Block Diagonal indices among Upper Triangular indices
|
||||||
|
ind = np.where(index[indtri1] == index[indtri2])
|
||||||
|
indr = indtri1[ind]
|
||||||
|
indc = indtri2[ind]
|
||||||
|
r = t[indr] - t[indc]
|
||||||
|
r2 = r*r
|
||||||
|
r2_lq2 = r2/(-lq2[index[indr]])
|
||||||
|
#Calculation of covariance function
|
||||||
|
er2_lq2 = np.exp(r2_lq2)
|
||||||
|
#Gradient wrt t
|
||||||
|
c = 2.*r/lq2[index[indr]]
|
||||||
|
gt[indr, indc] = er2_lq2*c
|
||||||
|
#Complete the lower triangular
|
||||||
|
gt[indc, indr] = -gt[indr, indc]
|
||||||
|
return gt
|
||||||
|
|
||||||
|
#Gradients for Diagonal Kff
|
||||||
|
def _gkdiag(self, X, index):
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
#terms that move along t
|
||||||
|
d = np.unique(index)
|
||||||
|
B = self.decay[d].values
|
||||||
|
S = self.W[d, :].values
|
||||||
|
#Index transformation
|
||||||
|
indd = np.arange(self.output_dim)
|
||||||
|
indd[d] = np.arange(d.size)
|
||||||
|
index = indd[index]
|
||||||
|
#Output related variables must be column-wise
|
||||||
|
t = X[:, 0].reshape(X.shape[0], 1)
|
||||||
|
B = B.reshape(B.size, 1)
|
||||||
|
S2 = S*S
|
||||||
|
|
||||||
|
#Input related variables must be row-wise
|
||||||
|
lq = self.lengthscale.values.reshape(1, self.rank)
|
||||||
|
|
||||||
|
gB = np.empty((t.size,))
|
||||||
|
glq = np.empty((t.size, lq.size))
|
||||||
|
gS = np.empty((t.size, lq.size))
|
||||||
|
|
||||||
|
#Dx1 terms
|
||||||
|
c0 = S2*lq*np.sqrt(np.pi)
|
||||||
|
|
||||||
|
#DxQ terms
|
||||||
|
nu = (.5*lq)*B
|
||||||
|
nu2 = nu*nu
|
||||||
|
|
||||||
|
#Nx1 terms
|
||||||
|
gamt = -B[index]*t
|
||||||
|
egamt = np.exp(gamt)
|
||||||
|
e2gamt = egamt*egamt
|
||||||
|
|
||||||
|
#NxQ terms
|
||||||
|
t_lq = t/lq
|
||||||
|
t2_lq2 = -t_lq*t_lq
|
||||||
|
|
||||||
|
etlq2gamt = np.exp(t2_lq2 + gamt) #NXQ
|
||||||
|
|
||||||
|
##Upsilon calculations
|
||||||
|
#erfnu = erf(nu) #TODO: This can be improved
|
||||||
|
|
||||||
|
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :], t_lq + nu[index, :]) )
|
||||||
|
upm[t[:, 0] == 0, :] = 0.
|
||||||
|
|
||||||
|
upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) ) #egamt*upv
|
||||||
|
upv[t[:, 0] == 0, :] = 0.
|
||||||
|
|
||||||
|
#Gradient wrt S
|
||||||
|
c0_S = (S/B)*(lq*np.sqrt(np.pi))
|
||||||
|
|
||||||
|
gS = c0_S[index]*(upm - upv)
|
||||||
|
|
||||||
|
#For B
|
||||||
|
CB1 = (.5*lq)**2 - .5/B**2 #DXQ
|
||||||
|
lq2_2B = (.5*lq**2)*(S2/B) #DXQ
|
||||||
|
CB2 = 2.*etlq2gamt - e2gamt - 1. #NxQ
|
||||||
|
|
||||||
|
# gradient wrt B NxZ
|
||||||
|
gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \
|
||||||
|
lq2_2B[index, :]*CB2
|
||||||
|
|
||||||
|
#Gradient wrt lengthscale
|
||||||
|
#DxQ terms
|
||||||
|
c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1.+.5*(lq*B)**2)
|
||||||
|
Clq1 = S2*(lq*.5)
|
||||||
|
glq = c0[index]*(upm - upv) + Clq1[index]*CB2
|
||||||
|
|
||||||
|
return glq, gS, gB
|
||||||
|
|
||||||
|
def _gkfu(self, X, index, Z, index2):
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
#TODO: reduce memory usage
|
||||||
|
#terms that move along t
|
||||||
|
d = np.unique(index)
|
||||||
|
B = self.decay[d].values
|
||||||
|
S = self.W[d, :].values
|
||||||
|
|
||||||
|
#Index transformation
|
||||||
|
indd = np.arange(self.output_dim)
|
||||||
|
indd[d] = np.arange(d.size)
|
||||||
|
index = indd[index]
|
||||||
|
#t column
|
||||||
|
t = X[:, 0].reshape(X.shape[0], 1)
|
||||||
|
B = B.reshape(B.size, 1)
|
||||||
|
#z row
|
||||||
|
z = Z[:, 0].reshape(1, Z.shape[0])
|
||||||
|
index2 = index2.reshape(index2.size,)
|
||||||
|
lq = self.lengthscale.values.reshape((1, self.rank))
|
||||||
|
|
||||||
|
#kfu = np.empty((t.size, z.size))
|
||||||
|
glq = np.empty((t.size, z.size))
|
||||||
|
gSdq = np.empty((t.size, z.size))
|
||||||
|
gB = np.empty((t.size, z.size))
|
||||||
|
|
||||||
|
#Dx1 terms
|
||||||
|
B_2 = B*.5
|
||||||
|
S_pi = S*(.5*np.sqrt(np.pi))
|
||||||
|
#DxQ terms
|
||||||
|
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
|
||||||
|
nu = B*lq*.5
|
||||||
|
nu2 = nu*nu
|
||||||
|
|
||||||
|
#1xM terms
|
||||||
|
z_lq = z/lq[0, index2]
|
||||||
|
|
||||||
|
#NxM terms
|
||||||
|
tz = t-z
|
||||||
|
tz_lq = tz/lq[0, index2]
|
||||||
|
etz_lq2 = -np.exp(-tz_lq*tz_lq)
|
||||||
|
ez_lq_Bt = np.exp(-z_lq*z_lq -B[index]*t)
|
||||||
|
|
||||||
|
# Upsilon calculations
|
||||||
|
fullind = np.ix_(index, index2)
|
||||||
|
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind] ) )
|
||||||
|
upsi[t[:, 0] == 0., :] = 0.
|
||||||
|
|
||||||
|
#Gradient wrt S
|
||||||
|
#DxQ term
|
||||||
|
Sa1 = lq*(.5*np.sqrt(np.pi))
|
||||||
|
|
||||||
|
gSdq = Sa1[0,index2]*upsi
|
||||||
|
|
||||||
|
#Gradient wrt lq
|
||||||
|
la1 = S_pi*(1. + 2.*nu2)
|
||||||
|
Slq = S*lq
|
||||||
|
uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index])
|
||||||
|
uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index])
|
||||||
|
|
||||||
|
glq = la1[fullind]*upsi
|
||||||
|
glq += Slq[fullind]*uplq
|
||||||
|
|
||||||
|
#Gradient wrt B
|
||||||
|
Slq = Slq*lq
|
||||||
|
nulq = nu*lq
|
||||||
|
upBd = etz_lq2 + ez_lq_Bt
|
||||||
|
gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd
|
||||||
|
|
||||||
|
return glq, gSdq, gB
|
||||||
|
|
||||||
|
#TODO: reduce memory usage
|
||||||
|
def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z)
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
#terms that move along t
|
||||||
|
d = np.unique(index)
|
||||||
|
B = self.decay[d].values
|
||||||
|
S = self.W[d, :].values
|
||||||
|
#Index transformation
|
||||||
|
indd = np.arange(self.output_dim)
|
||||||
|
indd[d] = np.arange(d.size)
|
||||||
|
index = indd[index]
|
||||||
|
|
||||||
|
#t column
|
||||||
|
t = X[:, 0].reshape(X.shape[0], 1)
|
||||||
|
B = B.reshape(B.size, 1)
|
||||||
|
#z row
|
||||||
|
z = Z[:, 0].reshape(1, Z.shape[0])
|
||||||
|
index2 = index2.reshape(index2.size,)
|
||||||
|
lq = self.lengthscale.values.reshape((1, self.rank))
|
||||||
|
|
||||||
|
#kfu = np.empty((t.size, z.size))
|
||||||
|
gz = np.empty((t.size, z.size))
|
||||||
|
|
||||||
|
#Dx1 terms
|
||||||
|
S_pi =S*(.5*np.sqrt(np.pi))
|
||||||
|
#DxQ terms
|
||||||
|
#Slq = S*lq
|
||||||
|
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
|
||||||
|
nu = (.5*lq)*B
|
||||||
|
nu2 = nu*nu
|
||||||
|
|
||||||
|
#1xM terms
|
||||||
|
z_lq = z/lq[0, index2]
|
||||||
|
z_lq2 = -z_lq*z_lq
|
||||||
|
#NxQ terms
|
||||||
|
t_lq = t/lq
|
||||||
|
#NxM terms
|
||||||
|
zt_lq = z_lq - t_lq[:, index2]
|
||||||
|
zt_lq2 = -zt_lq*zt_lq
|
||||||
|
|
||||||
|
# Upsilon calculations
|
||||||
|
fullind = np.ix_(index, index2)
|
||||||
|
z2 = z_lq + nu[fullind]
|
||||||
|
z1 = z2 - t_lq[:, index2]
|
||||||
|
upsi = np.exp(nu2[fullind] - B[index]*(t-z) + lnDifErf(z1,z2) )
|
||||||
|
upsi[t[:, 0] == 0., :] = 0.
|
||||||
|
|
||||||
|
#Gradient wrt z
|
||||||
|
za1 = c0*B
|
||||||
|
#za2 = S_w
|
||||||
|
gz = za1[fullind]*upsi + S[fullind]*( np.exp(z_lq2 - B[index]*t) -np.exp(zt_lq2) )
|
||||||
|
|
||||||
|
return gz
|
||||||
|
|
||||||
|
def lnDifErf(z1,z2):
|
||||||
|
#Z2 is always positive
|
||||||
|
logdiferf = np.zeros(z1.shape)
|
||||||
|
ind = np.where(z1>0.)
|
||||||
|
ind2 = np.where(z1<=0.)
|
||||||
|
if ind[0].shape > 0:
|
||||||
|
z1i = z1[ind]
|
||||||
|
z12 = z1i*z1i
|
||||||
|
z2i = z2[ind]
|
||||||
|
logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12-z2i**2))
|
||||||
|
|
||||||
|
if ind2[0].shape > 0:
|
||||||
|
z1i = z1[ind2]
|
||||||
|
z2i = z2[ind2]
|
||||||
|
logdiferf[ind2] = np.log(erf(z2i) - erf(z1i))
|
||||||
|
|
||||||
|
return logdiferf
|
||||||
|
|
@ -44,7 +44,7 @@ class EQ_ODE2(Kern):
|
||||||
lengthscale = np.asarray(lengthscale)
|
lengthscale = np.asarray(lengthscale)
|
||||||
assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
|
assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
|
||||||
if lengthscale.size != self.rank:
|
if lengthscale.size != self.rank:
|
||||||
lengthscale = np.ones(self.input_dim)*lengthscale
|
lengthscale = np.ones(self.rank)*lengthscale
|
||||||
|
|
||||||
if W is None:
|
if W is None:
|
||||||
#W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
|
#W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
|
||||||
|
|
@ -71,7 +71,7 @@ class EQ_ODE2(Kern):
|
||||||
#index = index.reshape(index.size,)
|
#index = index.reshape(index.size,)
|
||||||
if hasattr(X, 'values'):
|
if hasattr(X, 'values'):
|
||||||
X = X.values
|
X = X.values
|
||||||
index = np.int_(X[:, 1])
|
index = np.int_(np.round(X[:, 1]))
|
||||||
index = index.reshape(index.size,)
|
index = index.reshape(index.size,)
|
||||||
X_flag = index[0] >= self.output_dim
|
X_flag = index[0] >= self.output_dim
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
|
|
@ -79,7 +79,7 @@ class EQ_ODE2(Kern):
|
||||||
#Calculate covariance function for the latent functions
|
#Calculate covariance function for the latent functions
|
||||||
index -= self.output_dim
|
index -= self.output_dim
|
||||||
return self._Kuu(X, index)
|
return self._Kuu(X, index)
|
||||||
else:
|
else: #Kff full
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
else:
|
else:
|
||||||
#This way is not working, indexes are lost after using k._slice_X
|
#This way is not working, indexes are lost after using k._slice_X
|
||||||
|
|
@ -87,19 +87,40 @@ class EQ_ODE2(Kern):
|
||||||
#index2 = index2.reshape(index2.size,)
|
#index2 = index2.reshape(index2.size,)
|
||||||
if hasattr(X2, 'values'):
|
if hasattr(X2, 'values'):
|
||||||
X2 = X2.values
|
X2 = X2.values
|
||||||
index2 = np.int_(X2[:, 1])
|
index2 = np.int_(np.round(X2[:, 1]))
|
||||||
index2 = index2.reshape(index2.size,)
|
index2 = index2.reshape(index2.size,)
|
||||||
X2_flag = index2[0] >= self.output_dim
|
X2_flag = index2[0] >= self.output_dim
|
||||||
#Calculate cross-covariance function
|
#Calculate cross-covariance function
|
||||||
if not X_flag and X2_flag:
|
if not X_flag and X2_flag:
|
||||||
index2 -= self.output_dim
|
index2 -= self.output_dim
|
||||||
return self._Kfu(X, index, X2, index2) #Kfu
|
return self._Kfu(X, index, X2, index2) #Kfu
|
||||||
else:
|
elif X_flag and not X2_flag:
|
||||||
index -= self.output_dim
|
index -= self.output_dim
|
||||||
return self._Kfu(X2, index2, X, index).T #Kuf
|
return self._Kfu(X2, index2, X, index).T #Kuf
|
||||||
|
elif X_flag and X2_flag:
|
||||||
|
index -= self.output_dim
|
||||||
|
index2 -= self.output_dim
|
||||||
|
return self._Kusu(X, index, X2, index2) #Ku_s u
|
||||||
|
else:
|
||||||
|
raise NotImplementedError #Kf_s f
|
||||||
|
|
||||||
#Calculate the covariance function for diag(Kff(X,X))
|
#Calculate the covariance function for diag(Kff(X,X))
|
||||||
def Kdiag(self, X):
|
def Kdiag(self, X):
|
||||||
|
if hasattr(X, 'values'):
|
||||||
|
index = np.int_(np.round(X[:, 1].values))
|
||||||
|
else:
|
||||||
|
index = np.int_(np.round(X[:, 1]))
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
X_flag = index[0] >= self.output_dim
|
||||||
|
|
||||||
|
if X_flag: #Kuudiag
|
||||||
|
return np.ones(X[:,0].shape)
|
||||||
|
else: #Kffdiag
|
||||||
|
kdiag = self._Kdiag(X)
|
||||||
|
return np.sum(kdiag, axis=1)
|
||||||
|
|
||||||
|
#Calculate the covariance function for diag(Kff(X,X))
|
||||||
|
def _Kdiag(self, X):
|
||||||
#This way is not working, indexes are lost after using k._slice_X
|
#This way is not working, indexes are lost after using k._slice_X
|
||||||
#index = np.asarray(X, dtype=np.int)
|
#index = np.asarray(X, dtype=np.int)
|
||||||
#index = index.reshape(index.size,)
|
#index = index.reshape(index.size,)
|
||||||
|
|
@ -132,7 +153,7 @@ class EQ_ODE2(Kern):
|
||||||
#Terms that move along q
|
#Terms that move along q
|
||||||
lq = self.lengthscale.values.reshape(1, self.lengthscale.size)
|
lq = self.lengthscale.values.reshape(1, self.lengthscale.size)
|
||||||
S2 = S*S
|
S2 = S*S
|
||||||
kdiag = np.empty((t.size, ))
|
kdiag = np.empty((t.size, lq.size))
|
||||||
|
|
||||||
indD = np.arange(B.size)
|
indD = np.arange(B.size)
|
||||||
#(1) When wd is real
|
#(1) When wd is real
|
||||||
|
|
@ -187,8 +208,8 @@ class EQ_ODE2(Kern):
|
||||||
upv[t1[:, 0] == 0, :] = 0.
|
upv[t1[:, 0] == 0, :] = 0.
|
||||||
|
|
||||||
#Covariance calculation
|
#Covariance calculation
|
||||||
kdiag[ind3t] = np.sum(np.real(K01[ind]*upm), axis=1)
|
kdiag[ind3t] = np.real(K01[ind]*upm)
|
||||||
kdiag[ind3t] += np.sum(np.real((c0[ind]*ec)*upv), axis=1)
|
kdiag[ind3t] += np.real((c0[ind]*ec)*upv)
|
||||||
|
|
||||||
#(2) When w_d is complex
|
#(2) When w_d is complex
|
||||||
if np.any(wbool):
|
if np.any(wbool):
|
||||||
|
|
@ -265,7 +286,7 @@ class EQ_ODE2(Kern):
|
||||||
upvc[t1[:, 0] == 0, :] = 0.
|
upvc[t1[:, 0] == 0, :] = 0.
|
||||||
|
|
||||||
#Covariance calculation
|
#Covariance calculation
|
||||||
kdiag[ind2t] = np.sum(K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc, axis=1)
|
kdiag[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc
|
||||||
return kdiag
|
return kdiag
|
||||||
|
|
||||||
def update_gradients_full(self, dL_dK, X, X2 = None):
|
def update_gradients_full(self, dL_dK, X, X2 = None):
|
||||||
|
|
@ -336,16 +357,17 @@ class EQ_ODE2(Kern):
|
||||||
index = index.reshape(index.size,)
|
index = index.reshape(index.size,)
|
||||||
|
|
||||||
glq, gS, gB, gC = self._gkdiag(X, index)
|
glq, gS, gB, gC = self._gkdiag(X, index)
|
||||||
tmp = dL_dKdiag.reshape(index.size, 1)*glq
|
if dL_dKdiag.size == X.shape[0]:
|
||||||
|
dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
|
||||||
|
tmp = dL_dKdiag*glq
|
||||||
self.lengthscale.gradient = tmp.sum(0)
|
self.lengthscale.gradient = tmp.sum(0)
|
||||||
#TODO: Avoid the reshape by a priori knowing the shape of dL_dKdiag
|
tmpB = dL_dKdiag*gB
|
||||||
tmpB = dL_dKdiag*gB.reshape(dL_dKdiag.shape)
|
tmpC = dL_dKdiag*gC
|
||||||
tmpC = dL_dKdiag*gC.reshape(dL_dKdiag.shape)
|
tmp = dL_dKdiag*gS
|
||||||
tmp = dL_dKdiag.reshape(index.size, 1)*gS
|
|
||||||
for d in np.unique(index):
|
for d in np.unique(index):
|
||||||
ind = np.where(index == d)
|
ind = np.where(index == d)
|
||||||
self.B.gradient[d] = tmpB[ind].sum()
|
self.B.gradient[d] = tmpB[ind, :].sum()
|
||||||
self.C.gradient[d] = tmpC[ind].sum()
|
self.C.gradient[d] = tmpC[ind, :].sum()
|
||||||
self.W.gradient[d, :] = tmp[ind].sum(0)
|
self.W.gradient[d, :] = tmp[ind].sum(0)
|
||||||
|
|
||||||
def gradients_X(self, dL_dK, X, X2=None):
|
def gradients_X(self, dL_dK, X, X2=None):
|
||||||
|
|
@ -410,6 +432,23 @@ class EQ_ODE2(Kern):
|
||||||
kuu[indc, indr] = kuu[indr, indc]
|
kuu[indc, indr] = kuu[indr, indc]
|
||||||
return kuu
|
return kuu
|
||||||
|
|
||||||
|
def _Kusu(self, X, index, X2, index2):
|
||||||
|
index = index.reshape(index.size,)
|
||||||
|
index2 = index2.reshape(index2.size,)
|
||||||
|
t = X[:, 0].reshape(X.shape[0],1)
|
||||||
|
t2 = X2[:, 0].reshape(1,X2.shape[0])
|
||||||
|
lq = self.lengthscale.values.reshape(self.rank,)
|
||||||
|
#Covariance matrix initialization
|
||||||
|
kuu = np.zeros((t.size, t2.size))
|
||||||
|
for q in range(self.rank):
|
||||||
|
ind1 = index == q
|
||||||
|
ind2 = index2 == q
|
||||||
|
r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
|
||||||
|
r2 = r*r
|
||||||
|
#Calculation of covariance function
|
||||||
|
kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
|
||||||
|
return kuu
|
||||||
|
|
||||||
#Evaluation of cross-covariance function
|
#Evaluation of cross-covariance function
|
||||||
def _Kfu(self, X, index, X2, index2):
|
def _Kfu(self, X, index, X2, index2):
|
||||||
#terms that move along t
|
#terms that move along t
|
||||||
|
|
@ -632,8 +671,8 @@ class EQ_ODE2(Kern):
|
||||||
lq = self.lengthscale.values.reshape(1, self.rank)
|
lq = self.lengthscale.values.reshape(1, self.rank)
|
||||||
lq2 = lq*lq
|
lq2 = lq*lq
|
||||||
|
|
||||||
gB = np.empty((t.size,))
|
gB = np.empty((t.size, lq.size))
|
||||||
gC = np.empty((t.size,))
|
gC = np.empty((t.size, lq.size))
|
||||||
glq = np.empty((t.size, lq.size))
|
glq = np.empty((t.size, lq.size))
|
||||||
gS = np.empty((t.size, lq.size))
|
gS = np.empty((t.size, lq.size))
|
||||||
|
|
||||||
|
|
@ -723,8 +762,8 @@ class EQ_ODE2(Kern):
|
||||||
Ba4_1 = (S2lq*lq)*dgam_dB/w2
|
Ba4_1 = (S2lq*lq)*dgam_dB/w2
|
||||||
Ba4 = Ba4_1*c
|
Ba4 = Ba4_1*c
|
||||||
|
|
||||||
gB[ind3t] = np.sum(np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
|
gB[ind3t] = np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
|
||||||
+ np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd), axis=1)
|
+ np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd)
|
||||||
|
|
||||||
# gradient wrt C
|
# gradient wrt C
|
||||||
dw_dC = - alphad*dw_dB
|
dw_dC = - alphad*dw_dB
|
||||||
|
|
@ -738,8 +777,8 @@ class EQ_ODE2(Kern):
|
||||||
Ca4_1 = (S2lq*lq)*dgam_dC/w2
|
Ca4_1 = (S2lq*lq)*dgam_dC/w2
|
||||||
Ca4 = Ca4_1*c
|
Ca4 = Ca4_1*c
|
||||||
|
|
||||||
gC[ind3t] = np.sum(np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
|
gC[ind3t] = np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
|
||||||
+ np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd), axis=1)
|
+ np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd)
|
||||||
|
|
||||||
#Gradient wrt lengthscale
|
#Gradient wrt lengthscale
|
||||||
#DxQ terms
|
#DxQ terms
|
||||||
|
|
@ -868,10 +907,10 @@ class EQ_ODE2(Kern):
|
||||||
Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc))
|
Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc))
|
||||||
Ba2_2c = c0*dgamc_dB/gamc
|
Ba2_2c = c0*dgamc_dB/gamc
|
||||||
|
|
||||||
gB[ind2t] = np.sum(Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
|
gB[ind2t] = Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
|
||||||
+ Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\
|
+ Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\
|
||||||
+ Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\
|
+ Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\
|
||||||
+ Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc, axis=1)
|
+ Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc
|
||||||
|
|
||||||
##Gradient wrt C
|
##Gradient wrt C
|
||||||
dw_dC = 0.5*alphad/w
|
dw_dC = 0.5*alphad/w
|
||||||
|
|
@ -895,10 +934,10 @@ class EQ_ODE2(Kern):
|
||||||
Ca4_1c = S2lq2*(dgamc_dC/w2)
|
Ca4_1c = S2lq2*(dgamc_dC/w2)
|
||||||
Ca4c = Ca4_1c*c2
|
Ca4c = Ca4_1c*c2
|
||||||
|
|
||||||
gC[ind2t] = np.sum(Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
|
gC[ind2t] = Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
|
||||||
+ Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\
|
+ Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\
|
||||||
+ Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\
|
+ Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\
|
||||||
+ Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc, axis=1)
|
+ Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc
|
||||||
|
|
||||||
#Gradient wrt lengthscale
|
#Gradient wrt lengthscale
|
||||||
#DxQ terms
|
#DxQ terms
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,10 @@ import scipy as sp
|
||||||
|
|
||||||
class sde_RBF(RBF):
|
class sde_RBF(RBF):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Class provide extra functionality to transfer this covariance function into
|
Class provide extra functionality to transfer this covariance function into
|
||||||
SDE form.
|
SDE form.
|
||||||
|
|
||||||
Radial Basis Function kernel:
|
Radial Basis Function kernel:
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
|
|
@ -30,90 +30,90 @@ class sde_RBF(RBF):
|
||||||
Update gradient in the order in which parameters are represented in the
|
Update gradient in the order in which parameters are represented in the
|
||||||
kernel
|
kernel
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.variance.gradient = gradients[0]
|
self.variance.gradient = gradients[0]
|
||||||
self.lengthscale.gradient = gradients[1]
|
self.lengthscale.gradient = gradients[1]
|
||||||
|
|
||||||
def sde(self):
|
def sde(self):
|
||||||
"""
|
"""
|
||||||
Return the state space representation of the covariance.
|
Return the state space representation of the covariance.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
N = 10# approximation order ( number of terms in exponent series expansion)
|
N = 10# approximation order ( number of terms in exponent series expansion)
|
||||||
roots_rounding_decimals = 6
|
roots_rounding_decimals = 6
|
||||||
|
|
||||||
fn = np.math.factorial(N)
|
fn = np.math.factorial(N)
|
||||||
|
|
||||||
kappa = 1.0/2.0/self.lengthscale**2
|
kappa = 1.0/2.0/self.lengthscale**2
|
||||||
|
|
||||||
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
|
Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
|
||||||
|
|
||||||
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
|
pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
|
||||||
|
|
||||||
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
|
for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
|
||||||
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
|
pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
|
||||||
|
|
||||||
pp = sp.poly1d(pp)
|
pp = sp.poly1d(pp)
|
||||||
roots = sp.roots(pp)
|
roots = sp.roots(pp)
|
||||||
|
|
||||||
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
|
neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
|
||||||
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
|
aa = sp.poly1d(neg_real_part_roots, r=True).coeffs
|
||||||
|
|
||||||
F = np.diag(np.ones((N-1,)),1)
|
F = np.diag(np.ones((N-1,)),1)
|
||||||
F[-1,:] = -aa[-1:0:-1]
|
F[-1,:] = -aa[-1:0:-1]
|
||||||
|
|
||||||
L= np.zeros((N,1))
|
L= np.zeros((N,1))
|
||||||
L[N-1,0] = 1
|
L[N-1,0] = 1
|
||||||
|
|
||||||
H = np.zeros((1,N))
|
H = np.zeros((1,N))
|
||||||
H[0,0] = 1
|
H[0,0] = 1
|
||||||
|
|
||||||
# Infinite covariance:
|
# Infinite covariance:
|
||||||
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
|
Pinf = sp.linalg.solve_lyapunov(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
|
||||||
Pinf = 0.5*(Pinf + Pinf.T)
|
Pinf = 0.5*(Pinf + Pinf.T)
|
||||||
# Allocating space for derivatives
|
# Allocating space for derivatives
|
||||||
dF = np.empty([F.shape[0],F.shape[1],2])
|
dF = np.empty([F.shape[0],F.shape[1],2])
|
||||||
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
|
dQc = np.empty([Qc.shape[0],Qc.shape[1],2])
|
||||||
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
|
dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2])
|
||||||
|
|
||||||
# Derivatives:
|
# Derivatives:
|
||||||
dFvariance = np.zeros(F.shape)
|
dFvariance = np.zeros(F.shape)
|
||||||
dFlengthscale = np.zeros(F.shape)
|
dFlengthscale = np.zeros(F.shape)
|
||||||
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
|
dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
|
||||||
|
|
||||||
dQcvariance = Qc/self.variance
|
dQcvariance = Qc/self.variance
|
||||||
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
|
dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
|
||||||
|
|
||||||
dPinf_variance = Pinf/self.variance
|
dPinf_variance = Pinf/self.variance
|
||||||
|
|
||||||
lp = Pinf.shape[0]
|
lp = Pinf.shape[0]
|
||||||
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
|
coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
|
||||||
coeff[np.mod(coeff,2) != 0] = 0
|
coeff[np.mod(coeff,2) != 0] = 0
|
||||||
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
|
dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
|
||||||
|
|
||||||
dF[:,:,0] = dFvariance
|
dF[:,:,0] = dFvariance
|
||||||
dF[:,:,1] = dFlengthscale
|
dF[:,:,1] = dFlengthscale
|
||||||
dQc[:,:,0] = dQcvariance
|
dQc[:,:,0] = dQcvariance
|
||||||
dQc[:,:,1] = dQclengthscale
|
dQc[:,:,1] = dQclengthscale
|
||||||
dPinf[:,:,0] = dPinf_variance
|
dPinf[:,:,0] = dPinf_variance
|
||||||
dPinf[:,:,1] = dPinf_lengthscale
|
dPinf[:,:,1] = dPinf_lengthscale
|
||||||
|
|
||||||
P0 = Pinf.copy()
|
P0 = Pinf.copy()
|
||||||
dP0 = dPinf.copy()
|
dP0 = dPinf.copy()
|
||||||
|
|
||||||
# Benefits of this are not very sound. Helps only in one case:
|
# Benefits of this are not very sound. Helps only in one case:
|
||||||
# SVD Kalman + RBF kernel
|
# SVD Kalman + RBF kernel
|
||||||
import GPy.models.state_space_main as ssm
|
import GPy.models.state_space_main as ssm
|
||||||
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
|
(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
|
||||||
|
|
||||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||||
|
|
||||||
class sde_Exponential(Exponential):
|
class sde_Exponential(Exponential):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Class provide extra functionality to transfer this covariance function into
|
Class provide extra functionality to transfer this covariance function into
|
||||||
SDE form.
|
SDE form.
|
||||||
|
|
||||||
Exponential kernel:
|
Exponential kernel:
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
|
|
@ -121,53 +121,53 @@ class sde_Exponential(Exponential):
|
||||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\ \text{ where } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def sde_update_gradient_full(self, gradients):
|
def sde_update_gradient_full(self, gradients):
|
||||||
"""
|
"""
|
||||||
Update gradient in the order in which parameters are represented in the
|
Update gradient in the order in which parameters are represented in the
|
||||||
kernel
|
kernel
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.variance.gradient = gradients[0]
|
self.variance.gradient = gradients[0]
|
||||||
self.lengthscale.gradient = gradients[1]
|
self.lengthscale.gradient = gradients[1]
|
||||||
|
|
||||||
def sde(self):
|
def sde(self):
|
||||||
"""
|
"""
|
||||||
Return the state space representation of the covariance.
|
Return the state space representation of the covariance.
|
||||||
"""
|
"""
|
||||||
variance = float(self.variance.values)
|
variance = float(self.variance.values)
|
||||||
lengthscale = float(self.lengthscale)
|
lengthscale = float(self.lengthscale)
|
||||||
|
|
||||||
F = np.array(((-1.0/lengthscale,),))
|
F = np.array(((-1.0/lengthscale,),))
|
||||||
L = np.array(((1.0,),))
|
L = np.array(((1.0,),))
|
||||||
Qc = np.array( ((2.0*variance/lengthscale,),) )
|
Qc = np.array( ((2.0*variance/lengthscale,),) )
|
||||||
H = np.array(((1.0,),))
|
H = np.array(((1.0,),))
|
||||||
Pinf = np.array(((variance,),))
|
Pinf = np.array(((variance,),))
|
||||||
P0 = Pinf.copy()
|
P0 = Pinf.copy()
|
||||||
|
|
||||||
dF = np.zeros((1,1,2));
|
dF = np.zeros((1,1,2));
|
||||||
dQc = np.zeros((1,1,2));
|
dQc = np.zeros((1,1,2));
|
||||||
dPinf = np.zeros((1,1,2));
|
dPinf = np.zeros((1,1,2));
|
||||||
|
|
||||||
dF[:,:,0] = 0.0
|
dF[:,:,0] = 0.0
|
||||||
dF[:,:,1] = 1.0/lengthscale**2
|
dF[:,:,1] = 1.0/lengthscale**2
|
||||||
|
|
||||||
dQc[:,:,0] = 2.0/lengthscale
|
dQc[:,:,0] = 2.0/lengthscale
|
||||||
dQc[:,:,1] = -2.0*variance/lengthscale**2
|
dQc[:,:,1] = -2.0*variance/lengthscale**2
|
||||||
|
|
||||||
dPinf[:,:,0] = 1.0
|
dPinf[:,:,0] = 1.0
|
||||||
dPinf[:,:,1] = 0.0
|
dPinf[:,:,1] = 0.0
|
||||||
|
|
||||||
dP0 = dPinf.copy()
|
dP0 = dPinf.copy()
|
||||||
|
|
||||||
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
|
||||||
|
|
||||||
class sde_RatQuad(RatQuad):
|
class sde_RatQuad(RatQuad):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Class provide extra functionality to transfer this covariance function into
|
Class provide extra functionality to transfer this covariance function into
|
||||||
SDE form.
|
SDE form.
|
||||||
|
|
||||||
Rational Quadratic kernel:
|
Rational Quadratic kernel:
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
|
|
@ -177,16 +177,16 @@ class sde_RatQuad(RatQuad):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def sde(self):
|
def sde(self):
|
||||||
"""
|
"""
|
||||||
Return the state space representation of the covariance.
|
Return the state space representation of the covariance.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
assert False, 'Not Implemented'
|
assert False, 'Not Implemented'
|
||||||
|
|
||||||
# Params to use:
|
# Params to use:
|
||||||
|
|
||||||
# self.lengthscale
|
# self.lengthscale
|
||||||
# self.variance
|
# self.variance
|
||||||
#self.power
|
#self.power
|
||||||
|
|
||||||
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
|
#return (F, L, Qc, H, Pinf, dF, dQc, dPinf)
|
||||||
|
|
|
||||||
|
|
@ -85,10 +85,10 @@ class WhiteHeteroscedastic(Static):
|
||||||
def __init__(self, input_dim, num_data, variance=1., active_dims=None, name='white_hetero'):
|
def __init__(self, input_dim, num_data, variance=1., active_dims=None, name='white_hetero'):
|
||||||
"""
|
"""
|
||||||
A heteroscedastic White kernel (nugget/noise).
|
A heteroscedastic White kernel (nugget/noise).
|
||||||
It defines one variance (nugget) per input sample.
|
It defines one variance (nugget) per input sample.
|
||||||
|
|
||||||
Prediction excludes any noise learnt by this Kernel, so be careful using this kernel.
|
Prediction excludes any noise learnt by this Kernel, so be careful using this kernel.
|
||||||
|
|
||||||
You can plot the errors learnt by this kernel by something similar as:
|
You can plot the errors learnt by this kernel by something similar as:
|
||||||
plt.errorbar(m.X, m.Y, yerr=2*np.sqrt(m.kern.white.variance))
|
plt.errorbar(m.X, m.Y, yerr=2*np.sqrt(m.kern.white.variance))
|
||||||
"""
|
"""
|
||||||
|
|
@ -98,7 +98,7 @@ class WhiteHeteroscedastic(Static):
|
||||||
|
|
||||||
def Kdiag(self, X):
|
def Kdiag(self, X):
|
||||||
if X.shape[0] == self.variance.shape[0]:
|
if X.shape[0] == self.variance.shape[0]:
|
||||||
# If the input has the same number of samples as
|
# If the input has the same number of samples as
|
||||||
# the number of variances, we return the variances
|
# the number of variances, we return the variances
|
||||||
return self.variance
|
return self.variance
|
||||||
return 0.
|
return 0.
|
||||||
|
|
@ -181,7 +181,7 @@ class Fixed(Static):
|
||||||
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
|
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
|
||||||
|
|
||||||
def update_gradients_diag(self, dL_dKdiag, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
|
self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))
|
||||||
|
|
||||||
def psi2(self, Z, variational_posterior):
|
def psi2(self, Z, variational_posterior):
|
||||||
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
|
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||||
|
|
@ -192,3 +192,53 @@ class Fixed(Static):
|
||||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
self.variance.gradient = dL_dpsi0.sum()
|
self.variance.gradient = dL_dpsi0.sum()
|
||||||
|
|
||||||
|
class Precomputed(Fixed):
|
||||||
|
def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='precomputed'):
|
||||||
|
"""
|
||||||
|
Class for precomputed kernels, indexed by columns in X
|
||||||
|
|
||||||
|
Usage example:
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from GPy.models import GPClassification
|
||||||
|
from GPy.kern import Precomputed
|
||||||
|
from sklearn.cross_validation import LeaveOneOut
|
||||||
|
|
||||||
|
n = 10
|
||||||
|
d = 100
|
||||||
|
X = np.arange(n).reshape((n,1)) # column vector of indices
|
||||||
|
y = 2*np.random.binomial(1,0.5,(n,1))-1
|
||||||
|
X0 = np.random.randn(n,d)
|
||||||
|
k = np.dot(X0,X0.T)
|
||||||
|
kern = Precomputed(1,k) # k is a n x n covariance matrix
|
||||||
|
|
||||||
|
cv = LeaveOneOut(n)
|
||||||
|
ypred = y.copy()
|
||||||
|
for train, test in cv:
|
||||||
|
m = GPClassification(X[train], y[train], kernel=kern)
|
||||||
|
m.optimize()
|
||||||
|
ypred[test] = 2*(m.predict(X[test])[0]>0.5)-1
|
||||||
|
|
||||||
|
:param input_dim: the number of input dimensions
|
||||||
|
:type input_dim: int
|
||||||
|
:param variance: the variance of the kernel
|
||||||
|
:type variance: float
|
||||||
|
"""
|
||||||
|
super(Precomputed, self).__init__(input_dim, covariance_matrix, variance, active_dims, name)
|
||||||
|
def K(self, X, X2=None):
|
||||||
|
if X2 is None:
|
||||||
|
return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')]
|
||||||
|
else:
|
||||||
|
return self.variance * self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')]
|
||||||
|
|
||||||
|
def Kdiag(self, X):
|
||||||
|
return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')].diagonal()
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
if X2 is None:
|
||||||
|
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
|
||||||
|
else:
|
||||||
|
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')])
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
|
self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
|
||||||
|
|
@ -315,10 +315,10 @@ class Exponential(Stationary):
|
||||||
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||||
|
|
||||||
def K_of_r(self, r):
|
def K_of_r(self, r):
|
||||||
return self.variance * np.exp(-0.5 * r)
|
return self.variance * np.exp(-r)
|
||||||
|
|
||||||
def dK_dr(self, r):
|
def dK_dr(self, r):
|
||||||
return -0.5*self.K_of_r(r)
|
return -self.K_of_r(r)
|
||||||
|
|
||||||
# def sde(self):
|
# def sde(self):
|
||||||
# """
|
# """
|
||||||
|
|
|
||||||
|
|
@ -24,3 +24,5 @@ from .one_vs_all_sparse_classification import OneVsAllSparseClassification
|
||||||
from .dpgplvm import DPBayesianGPLVM
|
from .dpgplvm import DPBayesianGPLVM
|
||||||
|
|
||||||
from .state_space_model import StateSpace
|
from .state_space_model import StateSpace
|
||||||
|
|
||||||
|
from .ibp_lfm import IBPLFM
|
||||||
|
|
|
||||||
535
GPy/models/ibp_lfm.py
Normal file
|
|
@ -0,0 +1,535 @@
|
||||||
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from ..core.sparse_gp_mpi import SparseGP_MPI
|
||||||
|
from .. import kern
|
||||||
|
from ..util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv
|
||||||
|
from ..util import diag
|
||||||
|
from ..core.parameterization import Param
|
||||||
|
from ..likelihoods import Gaussian
|
||||||
|
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
|
||||||
|
from ..inference.latent_function_inference.posterior import Posterior
|
||||||
|
from GPy.core.parameterization.variational import VariationalPrior
|
||||||
|
from ..core.parameterization.parameterized import Parameterized
|
||||||
|
from paramz.transformations import Logexp, Logistic, __fixed__
|
||||||
|
log_2_pi = np.log(2*np.pi)
|
||||||
|
|
||||||
|
class VarDTC_minibatch_IBPLFM(VarDTC_minibatch):
|
||||||
|
'''
|
||||||
|
Modifications of VarDTC_minibatch for IBP LFM
|
||||||
|
'''
|
||||||
|
|
||||||
|
def __init__(self, batchsize=None, limit=3, mpi_comm=None):
|
||||||
|
super(VarDTC_minibatch_IBPLFM, self).__init__(batchsize, limit, mpi_comm)
|
||||||
|
|
||||||
|
def gatherPsiStat(self, kern, X, Z, Y, beta, Zp):
|
||||||
|
|
||||||
|
het_noise = beta.size > 1
|
||||||
|
|
||||||
|
assert beta.size == 1
|
||||||
|
|
||||||
|
trYYT = self.get_trYYT(Y)
|
||||||
|
if self.Y_speedup and not het_noise:
|
||||||
|
Y = self.get_YYTfactor(Y)
|
||||||
|
|
||||||
|
num_inducing = Z.shape[0]
|
||||||
|
num_data, output_dim = Y.shape
|
||||||
|
batchsize = num_data if self.batchsize is None else self.batchsize
|
||||||
|
|
||||||
|
psi2_full = np.zeros((num_inducing, num_inducing)) # MxM
|
||||||
|
psi1Y_full = np.zeros((output_dim, num_inducing)) # DxM
|
||||||
|
psi0_full = 0.
|
||||||
|
YRY_full = 0.
|
||||||
|
|
||||||
|
for n_start in range(0, num_data, batchsize):
|
||||||
|
n_end = min(batchsize+n_start, num_data)
|
||||||
|
if batchsize == num_data:
|
||||||
|
Y_slice = Y
|
||||||
|
X_slice = X
|
||||||
|
else:
|
||||||
|
Y_slice = Y[n_start:n_end]
|
||||||
|
X_slice = X[n_start:n_end]
|
||||||
|
|
||||||
|
if het_noise:
|
||||||
|
b = beta[n_start]
|
||||||
|
YRY_full += np.inner(Y_slice, Y_slice)*b
|
||||||
|
else:
|
||||||
|
b = beta
|
||||||
|
|
||||||
|
psi0 = kern._Kdiag(X_slice) #Kff^q
|
||||||
|
psi1 = kern.K(X_slice, Z) #Kfu
|
||||||
|
|
||||||
|
indX = X_slice.values
|
||||||
|
indX = np.int_(np.round(indX[:, -1]))
|
||||||
|
|
||||||
|
Zp = Zp.gamma.values
|
||||||
|
# Extend Zp across columns
|
||||||
|
indZ = Z.values
|
||||||
|
indZ = np.int_(np.round(indZ[:, -1])) - Zp.shape[0]
|
||||||
|
Zpq = Zp[:, indZ]
|
||||||
|
|
||||||
|
for d in np.unique(indX):
|
||||||
|
indd = indX == d
|
||||||
|
psi1d = psi1[indd, :]
|
||||||
|
Zpd = Zp[d, :]
|
||||||
|
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
|
||||||
|
psi2_full += (np.dot(psi1d.T, psi1d)*Zp2[np.ix_(indZ, indZ)])*b #Zp2*Kufd*Kfud*beta
|
||||||
|
|
||||||
|
psi0_full += np.sum(psi0*Zp[indX, :])*b
|
||||||
|
psi1Y_full += np.dot(Y_slice.T, psi1*Zpq[indX, :])*b
|
||||||
|
|
||||||
|
if not het_noise:
|
||||||
|
YRY_full = trYYT*beta
|
||||||
|
|
||||||
|
if self.mpi_comm is not None:
|
||||||
|
from mpi4py import MPI
|
||||||
|
psi0_all = np.array(psi0_full)
|
||||||
|
psi1Y_all = psi1Y_full.copy()
|
||||||
|
psi2_all = psi2_full.copy()
|
||||||
|
YRY_all = np.array(YRY_full)
|
||||||
|
self.mpi_comm.Allreduce([psi0_full, MPI.DOUBLE], [psi0_all, MPI.DOUBLE])
|
||||||
|
self.mpi_comm.Allreduce([psi1Y_full, MPI.DOUBLE], [psi1Y_all, MPI.DOUBLE])
|
||||||
|
self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE])
|
||||||
|
self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE])
|
||||||
|
return psi0_all, psi1Y_all, psi2_all, YRY_all
|
||||||
|
|
||||||
|
return psi0_full, psi1Y_full, psi2_full, YRY_full
|
||||||
|
|
||||||
|
|
||||||
|
def inference_likelihood(self, kern, X, Z, likelihood, Y, Zp):
|
||||||
|
"""
|
||||||
|
The first phase of inference:
|
||||||
|
Compute: log-likelihood, dL_dKmm
|
||||||
|
|
||||||
|
Cached intermediate results: Kmm, KmmInv,
|
||||||
|
"""
|
||||||
|
|
||||||
|
num_data, output_dim = Y.shape
|
||||||
|
input_dim = Z.shape[0]
|
||||||
|
if self.mpi_comm is not None:
|
||||||
|
from mpi4py import MPI
|
||||||
|
num_data_all = np.array(num_data,dtype=np.int32)
|
||||||
|
self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
|
||||||
|
num_data = num_data_all
|
||||||
|
|
||||||
|
#see whether we've got a different noise variance for each datum
|
||||||
|
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||||
|
het_noise = beta.size > 1
|
||||||
|
if het_noise:
|
||||||
|
self.batchsize = 1
|
||||||
|
|
||||||
|
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, Zp)
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Compute Common Components
|
||||||
|
#======================================================================
|
||||||
|
|
||||||
|
Kmm = kern.K(Z).copy()
|
||||||
|
diag.add(Kmm, self.const_jitter)
|
||||||
|
if not np.isfinite(Kmm).all():
|
||||||
|
print(Kmm)
|
||||||
|
Lm = jitchol(Kmm)
|
||||||
|
LmInv = dtrtri(Lm)
|
||||||
|
|
||||||
|
LmInvPsi2LmInvT = np.dot(LmInv, np.dot(psi2_full, LmInv.T))
|
||||||
|
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
|
||||||
|
LL = jitchol(Lambda)
|
||||||
|
LLInv = dtrtri(LL)
|
||||||
|
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
|
||||||
|
LmLLInv = np.dot(LLInv, LmInv)
|
||||||
|
|
||||||
|
b = np.dot(psi1Y_full, LmLLInv.T)
|
||||||
|
bbt = np.sum(np.square(b))
|
||||||
|
v = np.dot(b, LmLLInv).T
|
||||||
|
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
|
||||||
|
|
||||||
|
tmp = -np.dot(np.dot(LLInv.T, LLinvPsi1TYYTPsi1LLinvT + output_dim*np.eye(input_dim)), LLInv)
|
||||||
|
dL_dpsi2R = .5*np.dot(np.dot(LmInv.T, tmp + output_dim*np.eye(input_dim)), LmInv)
|
||||||
|
|
||||||
|
# Cache intermediate results
|
||||||
|
self.midRes['dL_dpsi2R'] = dL_dpsi2R
|
||||||
|
self.midRes['v'] = v
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Compute log-likelihood
|
||||||
|
#======================================================================
|
||||||
|
if het_noise:
|
||||||
|
logL_R = -np.sum(np.log(beta))
|
||||||
|
else:
|
||||||
|
logL_R = -num_data*np.log(beta)
|
||||||
|
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)*.5 - output_dim*logdet_L*.5
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Compute dL_dKmm
|
||||||
|
#======================================================================
|
||||||
|
|
||||||
|
dL_dKmm = dL_dpsi2R - .5*output_dim*np.dot(np.dot(LmInv.T, LmInvPsi2LmInvT), LmInv)
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Compute the Posterior distribution of inducing points p(u|Y)
|
||||||
|
#======================================================================
|
||||||
|
|
||||||
|
if not self.Y_speedup or het_noise:
|
||||||
|
wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
|
||||||
|
post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
|
||||||
|
else:
|
||||||
|
post = None
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Compute dL_dthetaL for uncertian input and non-heter noise
|
||||||
|
#======================================================================
|
||||||
|
|
||||||
|
if not het_noise:
|
||||||
|
dL_dthetaL = .5*(YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta) - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
|
||||||
|
self.midRes['dL_dthetaL'] = dL_dthetaL
|
||||||
|
|
||||||
|
return logL, dL_dKmm, post
|
||||||
|
|
||||||
|
def inference_minibatch(self, kern, X, Z, likelihood, Y, Zp):
|
||||||
|
"""
|
||||||
|
The second phase of inference: Computing the derivatives over a minibatch of Y
|
||||||
|
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
|
||||||
|
return a flag showing whether it reached the end of Y (isEnd)
|
||||||
|
"""
|
||||||
|
|
||||||
|
num_data, output_dim = Y.shape
|
||||||
|
|
||||||
|
#see whether we've got a different noise variance for each datum
|
||||||
|
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||||
|
het_noise = beta.size > 1
|
||||||
|
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
|
||||||
|
#self.YYTfactor = beta*self.get_YYTfactor(Y)
|
||||||
|
if self.Y_speedup and not het_noise:
|
||||||
|
YYT_factor = self.get_YYTfactor(Y)
|
||||||
|
else:
|
||||||
|
YYT_factor = Y
|
||||||
|
|
||||||
|
n_start = self.batch_pos
|
||||||
|
batchsize = num_data if self.batchsize is None else self.batchsize
|
||||||
|
n_end = min(batchsize+n_start, num_data)
|
||||||
|
if n_end == num_data:
|
||||||
|
isEnd = True
|
||||||
|
self.batch_pos = 0
|
||||||
|
else:
|
||||||
|
isEnd = False
|
||||||
|
self.batch_pos = n_end
|
||||||
|
|
||||||
|
if batchsize == num_data:
|
||||||
|
Y_slice = YYT_factor
|
||||||
|
X_slice = X
|
||||||
|
else:
|
||||||
|
Y_slice = YYT_factor[n_start:n_end]
|
||||||
|
X_slice = X[n_start:n_end]
|
||||||
|
|
||||||
|
psi0 = kern._Kdiag(X_slice) #Kffdiag
|
||||||
|
psi1 = kern.K(X_slice, Z) #Kfu
|
||||||
|
betapsi1 = np.einsum('n,nm->nm', beta, psi1)
|
||||||
|
|
||||||
|
X_slice = X_slice.values
|
||||||
|
Z = Z.values
|
||||||
|
|
||||||
|
Zp = Zp.gamma.values
|
||||||
|
indX = np.int_(X_slice[:, -1])
|
||||||
|
indZ = np.int_(Z[:, -1]) - Zp.shape[0]
|
||||||
|
|
||||||
|
betaY = beta*Y_slice
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Load Intermediate Results
|
||||||
|
#======================================================================
|
||||||
|
|
||||||
|
dL_dpsi2R = self.midRes['dL_dpsi2R']
|
||||||
|
v = self.midRes['v']
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Compute dL_dpsi
|
||||||
|
#======================================================================
|
||||||
|
|
||||||
|
dL_dpsi0 = -.5*output_dim*(beta * Zp[indX, :]) #XxQ #TODO: Check this gradient
|
||||||
|
|
||||||
|
dL_dpsi1 = np.dot(betaY, v.T)
|
||||||
|
dL_dEZp = psi1*dL_dpsi1
|
||||||
|
dL_dpsi1 = Zp[np.ix_(indX, indZ)]*dL_dpsi1
|
||||||
|
dL_dgamma = np.zeros(Zp.shape)
|
||||||
|
for d in np.unique(indX):
|
||||||
|
indd = indX == d
|
||||||
|
betapsi1d = betapsi1[indd, :]
|
||||||
|
psi1d = psi1[indd, :]
|
||||||
|
Zpd = Zp[d, :]
|
||||||
|
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
|
||||||
|
dL_dpsi1[indd, :] += np.dot(betapsi1d, Zp2[np.ix_(indZ, indZ)] * dL_dpsi2R)*2.
|
||||||
|
|
||||||
|
dL_EZp2 = dL_dpsi2R * (np.dot(psi1d.T, psi1d) * beta)*2. # Zpd*Kufd*Kfud*beta
|
||||||
|
#Gradient of Likelihood wrt gamma is calculated here
|
||||||
|
EZ = Zp[d, indZ]
|
||||||
|
for q in range(Zp.shape[1]):
|
||||||
|
EZt = EZ.copy()
|
||||||
|
indq = indZ == q
|
||||||
|
EZt[indq] = .5
|
||||||
|
dL_dgamma[d, q] = np.sum(dL_dEZp[np.ix_(indd, indq)]) + np.sum(dL_EZp2[:, indq]*EZt[:, None]) -\
|
||||||
|
.5*beta*(np.sum(psi0[indd, q]))
|
||||||
|
|
||||||
|
#======================================================================
|
||||||
|
# Compute dL_dthetaL
|
||||||
|
#======================================================================
|
||||||
|
if isEnd:
|
||||||
|
dL_dthetaL = self.midRes['dL_dthetaL']
|
||||||
|
else:
|
||||||
|
dL_dthetaL = 0.
|
||||||
|
|
||||||
|
grad_dict = {'dL_dKdiag': dL_dpsi0,
|
||||||
|
'dL_dKnm': dL_dpsi1,
|
||||||
|
'dL_dthetaL': dL_dthetaL,
|
||||||
|
'dL_dgamma': dL_dgamma}
|
||||||
|
|
||||||
|
return isEnd, (n_start, n_end), grad_dict
|
||||||
|
|
||||||
|
|
||||||
|
def update_gradients(model, mpi_comm=None):
|
||||||
|
if mpi_comm is None:
|
||||||
|
Y = model.Y
|
||||||
|
X = model.X
|
||||||
|
else:
|
||||||
|
Y = model.Y_local
|
||||||
|
X = model.X[model.N_range[0]:model.N_range[1]]
|
||||||
|
|
||||||
|
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
|
||||||
|
|
||||||
|
het_noise = model.likelihood.variance.size > 1
|
||||||
|
|
||||||
|
if het_noise:
|
||||||
|
dL_dthetaL = np.empty((model.Y.shape[0],))
|
||||||
|
else:
|
||||||
|
dL_dthetaL = np.float64(0.)
|
||||||
|
|
||||||
|
kern_grad = model.kern.gradient.copy()
|
||||||
|
kern_grad[:] = 0.
|
||||||
|
model.Z.gradient = 0.
|
||||||
|
gamma_gradient = model.Zp.gamma.copy()
|
||||||
|
gamma_gradient[:] = 0.
|
||||||
|
|
||||||
|
isEnd = False
|
||||||
|
while not isEnd:
|
||||||
|
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
|
||||||
|
|
||||||
|
if (n_range[1]-n_range[0]) == X.shape[0]:
|
||||||
|
X_slice = X
|
||||||
|
elif mpi_comm is None:
|
||||||
|
X_slice = model.X[n_range[0]:n_range[1]]
|
||||||
|
else:
|
||||||
|
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
|
||||||
|
|
||||||
|
#gradients w.r.t. kernel
|
||||||
|
model.kern.update_gradients_diag(grad_dict['dL_dKdiag'], X_slice)
|
||||||
|
kern_grad += model.kern.gradient
|
||||||
|
|
||||||
|
model.kern.update_gradients_full(grad_dict['dL_dKnm'], X_slice, model.Z)
|
||||||
|
kern_grad += model.kern.gradient
|
||||||
|
|
||||||
|
#gradients w.r.t. Z
|
||||||
|
model.Z.gradient += model.kern.gradients_X(grad_dict['dL_dKnm'].T, model.Z, X_slice)
|
||||||
|
|
||||||
|
#gradients w.r.t. posterior parameters of Zp
|
||||||
|
gamma_gradient += grad_dict['dL_dgamma']
|
||||||
|
|
||||||
|
if het_noise:
|
||||||
|
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
|
||||||
|
else:
|
||||||
|
dL_dthetaL += grad_dict['dL_dthetaL']
|
||||||
|
|
||||||
|
# Gather the gradients from multiple MPI nodes
|
||||||
|
if mpi_comm is not None:
|
||||||
|
from mpi4py import MPI
|
||||||
|
if het_noise:
|
||||||
|
raise "het_noise not implemented!"
|
||||||
|
kern_grad_all = kern_grad.copy()
|
||||||
|
Z_grad_all = model.Z.gradient.copy()
|
||||||
|
gamma_grad_all = gamma_gradient.copy()
|
||||||
|
mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE])
|
||||||
|
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
|
||||||
|
mpi_comm.Allreduce([gamma_gradient, MPI.DOUBLE], [gamma_grad_all, MPI.DOUBLE])
|
||||||
|
kern_grad = kern_grad_all
|
||||||
|
model.Z.gradient = Z_grad_all
|
||||||
|
gamma_gradient = gamma_grad_all
|
||||||
|
|
||||||
|
#gradients w.r.t. kernel
|
||||||
|
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
|
||||||
|
model.kern.gradient += kern_grad
|
||||||
|
|
||||||
|
#gradients w.r.t. Z
|
||||||
|
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
|
||||||
|
|
||||||
|
#gradient w.r.t. gamma
|
||||||
|
model.Zp.gamma.gradient = gamma_gradient
|
||||||
|
|
||||||
|
# Update Log-likelihood
|
||||||
|
KL_div = model.variational_prior.KL_divergence(model.Zp)
|
||||||
|
# update for the KL divergence
|
||||||
|
model.variational_prior.update_gradients_KL(model.Zp)
|
||||||
|
|
||||||
|
model._log_marginal_likelihood += KL_div
|
||||||
|
|
||||||
|
# dL_dthetaL
|
||||||
|
model.likelihood.update_gradients(dL_dthetaL)
|
||||||
|
|
||||||
|
|
||||||
|
class IBPPosterior(Parameterized):
|
||||||
|
'''
|
||||||
|
The IBP distribution for variational approximations.
|
||||||
|
'''
|
||||||
|
def __init__(self, binary_prob, tau=None, name='Sensitivity space', *a, **kw):
|
||||||
|
"""
|
||||||
|
binary_prob : the probability of including a latent function over an output.
|
||||||
|
"""
|
||||||
|
super(IBPPosterior, self).__init__(name=name, *a, **kw)
|
||||||
|
self.gamma = Param("binary_prob", binary_prob, Logistic(1e-10, 1. - 1e-10))
|
||||||
|
self.link_parameter(self.gamma)
|
||||||
|
if tau is not None:
|
||||||
|
assert tau.size == 2*self.gamma_.shape[1]
|
||||||
|
self.tau = Param("tau", tau, Logexp())
|
||||||
|
else:
|
||||||
|
self.tau = Param("tau", np.ones((2, self.gamma.shape[1])), Logexp())
|
||||||
|
self.link_parameter(self.tau)
|
||||||
|
|
||||||
|
def set_gradients(self, grad):
|
||||||
|
self.gamma.gradient, self.tau.gradient = grad
|
||||||
|
|
||||||
|
def __getitem__(self, s):
|
||||||
|
pass
|
||||||
|
# if isinstance(s, (int, slice, tuple, list, np.ndarray)):
|
||||||
|
# import copy
|
||||||
|
# n = self.__new__(self.__class__, self.name)
|
||||||
|
# dc = self.__dict__.copy()
|
||||||
|
# dc['binary_prob'] = self.binary_prob[s]
|
||||||
|
# dc['tau'] = self.tau
|
||||||
|
# dc['parameters'] = copy.copy(self.parameters)
|
||||||
|
# n.__dict__.update(dc)
|
||||||
|
# n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
||||||
|
# n.parameters[dc['tau']._parent_index_] = dc['tau']
|
||||||
|
# n._gradient_array_ = None
|
||||||
|
# oversize = self.size - self.gamma.size - self.tau.size
|
||||||
|
# n.size = n.gamma.size + n.tau.size + oversize
|
||||||
|
# return n
|
||||||
|
# else:
|
||||||
|
# return super(IBPPosterior, self).__getitem__(s)
|
||||||
|
|
||||||
|
class IBPPrior(VariationalPrior):
|
||||||
|
def __init__(self, rank, alpha=2., name='IBPPrior', **kw):
|
||||||
|
super(IBPPrior, self).__init__(name=name, **kw)
|
||||||
|
from paramz.transformations import __fixed__
|
||||||
|
self.rank = rank
|
||||||
|
self.alpha = Param('alpha', alpha, __fixed__)
|
||||||
|
self.link_parameter(self.alpha)
|
||||||
|
|
||||||
|
def KL_divergence(self, variational_posterior):
|
||||||
|
from scipy.special import gamma, psi
|
||||||
|
|
||||||
|
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
|
||||||
|
|
||||||
|
sum_eta = np.sum(eta, axis=0) #sum_d gamma(d,q)
|
||||||
|
D_seta = eta.shape[0] - sum_eta
|
||||||
|
ad = self.alpha/eta.shape[1]
|
||||||
|
psitau1 = psi(tau[0, :])
|
||||||
|
psitau2 = psi(tau[1, :])
|
||||||
|
sumtau = np.sum(tau, axis=0)
|
||||||
|
psitau = psi(sumtau)
|
||||||
|
# E[log p(z)]
|
||||||
|
part1 = np.sum(sum_eta*psitau1 + D_seta*psitau2 - eta.shape[0]*psitau)
|
||||||
|
|
||||||
|
# E[log p(pi)]
|
||||||
|
part1 += (ad - 1.)*np.sum(psitau1 - psitau) + eta.shape[1]*np.log(ad)
|
||||||
|
|
||||||
|
#H(z)
|
||||||
|
part2 = np.sum(-(1.-eta)*np.log(1.-eta) - eta*np.log(eta))
|
||||||
|
#H(pi)
|
||||||
|
part2 += np.sum(np.log(gamma(tau[0, :])*gamma(tau[1, :])/gamma(sumtau))-(tau[0, :]-1.)*psitau1-(tau[1, :]-1.)*psitau2\
|
||||||
|
+ (sumtau-2.)*psitau)
|
||||||
|
|
||||||
|
return part1+part2
|
||||||
|
|
||||||
|
def update_gradients_KL(self, variational_posterior):
|
||||||
|
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
|
||||||
|
|
||||||
|
from scipy.special import psi, polygamma
|
||||||
|
dgamma = np.log(1. - eta) - np.log(eta) + psi(tau[0, :]) - psi(tau[1, :])
|
||||||
|
variational_posterior.gamma.gradient += dgamma
|
||||||
|
ad = self.alpha/self.rank
|
||||||
|
sumeta = np.sum(eta, axis=0)
|
||||||
|
sumtau = np.sum(tau, axis=0)
|
||||||
|
common = (-eta.shape[0] - (ad - 1.) + (sumtau - 2.))*polygamma(1, sumtau)
|
||||||
|
variational_posterior.tau.gradient[0, :] = (sumeta + ad - tau[0, :])*polygamma(1, tau[0, :]) + common
|
||||||
|
variational_posterior.tau.gradient[1, :] = ((eta.shape[0] - sumeta) - (tau[1, :] - 1.))*polygamma(1, tau[1, :])\
|
||||||
|
+ common
|
||||||
|
|
||||||
|
|
||||||
|
class IBPLFM(SparseGP_MPI):
|
||||||
|
"""
|
||||||
|
Indian Buffet Process for Latent Force Models
|
||||||
|
|
||||||
|
:param Y: observed data (np.ndarray) or GPy.likelihood
|
||||||
|
:type Y: np.ndarray| GPy.likelihood instance
|
||||||
|
:param X: input data (np.ndarray) [X:values, X:index], index refers to the number of the output
|
||||||
|
:type X: np.ndarray
|
||||||
|
:param input_dim: latent dimensionality
|
||||||
|
:type input_dim: int
|
||||||
|
: param rank: number of latent functions
|
||||||
|
|
||||||
|
"""
|
||||||
|
def __init__(self, X, Y, input_dim=2, output_dim=1, rank=1, Gamma=None, num_inducing=10,
|
||||||
|
Z=None, kernel=None, inference_method=None, likelihood=None, name='IBP for LFM', alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, normalizer=False, variational_prior=None,**kwargs):
|
||||||
|
|
||||||
|
if kernel is None:
|
||||||
|
kernel = kern.EQ_ODE2(input_dim, output_dim, rank)
|
||||||
|
|
||||||
|
if Gamma is None:
|
||||||
|
gamma = np.empty((output_dim, rank)) # The posterior probabilities of the binary variable in the variational approximation
|
||||||
|
gamma[:] = 0.5 + 0.1 * np.random.randn(output_dim, rank)
|
||||||
|
gamma[gamma>1.-1e-9] = 1.-1e-9
|
||||||
|
gamma[gamma<1e-9] = 1e-9
|
||||||
|
else:
|
||||||
|
gamma = Gamma.copy()
|
||||||
|
|
||||||
|
#TODO: create a vector of inducing points
|
||||||
|
if Z is None:
|
||||||
|
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||||
|
assert Z.shape[1] == X.shape[1]
|
||||||
|
|
||||||
|
if likelihood is None:
|
||||||
|
likelihood = Gaussian()
|
||||||
|
|
||||||
|
if inference_method is None:
|
||||||
|
inference_method = VarDTC_minibatch_IBPLFM(mpi_comm=mpi_comm)
|
||||||
|
|
||||||
|
#Definition of variational terms
|
||||||
|
self.variational_prior = IBPPrior(rank=rank, alpha=alpha) if variational_prior is None else variational_prior
|
||||||
|
self.Zp = IBPPosterior(gamma, tau=tau)
|
||||||
|
|
||||||
|
super(IBPLFM, self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
|
||||||
|
self.link_parameter(self.Zp, index=0)
|
||||||
|
|
||||||
|
def set_Zp_gradients(self, Zp, Zp_grad):
|
||||||
|
"""Set the gradients of the posterior distribution of Zp in its specific form."""
|
||||||
|
Zp.gamma.gradient = Zp_grad
|
||||||
|
|
||||||
|
def get_Zp_gradients(self, Zp):
|
||||||
|
"""Get the gradients of the posterior distribution of Zp in its specific form."""
|
||||||
|
return Zp.gamma.gradient
|
||||||
|
|
||||||
|
def _propogate_Zp_val(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def parameters_changed(self):
|
||||||
|
#super(IBPLFM,self).parameters_changed()
|
||||||
|
if isinstance(self.inference_method, VarDTC_minibatch_IBPLFM):
|
||||||
|
update_gradients(self, mpi_comm=self.mpi_comm)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Add the KL divergence term
|
||||||
|
self._log_marginal_likelihood += self.variational_prior.KL_divergence(self.Zp)
|
||||||
|
#TODO Change the following according to this variational distribution
|
||||||
|
#self.Zp.gamma.gradient = self.
|
||||||
|
|
||||||
|
# update for the KL divergence
|
||||||
|
self.variational_prior.update_gradients_KL(self.Zp)
|
||||||
|
|
@ -3237,6 +3237,7 @@ class ContDescrStateSpace(DescreteStateSpace):
|
||||||
AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n))))
|
AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n))))
|
||||||
|
|
||||||
Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T)
|
Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T)
|
||||||
|
Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T)
|
||||||
# The covariance matrix Q by matrix fraction decomposition <-
|
# The covariance matrix Q by matrix fraction decomposition <-
|
||||||
|
|
||||||
if compute_derivatives:
|
if compute_derivatives:
|
||||||
|
|
@ -3276,8 +3277,9 @@ class ContDescrStateSpace(DescreteStateSpace):
|
||||||
else:
|
else:
|
||||||
dA = None
|
dA = None
|
||||||
dQ = None
|
dQ = None
|
||||||
Q_noise = Q_noise_1
|
Q_noise = Q_noise_2
|
||||||
|
# Innacuracies have been observed when Q_noise_1 was used.
|
||||||
|
|
||||||
#Q_noise = Q_noise_1
|
#Q_noise = Q_noise_1
|
||||||
|
|
||||||
# Return
|
# Return
|
||||||
|
|
@ -3484,4 +3486,4 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
|
||||||
|
|
||||||
# (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
|
# (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
|
||||||
|
|
||||||
return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T
|
return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T
|
||||||
|
|
|
||||||
|
|
@ -15,52 +15,43 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import linalg
|
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
from ..core import Model
|
|
||||||
from .. import kern
|
|
||||||
#from GPy.plotting.matplot_dep.models_plots import gpplot
|
|
||||||
#from GPy.plotting.matplot_dep.base_plots import x_frame1D
|
|
||||||
#from GPy.plotting.matplot_dep import Tango
|
|
||||||
#import pylab as pb
|
|
||||||
from GPy.core.parameterization.param import Param
|
|
||||||
|
|
||||||
import GPy
|
|
||||||
from .. import likelihoods
|
from .. import likelihoods
|
||||||
|
#from . import state_space_setup as ss_setup
|
||||||
|
from ..core import Model
|
||||||
from . import state_space_main as ssm
|
from . import state_space_main as ssm
|
||||||
from . import state_space_setup as ss_setup
|
from . import state_space_setup as ss_setup
|
||||||
|
|
||||||
class StateSpace(Model):
|
class StateSpace(Model):
|
||||||
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
|
def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
|
||||||
super(StateSpace, self).__init__(name=name)
|
super(StateSpace, self).__init__(name=name)
|
||||||
|
|
||||||
if len(X.shape) == 1:
|
if len(X.shape) == 1:
|
||||||
X = np.atleast_2d(X).T
|
X = np.atleast_2d(X).T
|
||||||
self.num_data, input_dim = X.shape
|
self.num_data, self.input_dim = X.shape
|
||||||
|
|
||||||
if len(Y.shape) == 1:
|
if len(Y.shape) == 1:
|
||||||
Y = np.atleast_2d(Y).T
|
Y = np.atleast_2d(Y).T
|
||||||
|
|
||||||
assert input_dim==1, "State space methods are only for 1D data"
|
assert self.input_dim==1, "State space methods are only for 1D data"
|
||||||
|
|
||||||
if len(Y.shape)==2:
|
if len(Y.shape)==2:
|
||||||
num_data_Y, self.output_dim = Y.shape
|
num_data_Y, self.output_dim = Y.shape
|
||||||
ts_number = None
|
ts_number = None
|
||||||
elif len(Y.shape)==3:
|
elif len(Y.shape)==3:
|
||||||
num_data_Y, self.output_dim, ts_number = Y.shape
|
num_data_Y, self.output_dim, ts_number = Y.shape
|
||||||
|
|
||||||
self.ts_number = ts_number
|
self.ts_number = ts_number
|
||||||
|
|
||||||
assert num_data_Y == self.num_data, "X and Y data don't match"
|
assert num_data_Y == self.num_data, "X and Y data don't match"
|
||||||
assert self.output_dim == 1, "State space methods are for single outputs only"
|
assert self.output_dim == 1, "State space methods are for single outputs only"
|
||||||
|
|
||||||
self.kalman_filter_type = kalman_filter_type
|
self.kalman_filter_type = kalman_filter_type
|
||||||
#self.kalman_filter_type = 'svd' # temp test
|
#self.kalman_filter_type = 'svd' # temp test
|
||||||
ss_setup.use_cython = use_cython
|
ss_setup.use_cython = use_cython
|
||||||
|
|
||||||
#import pdb; pdb.set_trace()
|
#import pdb; pdb.set_trace()
|
||||||
|
|
||||||
global ssm
|
global ssm
|
||||||
#from . import state_space_main as ssm
|
#from . import state_space_main as ssm
|
||||||
if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
|
if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
|
||||||
|
|
@ -72,13 +63,13 @@ class StateSpace(Model):
|
||||||
|
|
||||||
# Noise variance
|
# Noise variance
|
||||||
self.likelihood = likelihoods.Gaussian(variance=noise_var)
|
self.likelihood = likelihoods.Gaussian(variance=noise_var)
|
||||||
|
|
||||||
# Default kernel
|
# Default kernel
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
raise ValueError("State-Space Model: the kernel must be provided.")
|
raise ValueError("State-Space Model: the kernel must be provided.")
|
||||||
else:
|
else:
|
||||||
self.kern = kernel
|
self.kern = kernel
|
||||||
|
|
||||||
self.link_parameter(self.kern)
|
self.link_parameter(self.kern)
|
||||||
self.link_parameter(self.likelihood)
|
self.link_parameter(self.likelihood)
|
||||||
self.posterior = None
|
self.posterior = None
|
||||||
|
|
@ -92,14 +83,14 @@ class StateSpace(Model):
|
||||||
"""
|
"""
|
||||||
Parameters have now changed
|
Parameters have now changed
|
||||||
"""
|
"""
|
||||||
|
|
||||||
#np.set_printoptions(16)
|
#np.set_printoptions(16)
|
||||||
#print(self.param_array)
|
#print(self.param_array)
|
||||||
#import pdb; pdb.set_trace()
|
#import pdb; pdb.set_trace()
|
||||||
|
|
||||||
# Get the model matrices from the kernel
|
# Get the model matrices from the kernel
|
||||||
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
|
(F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
|
||||||
|
|
||||||
# necessary parameters
|
# necessary parameters
|
||||||
measurement_dim = self.output_dim
|
measurement_dim = self.output_dim
|
||||||
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
|
grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
|
||||||
|
|
@ -109,30 +100,30 @@ class StateSpace(Model):
|
||||||
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
|
dQc = np.zeros([dQct.shape[0],dQct.shape[1],grad_params_no])
|
||||||
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
|
dP_inf = np.zeros([dP_inft.shape[0],dP_inft.shape[1],grad_params_no])
|
||||||
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
|
dP0 = np.zeros([dP0t.shape[0],dP0t.shape[1],grad_params_no])
|
||||||
|
|
||||||
# Assign the values for the kernel function
|
# Assign the values for the kernel function
|
||||||
dF[:,:,:-1] = dFt
|
dF[:,:,:-1] = dFt
|
||||||
dQc[:,:,:-1] = dQct
|
dQc[:,:,:-1] = dQct
|
||||||
dP_inf[:,:,:-1] = dP_inft
|
dP_inf[:,:,:-1] = dP_inft
|
||||||
dP0[:,:,:-1] = dP0t
|
dP0[:,:,:-1] = dP0t
|
||||||
|
|
||||||
# The sigma2 derivative
|
# The sigma2 derivative
|
||||||
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
|
dR = np.zeros([measurement_dim,measurement_dim,grad_params_no])
|
||||||
dR[:,:,-1] = np.eye(measurement_dim)
|
dR[:,:,-1] = np.eye(measurement_dim)
|
||||||
|
|
||||||
# Balancing
|
# Balancing
|
||||||
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
|
#(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
|
||||||
|
|
||||||
# Use the Kalman filter to evaluate the likelihood
|
# Use the Kalman filter to evaluate the likelihood
|
||||||
grad_calc_params = {}
|
grad_calc_params = {}
|
||||||
grad_calc_params['dP_inf'] = dP_inf
|
grad_calc_params['dP_inf'] = dP_inf
|
||||||
grad_calc_params['dF'] = dF
|
grad_calc_params['dF'] = dF
|
||||||
grad_calc_params['dQc'] = dQc
|
grad_calc_params['dQc'] = dQc
|
||||||
grad_calc_params['dR'] = dR
|
grad_calc_params['dR'] = dR
|
||||||
grad_calc_params['dP_init'] = dP0
|
grad_calc_params['dP_init'] = dP0
|
||||||
|
|
||||||
kalman_filter_type = self.kalman_filter_type
|
kalman_filter_type = self.kalman_filter_type
|
||||||
|
|
||||||
# The following code is required because sometimes the shapes of self.Y
|
# The following code is required because sometimes the shapes of self.Y
|
||||||
# becomes 3D even though is must be 2D. The reason is undescovered.
|
# becomes 3D even though is must be 2D. The reason is undescovered.
|
||||||
Y = self.Y
|
Y = self.Y
|
||||||
|
|
@ -140,63 +131,63 @@ class StateSpace(Model):
|
||||||
Y.shape = (self.num_data,1)
|
Y.shape = (self.num_data,1)
|
||||||
else:
|
else:
|
||||||
Y.shape = (self.num_data,1,self.ts_number)
|
Y.shape = (self.num_data,1,self.ts_number)
|
||||||
|
|
||||||
(filter_means, filter_covs, log_likelihood,
|
(filter_means, filter_covs, log_likelihood,
|
||||||
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
|
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
|
||||||
float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
|
float(self.Gaussian_noise.variance),P_inf,self.X,Y,m_init=None,
|
||||||
P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
|
P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
|
||||||
calc_grad_log_likelihood=True,
|
calc_grad_log_likelihood=True,
|
||||||
grad_params_no=grad_params_no,
|
grad_params_no=grad_params_no,
|
||||||
grad_calc_params=grad_calc_params)
|
grad_calc_params=grad_calc_params)
|
||||||
|
|
||||||
if np.any( np.isfinite(log_likelihood) == False):
|
if np.any( np.isfinite(log_likelihood) == False):
|
||||||
#import pdb; pdb.set_trace()
|
#import pdb; pdb.set_trace()
|
||||||
print("State-Space: NaN valkues in the log_likelihood")
|
print("State-Space: NaN valkues in the log_likelihood")
|
||||||
|
|
||||||
if np.any( np.isfinite(grad_log_likelihood) == False):
|
if np.any( np.isfinite(grad_log_likelihood) == False):
|
||||||
#import pdb; pdb.set_trace()
|
#import pdb; pdb.set_trace()
|
||||||
print("State-Space: NaN valkues in the grad_log_likelihood")
|
print("State-Space: NaN valkues in the grad_log_likelihood")
|
||||||
#print(grad_log_likelihood)
|
#print(grad_log_likelihood)
|
||||||
|
|
||||||
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
|
grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
|
||||||
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
|
grad_log_likelihood_sum.shape = (grad_log_likelihood_sum.shape[0],1)
|
||||||
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
|
self._log_marginal_likelihood = np.sum( log_likelihood,axis=1 )
|
||||||
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
|
self.likelihood.update_gradients(grad_log_likelihood_sum[-1,0])
|
||||||
|
|
||||||
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
|
self.kern.sde_update_gradient_full(grad_log_likelihood_sum[:-1,0])
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return self._log_marginal_likelihood
|
return self._log_marginal_likelihood
|
||||||
|
|
||||||
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False):
|
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
|
||||||
"""
|
"""
|
||||||
Performs the actual prediction for new X points.
|
Performs the actual prediction for new X points.
|
||||||
Inner function. It is called only from inside this class.
|
Inner function. It is called only from inside this class.
|
||||||
|
|
||||||
Input:
|
Input:
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
Xnews: vector or (n_points,1) matrix
|
Xnews: vector or (n_points,1) matrix
|
||||||
New time points where to evaluate predictions.
|
New time points where to evaluate predictions.
|
||||||
|
|
||||||
Ynews: (n_train_points, ts_no) matrix
|
Ynews: (n_train_points, ts_no) matrix
|
||||||
This matrix can substitude the original training points (in order
|
This matrix can substitude the original training points (in order
|
||||||
to use only the parameters of the model).
|
to use only the parameters of the model).
|
||||||
|
|
||||||
filteronly: bool
|
filteronly: bool
|
||||||
Use only Kalman Filter for prediction. In this case the output does
|
Use only Kalman Filter for prediction. In this case the output does
|
||||||
not coincide with corresponding Gaussian process.
|
not coincide with corresponding Gaussian process.
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
m: vector
|
m: vector
|
||||||
Mean prediction
|
Mean prediction
|
||||||
|
|
||||||
V: vector
|
V: vector
|
||||||
Variance in every point
|
Variance in every point
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Set defaults
|
# Set defaults
|
||||||
if Ynew is None:
|
if Ynew is None:
|
||||||
Ynew = self.Y
|
Ynew = self.Y
|
||||||
|
|
@ -209,8 +200,8 @@ class StateSpace(Model):
|
||||||
else:
|
else:
|
||||||
X = self.X
|
X = self.X
|
||||||
Y = Ynew
|
Y = Ynew
|
||||||
predict_only_training = True
|
predict_only_training = True
|
||||||
|
|
||||||
# Sort the matrix (save the order)
|
# Sort the matrix (save the order)
|
||||||
_, return_index, return_inverse = np.unique(X,True,True)
|
_, return_index, return_inverse = np.unique(X,True,True)
|
||||||
X = X[return_index] # TODO they are not used
|
X = X[return_index] # TODO they are not used
|
||||||
|
|
@ -218,37 +209,37 @@ class StateSpace(Model):
|
||||||
|
|
||||||
# Get the model matrices from the kernel
|
# Get the model matrices from the kernel
|
||||||
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
|
(F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
|
||||||
state_dim = F.shape[0]
|
state_dim = F.shape[0]
|
||||||
|
|
||||||
#Y = self.Y[:, 0,0]
|
#Y = self.Y[:, 0,0]
|
||||||
# Run the Kalman filter
|
# Run the Kalman filter
|
||||||
#import pdb; pdb.set_trace()
|
#import pdb; pdb.set_trace()
|
||||||
kalman_filter_type = self.kalman_filter_type
|
kalman_filter_type = self.kalman_filter_type
|
||||||
|
|
||||||
(M, P, log_likelihood,
|
(M, P, log_likelihood,
|
||||||
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
|
grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(
|
||||||
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,X,Y,m_init=None,
|
F,L,Qc,H,float(self.Gaussian_noise.variance),P_inf,X,Y,m_init=None,
|
||||||
P_init=P0, p_kalman_filter_type = kalman_filter_type,
|
P_init=P0, p_kalman_filter_type = kalman_filter_type,
|
||||||
calc_log_likelihood=False,
|
calc_log_likelihood=False,
|
||||||
calc_grad_log_likelihood=False)
|
calc_grad_log_likelihood=False)
|
||||||
|
|
||||||
# (filter_means, filter_covs, log_likelihood,
|
# (filter_means, filter_covs, log_likelihood,
|
||||||
# grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
|
# grad_log_likelihood,SmootherMatrObject) = ssm.ContDescrStateSpace.cont_discr_kalman_filter(F,L,Qc,H,
|
||||||
# float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
|
# float(self.Gaussian_noise.variance),P_inf,self.X,self.Y,m_init=None,
|
||||||
# P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
|
# P_init=P0, p_kalman_filter_type = kalman_filter_type, calc_log_likelihood=True,
|
||||||
# calc_grad_log_likelihood=True,
|
# calc_grad_log_likelihood=True,
|
||||||
# grad_params_no=grad_params_no,
|
# grad_params_no=grad_params_no,
|
||||||
# grad_calc_params=grad_calc_params)
|
# grad_calc_params=grad_calc_params)
|
||||||
|
|
||||||
# Run the Rauch-Tung-Striebel smoother
|
# Run the Rauch-Tung-Striebel smoother
|
||||||
if not filteronly:
|
if not filteronly:
|
||||||
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
|
(M, P) = ssm.ContDescrStateSpace.cont_discr_rts_smoother(state_dim, M, P,
|
||||||
p_dynamic_callables=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
|
p_dynamic_callables=SmootherMatrObject, X=X, F=F,L=L,Qc=Qc)
|
||||||
|
|
||||||
# remove initial values
|
# remove initial values
|
||||||
M = M[1:,:,:]
|
M = M[1:,:,:]
|
||||||
P = P[1:,:,:]
|
P = P[1:,:,:]
|
||||||
|
|
||||||
# Put the data back in the original order
|
# Put the data back in the original order
|
||||||
M = M[return_inverse,:,:]
|
M = M[return_inverse,:,:]
|
||||||
P = P[return_inverse,:,:]
|
P = P[return_inverse,:,:]
|
||||||
|
|
@ -257,40 +248,41 @@ class StateSpace(Model):
|
||||||
if not predict_only_training:
|
if not predict_only_training:
|
||||||
M = M[self.num_data:,:,:]
|
M = M[self.num_data:,:,:]
|
||||||
P = P[self.num_data:,:,:]
|
P = P[self.num_data:,:,:]
|
||||||
|
|
||||||
# Calculate the mean and variance
|
# Calculate the mean and variance
|
||||||
# after einsum m has dimension in 3D (sample_num, dim_no,time_series_no)
|
# after einsum m has dimension in 3D (sample_num, dim_no,time_series_no)
|
||||||
m = np.einsum('ijl,kj', M, H)# np.dot(M,H.T)
|
m = np.einsum('ijl,kj', M, H)# np.dot(M,H.T)
|
||||||
m.shape = (m.shape[0], m.shape[1]) # remove the third dimension
|
m.shape = (m.shape[0], m.shape[1]) # remove the third dimension
|
||||||
|
|
||||||
V = np.einsum('ij,ajk,kl', H, P, H.T)
|
V = np.einsum('ij,ajk,kl', H, P, H.T)
|
||||||
|
|
||||||
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
|
V.shape = (V.shape[0], V.shape[1]) # remove the third dimension
|
||||||
|
|
||||||
# Return the posterior of the state
|
# Return the posterior of the state
|
||||||
return (m, V)
|
return (m, V)
|
||||||
|
|
||||||
def predict(self, Xnew=None, filteronly=False):
|
def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
|
||||||
|
|
||||||
# Run the Kalman filter to get the state
|
# Run the Kalman filter to get the state
|
||||||
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
|
(m, V) = self._raw_predict(Xnew,filteronly=filteronly)
|
||||||
|
|
||||||
# Add the noise variance to the state variance
|
# Add the noise variance to the state variance
|
||||||
V += float(self.Gaussian_noise.variance)
|
if include_likelihood:
|
||||||
|
V += float(self.likelihood.variance)
|
||||||
|
|
||||||
# Lower and upper bounds
|
# Lower and upper bounds
|
||||||
lower = m - 2*np.sqrt(V)
|
#lower = m - 2*np.sqrt(V)
|
||||||
upper = m + 2*np.sqrt(V)
|
#upper = m + 2*np.sqrt(V)
|
||||||
|
|
||||||
# Return mean and variance
|
# Return mean and variance
|
||||||
return (m, V, lower, upper)
|
return m, V
|
||||||
|
|
||||||
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5)):
|
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
|
||||||
mu, var = self._raw_predict(Xnew)
|
mu, var = self._raw_predict(Xnew)
|
||||||
#import pdb; pdb.set_trace()
|
#import pdb; pdb.set_trace()
|
||||||
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
|
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
|
||||||
|
|
||||||
|
|
||||||
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
|
# def plot(self, plot_limits=None, levels=20, samples=0, fignum=None,
|
||||||
# ax=None, resolution=None, plot_raw=False, plot_filter=False,
|
# ax=None, resolution=None, plot_raw=False, plot_filter=False,
|
||||||
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
# linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
||||||
|
|
@ -399,8 +391,8 @@ class StateSpace(Model):
|
||||||
#
|
#
|
||||||
# # Return trajectory
|
# # Return trajectory
|
||||||
# return Y
|
# return Y
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# def simulate(self,F,L,Qc,Pinf,X,size=1):
|
# def simulate(self,F,L,Qc,Pinf,X,size=1):
|
||||||
# # Simulate a trajectory using the state space model
|
# # Simulate a trajectory using the state space model
|
||||||
#
|
#
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,17 @@ def inject_plotting():
|
||||||
GP.plot_f = gpy_plot.gp_plots.plot_f
|
GP.plot_f = gpy_plot.gp_plots.plot_f
|
||||||
GP.plot_magnification = gpy_plot.latent_plots.plot_magnification
|
GP.plot_magnification = gpy_plot.latent_plots.plot_magnification
|
||||||
|
|
||||||
|
from ..models import StateSpace
|
||||||
|
StateSpace.plot_data = gpy_plot.data_plots.plot_data
|
||||||
|
StateSpace.plot_data_error = gpy_plot.data_plots.plot_data_error
|
||||||
|
StateSpace.plot_errorbars_trainset = gpy_plot.data_plots.plot_errorbars_trainset
|
||||||
|
StateSpace.plot_mean = gpy_plot.gp_plots.plot_mean
|
||||||
|
StateSpace.plot_confidence = gpy_plot.gp_plots.plot_confidence
|
||||||
|
StateSpace.plot_density = gpy_plot.gp_plots.plot_density
|
||||||
|
StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples
|
||||||
|
StateSpace.plot = gpy_plot.gp_plots.plot
|
||||||
|
StateSpace.plot_f = gpy_plot.gp_plots.plot_f
|
||||||
|
|
||||||
from ..core import SparseGP
|
from ..core import SparseGP
|
||||||
SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing
|
SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -158,7 +158,7 @@ def _plot_data_error(self, canvas, which_data_rows='all',
|
||||||
|
|
||||||
return plots
|
return plots
|
||||||
|
|
||||||
def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **plot_kwargs):
|
def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', legend=True, **plot_kwargs):
|
||||||
"""
|
"""
|
||||||
Plot the inducing inputs of a sparse gp model
|
Plot the inducing inputs of a sparse gp model
|
||||||
|
|
||||||
|
|
@ -167,7 +167,7 @@ def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **
|
||||||
"""
|
"""
|
||||||
canvas, kwargs = pl().new_canvas(projection=projection, **plot_kwargs)
|
canvas, kwargs = pl().new_canvas(projection=projection, **plot_kwargs)
|
||||||
plots = _plot_inducing(self, canvas, visible_dims, projection, label, **kwargs)
|
plots = _plot_inducing(self, canvas, visible_dims, projection, label, **kwargs)
|
||||||
return pl().add_to_canvas(canvas, plots, legend=label is not None)
|
return pl().add_to_canvas(canvas, plots, legend=legend)
|
||||||
|
|
||||||
def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs):
|
def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs):
|
||||||
if visible_dims is None:
|
if visible_dims is None:
|
||||||
|
|
@ -175,7 +175,7 @@ def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs)
|
||||||
visible_dims = [i for i in sig_dims if i is not None]
|
visible_dims = [i for i in sig_dims if i is not None]
|
||||||
free_dims = get_free_dims(self, visible_dims, None)
|
free_dims = get_free_dims(self, visible_dims, None)
|
||||||
|
|
||||||
Z = self.Z[:, free_dims]
|
Z = self.Z.values
|
||||||
plots = {}
|
plots = {}
|
||||||
|
|
||||||
#one dimensional plotting
|
#one dimensional plotting
|
||||||
|
|
|
||||||
|
|
@ -112,28 +112,29 @@ def plot_latent_inducing(self,
|
||||||
which_indices=None,
|
which_indices=None,
|
||||||
legend=False,
|
legend=False,
|
||||||
plot_limits=None,
|
plot_limits=None,
|
||||||
marker='^',
|
marker=None,
|
||||||
num_samples=1000,
|
|
||||||
projection='2d',
|
projection='2d',
|
||||||
**kwargs):
|
**kwargs):
|
||||||
"""
|
"""
|
||||||
Plot a scatter plot of the inducing inputs.
|
Plot a scatter plot of the inducing inputs.
|
||||||
|
|
||||||
:param array-like labels: a label for each data point (row) of the inputs
|
:param [int] which_indices: which input dimensions to plot against each other
|
||||||
:param (int, int) which_indices: which input dimensions to plot against each other
|
|
||||||
:param bool legend: whether to plot the legend on the figure
|
:param bool legend: whether to plot the legend on the figure
|
||||||
:param plot_limits: the plot limits for the plot
|
:param plot_limits: the plot limits for the plot
|
||||||
:type plot_limits: (xmin, xmax, ymin, ymax) or ((xmin, xmax), (ymin, ymax))
|
:type plot_limits: (xmin, xmax, ymin, ymax) or ((xmin, xmax), (ymin, ymax))
|
||||||
:param str marker: markers to use - cycle if more labels then markers are given
|
:param str marker: marker to use [default is custom arrow like]
|
||||||
:param kwargs: the kwargs for the scatter plots
|
:param kwargs: the kwargs for the scatter plots
|
||||||
|
:param str projection: for now 2d or 3d projection (other projections can be implemented, see developer documentation)
|
||||||
"""
|
"""
|
||||||
canvas, projection, kwargs, sig_dims = _new_canvas(self, projection, kwargs, which_indices)
|
canvas, projection, kwargs, sig_dims = _new_canvas(self, projection, kwargs, which_indices)
|
||||||
|
|
||||||
Z = self.Z.values
|
if legend: label = 'inducing'
|
||||||
labels = np.array(['inducing'] * Z.shape[0])
|
else: label = None
|
||||||
kwargs['marker'] = marker
|
if marker is not None:
|
||||||
|
kwargs['marker'] = marker
|
||||||
update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable
|
update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable
|
||||||
scatters = _plot_latent_scatter(canvas, Z, sig_dims, labels, num_samples=num_samples, projection=projection, **kwargs)
|
from .data_plots import _plot_inducing
|
||||||
|
scatters = _plot_inducing(self, canvas, sig_dims[:2], projection, label, **kwargs)
|
||||||
return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend)
|
return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -190,6 +190,7 @@ def scatter_label_generator(labels, X, visible_dims, marker=None):
|
||||||
x = X[index, input_1]
|
x = X[index, input_1]
|
||||||
y = X[index, input_2]
|
y = X[index, input_2]
|
||||||
z = X[index, input_3]
|
z = X[index, input_3]
|
||||||
|
|
||||||
yield x, y, z, this_label, index, m
|
yield x, y, z, this_label, index, m
|
||||||
|
|
||||||
def subsample_X(X, labels, num_samples=1000):
|
def subsample_X(X, labels, num_samples=1000):
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ it gives back an empty default, when defaults are not defined.
|
||||||
# Data plots:
|
# Data plots:
|
||||||
data_1d = dict(lw=1.5, marker='x', color='k')
|
data_1d = dict(lw=1.5, marker='x', color='k')
|
||||||
data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5)
|
data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5)
|
||||||
inducing_1d = dict(lw=0, s=500, facecolors=Tango.colorsHex['darkRed'])
|
inducing_1d = dict(lw=0, s=500, color=Tango.colorsHex['darkRed'])
|
||||||
inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^')
|
inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^')
|
||||||
inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k')
|
inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k')
|
||||||
xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5)
|
xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5)
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,7 @@ class MatplotlibPlots(AbstractPlottingLibrary):
|
||||||
return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs)
|
return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs)
|
||||||
return ax.plot(X, Y, color=color, label=label, **kwargs)
|
return ax.plot(X, Y, color=color, label=label, **kwargs)
|
||||||
|
|
||||||
def plot_axis_lines(self, ax, X, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
|
def plot_axis_lines(self, ax, X, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
|
||||||
from matplotlib import transforms
|
from matplotlib import transforms
|
||||||
from matplotlib.path import Path
|
from matplotlib.path import Path
|
||||||
if 'marker' not in kwargs:
|
if 'marker' not in kwargs:
|
||||||
|
|
@ -126,14 +126,14 @@ class MatplotlibPlots(AbstractPlottingLibrary):
|
||||||
bottom=bottom, label=label, color=color,
|
bottom=bottom, label=label, color=color,
|
||||||
**kwargs)
|
**kwargs)
|
||||||
|
|
||||||
def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
|
def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
|
||||||
if not('linestyle' in kwargs or 'ls' in kwargs):
|
if not('linestyle' in kwargs or 'ls' in kwargs):
|
||||||
kwargs['ls'] = 'none'
|
kwargs['ls'] = 'none'
|
||||||
#if Z is not None:
|
#if Z is not None:
|
||||||
# return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs)
|
# return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs)
|
||||||
return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs)
|
return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs)
|
||||||
|
|
||||||
def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs):
|
def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
|
||||||
if not('linestyle' in kwargs or 'ls' in kwargs):
|
if not('linestyle' in kwargs or 'ls' in kwargs):
|
||||||
kwargs['ls'] = 'none'
|
kwargs['ls'] = 'none'
|
||||||
#if Z is not None:
|
#if Z is not None:
|
||||||
|
|
|
||||||
|
|
@ -131,14 +131,15 @@ class PlotlyPlots(AbstractPlottingLibrary):
|
||||||
#not matplotlib marker
|
#not matplotlib marker
|
||||||
pass
|
pass
|
||||||
marker_kwargs = marker_kwargs or {}
|
marker_kwargs = marker_kwargs or {}
|
||||||
marker_kwargs.setdefault('symbol', marker)
|
if 'symbol' not in marker_kwargs:
|
||||||
|
marker_kwargs['symbol'] = marker
|
||||||
if Z is not None:
|
if Z is not None:
|
||||||
return Scatter3d(x=X, y=Y, z=Z, mode='markers',
|
return Scatter3d(x=X, y=Y, z=Z, mode='markers',
|
||||||
showlegend=label is not None,
|
showlegend=label is not None,
|
||||||
marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
|
marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
|
||||||
name=label, **kwargs)
|
name=label, **kwargs)
|
||||||
return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None,
|
return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None,
|
||||||
marker=Marker(color=color, colorscale=cmap, **marker_kwargs or {}),
|
marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
|
||||||
name=label, **kwargs)
|
name=label, **kwargs)
|
||||||
|
|
||||||
def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs):
|
def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs):
|
||||||
|
|
|
||||||
BIN
GPy/testing/baseline/bayesian_gplvm_gradient.npz
Normal file
|
Before Width: | Height: | Size: 42 KiB |
BIN
GPy/testing/baseline/bayesian_gplvm_inducing.npz
Normal file
|
Before Width: | Height: | Size: 16 KiB |
BIN
GPy/testing/baseline/bayesian_gplvm_inducing_3d.npz
Normal file
|
Before Width: | Height: | Size: 135 KiB |
BIN
GPy/testing/baseline/bayesian_gplvm_latent.npz
Normal file
|
Before Width: | Height: | Size: 196 KiB |
BIN
GPy/testing/baseline/bayesian_gplvm_latent_3d.npz
Normal file
|
Before Width: | Height: | Size: 121 KiB |
BIN
GPy/testing/baseline/bayesian_gplvm_magnification.npz
Normal file
|
Before Width: | Height: | Size: 186 KiB |
BIN
GPy/testing/baseline/coverage_3d_plot.npz
Normal file
|
Before Width: | Height: | Size: 51 KiB |
BIN
GPy/testing/baseline/coverage_annotation_interact.npz
Normal file
|
Before Width: | Height: | Size: 8.9 KiB |
BIN
GPy/testing/baseline/coverage_gradient.npz
Normal file
|
Before Width: | Height: | Size: 5.2 KiB |
BIN
GPy/testing/baseline/coverage_imshow_interact.npz
Normal file
|
Before Width: | Height: | Size: 10 KiB |
BIN
GPy/testing/baseline/gp_2d_data.npz
Normal file
|
Before Width: | Height: | Size: 13 KiB |
BIN
GPy/testing/baseline/gp_2d_in_error.npz
Normal file
|
Before Width: | Height: | Size: 11 KiB |
BIN
GPy/testing/baseline/gp_2d_inducing.npz
Normal file
|
Before Width: | Height: | Size: 12 KiB |
BIN
GPy/testing/baseline/gp_2d_mean.npz
Normal file
|
Before Width: | Height: | Size: 118 KiB |
BIN
GPy/testing/baseline/gp_3d_data.npz
Normal file
|
Before Width: | Height: | Size: 116 KiB |
BIN
GPy/testing/baseline/gp_3d_inducing.npz
Normal file
|
Before Width: | Height: | Size: 108 KiB |
BIN
GPy/testing/baseline/gp_3d_mean.npz
Normal file
|
Before Width: | Height: | Size: 178 KiB |
BIN
GPy/testing/baseline/gp_class_likelihood.npz
Normal file
|
Before Width: | Height: | Size: 36 KiB |
BIN
GPy/testing/baseline/gp_class_raw.npz
Normal file
|
Before Width: | Height: | Size: 50 KiB |
BIN
GPy/testing/baseline/gp_class_raw_link.npz
Normal file
|
Before Width: | Height: | Size: 55 KiB |
BIN
GPy/testing/baseline/gp_conf.npz
Normal file
|
Before Width: | Height: | Size: 29 KiB |
BIN
GPy/testing/baseline/gp_data.npz
Normal file
|
Before Width: | Height: | Size: 14 KiB |
BIN
GPy/testing/baseline/gp_density.npz
Normal file
|
Before Width: | Height: | Size: 115 KiB |
BIN
GPy/testing/baseline/gp_in_error.npz
Normal file
|
Before Width: | Height: | Size: 11 KiB |
BIN
GPy/testing/baseline/gp_mean.npz
Normal file
|
Before Width: | Height: | Size: 23 KiB |
BIN
GPy/testing/baseline/gp_out_error.npz
Normal file
|
Before Width: | Height: | Size: 9.2 KiB |
BIN
GPy/testing/baseline/gp_samples.npz
Normal file
|
Before Width: | Height: | Size: 36 KiB |
BIN
GPy/testing/baseline/gplvm_gradient.npz
Normal file
|
Before Width: | Height: | Size: 43 KiB |
BIN
GPy/testing/baseline/gplvm_latent.npz
Normal file
|
Before Width: | Height: | Size: 182 KiB |
BIN
GPy/testing/baseline/gplvm_latent_3d.npz
Normal file
|
Before Width: | Height: | Size: 121 KiB |
BIN
GPy/testing/baseline/gplvm_magnification.npz
Normal file
|
Before Width: | Height: | Size: 180 KiB |
BIN
GPy/testing/baseline/kern_ARD.npz
Normal file
|
Before Width: | Height: | Size: 14 KiB |
BIN
GPy/testing/baseline/kern_cov_1d.npz
Normal file
|
Before Width: | Height: | Size: 34 KiB |
BIN
GPy/testing/baseline/kern_cov_2d.npz
Normal file
|
Before Width: | Height: | Size: 78 KiB |
BIN
GPy/testing/baseline/kern_cov_3d.npz
Normal file
|
Before Width: | Height: | Size: 206 KiB |
BIN
GPy/testing/baseline/kern_cov_no_lim.npz
Normal file
|
Before Width: | Height: | Size: 58 KiB |
BIN
GPy/testing/baseline/sparse_gp_class_likelihood.npz
Normal file
|
Before Width: | Height: | Size: 40 KiB |
BIN
GPy/testing/baseline/sparse_gp_class_raw.npz
Normal file
|
Before Width: | Height: | Size: 86 KiB |