Merged __init__

Michael T Smith 2016-06-02 15:59:10 +01:00
commit 163f5bcb04
138 changed files with 1952 additions and 2393 deletions

.coveragerc
View file

@@ -9,17 +9,16 @@ omit = ./GPy/testing/*.py, travis_tests.py, setup.py, ./GPy/__version__.py
 exclude_lines =
     # Have to re-enable the standard pragma
     pragma: no cover
+    verbose

     # Don't complain about missing debug-only code:
     if self\.debug

     # Don't complain if tests don't hit defensive assertion code:
-    raise AssertionError
-    raise NotImplementedError
-    raise NotImplemented
+    raise
     except
     pass
+    Not implemented

     # Don't complain if non-runnable code isn't run:
     if 0:
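
For illustration, a hypothetical snippet (not from the commit) showing the kind of lines these patterns exclude from coverage reports:

class Solver(object):
    debug = False
    def solve(self):
        raise NotImplementedError   # excluded: matches the 'raise' pattern
    def dump(self, state):
        if self.debug:              # excluded: matches 'if self\.debug'
            print(state)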

.travis.yml
View file

@@ -32,6 +32,7 @@ install:
 - pip install pypandoc
 - pip install git+git://github.com/BRML/climin.git
 - pip install autograd
+- pip install nose-show-skipped
 - python setup.py develop

 script:
@@ -47,9 +48,11 @@ before_deploy:
 - make html
 - cd ../
 - if [[ "$TRAVIS_OS_NAME" == "linux" ]];
-  then export DIST='sdist';
+  then
+  export DIST='sdist';
   elif [[ "$TRAVIS_OS_NAME" == "osx" ]];
-  then export DIST='bdist_wheel';
+  then
+  export DIST='bdist_wheel';
   fi;

 deploy:
@@ -60,5 +63,6 @@ deploy:
   on:
     tags: true
     branch: deploy
+    #condition: "$TRAVIS_OS_NAME" == "osx" || ( "$TRAVIS_OS_NAME" == "linux" && "$PYTHON_VERSION" == "2.7" )
   distributions: $DIST
   skip_cleanup: true

AUTHORS.txt
View file

@@ -1 +1 @@
-[GPy Authors](https://github.com/SheffieldML/GPy/graphs/contributors)
+GPy Authors: https://github.com/SheffieldML/GPy/graphs/contributors

GPy/__version__.py
View file

@@ -1 +1 @@
-__version__ = "1.0.2"
+__version__ = "1.0.9"

GPy/core/gp.py
View file

@@ -148,14 +148,16 @@ class GP(Model):
             # LVM models
             if isinstance(self.X, VariationalPosterior):
                 assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
+                index = self.X._parent_index_
                 self.unlink_parameter(self.X)
                 self.X = X
-                self.link_parameter(self.X)
+                self.link_parameter(self.X, index=index)
             else:
+                index = self.X._parent_index_
                 self.unlink_parameter(self.X)
                 from ..core import Param
-                self.X = Param('latent mean',X)
-                self.link_parameter(self.X)
+                self.X = Param('latent mean', X)
+                self.link_parameter(self.X, index=index)
         else:
             self.X = ObsAr(X)
         self.update_model(True)
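
A minimal usage sketch (not from the commit), assuming this hunk sits in GP.set_X: the new index= argument relinks the swapped-in X at its previous position in the parameter list instead of appending it at the end.

import numpy as np
import GPy

Y = np.random.randn(30, 5)
m = GPy.models.GPLVM(Y, input_dim=2)
m.set_X(np.random.randn(30, 2))   # 'latent mean' keeps its old parameter index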
@@ -437,15 +439,22 @@ class GP(Model):
         warnings.warn("Wrong naming, use predict_wishart_embedding instead. Will be removed in future versions!", DeprecationWarning)
         return self.predict_wishart_embedding(Xnew, kern, mean, covariance)

-    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
+    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True, dimensions=None):
         """
         Predict the magnification factor as

         sqrt(det(G))

-        for each point N in Xnew
+        for each point N in Xnew.
+
+        :param bool mean: whether to include the mean of the wishart embedding.
+        :param bool covariance: whether to include the covariance of the wishart embedding.
+        :param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
         """
         G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
+        if dimensions is None:
+            dimensions = self.get_most_significant_input_dimensions()[:2]
+        G = G[:, dimensions][:,:,dimensions]
         from ..util.linalg import jitchol
         mag = np.empty(Xnew.shape[0])
         for n in range(Xnew.shape[0]):
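
A usage sketch (hypothetical model m, e.g. a fitted GPLVM): dimensions restricts the Wishart metric G to two chosen input dimensions before the determinant is taken.

mag = m.predict_magnification(m.X.values, dimensions=[0, 1])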
@@ -525,21 +534,23 @@ class GP(Model):
     def get_most_significant_input_dimensions(self, which_indices=None):
         return self.kern.get_most_significant_input_dimensions(which_indices)

-    def optimize(self, optimizer=None, start=None, **kwargs):
+    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

         kwargs are passed to the optimizer. They can be:

-        :param max_f_eval: maximum number of function evaluations
-        :type max_f_eval: int
+        :param max_iters: maximum number of function evaluations
+        :type max_iters: int
         :messages: whether to display during optimisation
         :type messages: bool
         :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
         :type optimizer: string
+        :param bool ipython_notebook: whether to use ipython notebook widgets or not.
+        :param bool clear_after_finish: if in ipython notebook, we can clear the widgets after optimization.
         """
         self.inference_method.on_optimization_start()
         try:
-            super(GP, self).optimize(optimizer, start, **kwargs)
+            super(GP, self).optimize(optimizer, start, messages, max_iters, ipython_notebook, clear_after_finish, **kwargs)
         except KeyboardInterrupt:
             print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
             self.inference_method.on_optimization_end()
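
The keywords that previously had to travel through **kwargs are now explicit; a sketch, assuming a built model m:

m.optimize(optimizer='lbfgs', messages=True, max_iters=500, ipython_notebook=False)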

GPy/examples/dimensionality_reduction.py
View file

@@ -340,7 +340,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def gplvm_simulation(optimize=True, verbose=1,
@@ -364,7 +364,7 @@ def gplvm_simulation(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def ssgplvm_simulation(optimize=True, verbose=1,
                        plot=True, plot_sim=False,
@@ -388,7 +388,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("SSGPLVM Latent Space 1D")
-        m.kern.plot_ARD('SSGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def bgplvm_simulation_missing_data(optimize=True, verbose=1,
@@ -418,7 +418,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
@@ -448,7 +448,7 @@ def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m
@@ -469,7 +469,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
     m.optimize(messages=verbose, max_iters=8e3)
     if plot:
         m.X.plot("MRD Latent Space 1D")
-        m.plot_scales("MRD Scales")
+        m.plot_scales()
     return m

 def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
@@ -496,7 +496,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
     m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
     if plot:
         m.X.plot("MRD Latent Space 1D")
-        m.plot_scales("MRD Scales")
+        m.plot_scales()
     return m

 def brendan_faces(optimize=True, verbose=True, plot=True):
GPy/inference/latent_function_inference/expectation_propagation.py
View file

@@ -40,6 +40,14 @@ class EPBase(object):
         # TODO: update approximation in the end as well? Maybe even with a switch?
         pass

+    def __setstate__(self, state):
+        super(EPBase, self).__setstate__(state[0])
+        self.epsilon, self.eta, self.delta = state[1]
+        self.reset()
+
+    def __getstate__(self):
+        return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]
+
 class EP(EPBase, ExactGaussianInference):
     def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
         if self.always_reset:
@@ -51,7 +59,7 @@ class EP(EPBase, ExactGaussianInference):
         if K is None:
             K = kern.K(X)

-        if self._ep_approximation is None:
+        if getattr(self, '_ep_approximation', None) is None:
             #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
             mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
         else:
@@ -159,7 +167,7 @@ class EPDTC(EPBase, VarDTC):
         else:
             Kmn = psi1.T

-        if self._ep_approximation is None:
+        if getattr(self, '_ep_approximation', None) is None:
             mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
         else:
             mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
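
With __getstate__/__setstate__ defined on EPBase above, models using EP survive a pickle round trip; a minimal sketch (hypothetical data):

import pickle
import numpy as np
import GPy

X = np.random.rand(40, 1)
Y = (X > 0.5).astype(float)
m = GPy.models.GPClassification(X, Y)   # EP is the default inference method here
m2 = pickle.loads(pickle.dumps(m))      # EP state restored, then reset()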

GPy/kern/__init__.py
View file

@@ -10,7 +10,7 @@ from .src.add import Add
 from .src.prod import Prod
 from .src.rbf import RBF
 from .src.linear import Linear, LinearFull
-from .src.static import Bias, White, Fixed, WhiteHeteroscedastic
+from .src.static import Bias, White, Fixed, WhiteHeteroscedastic, Precomputed
 from .src.brownian import Brownian
 from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
 from .src.mlp import MLP
@@ -27,6 +27,7 @@ from .src.eq_ode2 import EQ_ODE2
 from .src.integral import Integral
 from .src.integral_limits import Integral_Limits
 from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
+from .src.eq_ode1 import EQ_ODE1
 from .src.trunclinear import TruncLinear,TruncLinear_inf
 from .src.splitKern import SplitKern,DEtime
 from .src.splitKern import DEtime as DiffGenomeKern
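
After these two changes, both additions are importable from the package root:

from GPy.kern import Precomputed, EQ_ODE1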

GPy/kern/src/eq_ode1.py (new file, 649 lines)
View file

@@ -0,0 +1,649 @@
# Copyright (c) 2014, Cristian Guarnizo.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy.special import erf, erfcx
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
from paramz.caching import Cache_this
class EQ_ODE1(Kern):
"""
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self, input_dim=2, output_dim=1, rank=1, W = None, lengthscale=None, decay=None, active_dims=None, name='eq_ode1'):
assert input_dim == 2, "only defined for 2 input dims (time and output index)"
super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name)
self.rank = rank
self.output_dim = output_dim
if lengthscale is None:
lengthscale = .5 + np.random.rand(self.rank)
else:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
if lengthscale.size != self.rank:
lengthscale = np.ones(self.rank)*lengthscale
if W is None:
W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
else:
assert W.shape == (self.output_dim, self.rank)
if decay is None:
decay = np.ones(self.output_dim)
else:
decay = np.asarray(decay)
assert decay.size in [1, self.output_dim], "Bad number of decay"
if decay.size != self.output_dim:
decay = np.ones(self.output_dim)*decay
# if kappa is None:
# self.kappa = np.ones(self.output_dim)
# else:
# kappa = np.asarray(kappa)
# assert kappa.size in [1, self.output_dim], "Bad number of kappa"
# if decay.size != self.output_dim:
# decay = np.ones(self.output_dim)*kappa
#self.kappa = Param('kappa', kappa, Logexp())
#self.delay = Param('delay', delay, Logexp())
#self.is_normalized = True
#self.is_stationary = False
#self.gaussian_initial = False
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.decay = Param('decay', decay, Logexp())
self.W = Param('W', W)
self.link_parameters(self.lengthscale, self.decay, self.W)
@Cache_this(limit=3)
def K(self, X, X2=None):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X2 is None:
if X_flag:
#Calculate covariance function for the latent functions
index -= self.output_dim
return self._Kuu(X, index)
else:
raise NotImplementedError
else:
#This way is not working, indexes are lost after using k._slice_X
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
#Calculate cross-covariance function
if not X_flag and X2_flag:
index2 -= self.output_dim
return self._Kfu(X, index, X2, index2) #Kfu
elif X_flag and not X2_flag:
index -= self.output_dim
return self._Kfu(X2, index2, X, index).T #Kuf
elif X_flag and X2_flag:
index -= self.output_dim
index2 -= self.output_dim
return self._Kusu(X, index, X2, index2) #Ku_s u
else:
raise NotImplementedError #Kf_s f
#Calculate the covariance function for diag(Kff(X,X))
def Kdiag(self, X):
if hasattr(X, 'values'):
index = np.int_(np.round(X[:, 1].values))
else:
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X_flag: #Kuudiag
return np.ones(X[:,0].shape)
else: #Kffdiag
kdiag = self._Kdiag(X)
return np.sum(kdiag, axis=1)
def _Kdiag(self, X):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(X[:, 1])
index = index.reshape(index.size,)
#terms that move along t
t = X[:, 0].reshape(X.shape[0], 1)
d = np.unique(index) #Output Indexes
B = self.decay.values[d]
S = self.W.values[d, :]
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
B = B.reshape(B.size, 1)
#Terms that move along q
lq = self.lengthscale.values.reshape(1, self.rank)
S2 = S*S
kdiag = np.empty((t.size, ))
#Dx1 terms
c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq)
#DxQ terms
nu = lq*(B*.5)
nu2 = nu*nu
#Nx1 terms
gamt = -2.*B
gamt = gamt[index]*t
#NxQ terms
t_lq = t/lq
# Upsilon Calculations
# Using wofz
#erfnu = erf(nu)
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :] ,t_lq+nu[index,:] ))
upm[t[:, 0] == 0, :] = 0.
upv = np.exp(nu2[index, :] + gamt + lnDifErf( -t_lq+nu[index,:], nu[index, :] ) )
upv[t[:, 0] == 0, :] = 0.
#Covariance calculation
#kdiag = np.sum(c0[index, :]*(upm-upv), axis=1)
kdiag = c0[index, :]*(upm-upv)
return kdiag
def update_gradients_full(self, dL_dK, X, X2 = None):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
self.decay.gradient = np.zeros(self.decay.shape)
self.W.gradient = np.zeros(self.W.shape)
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X2 is None:
if X_flag: #Kuu or Kmm
index -= self.output_dim
tmp = dL_dK*self._gkuu_lq(X, index)
for q in np.unique(index):
ind = np.where(index == q)
self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum()
else:
raise NotImplementedError
else: #Kfu or Knm
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
if not X_flag and X2_flag: #Kfu
index2 -= self.output_dim
else: #Kuf
dL_dK = dL_dK.T #so we obtain dL_dKfu
indtemp = index - self.output_dim
Xtemp = X
X = X2
X2 = Xtemp
index = index2
index2 = indtemp
glq, gSdq, gB = self._gkfu(X, index, X2, index2)
tmp = dL_dK*glq
for q in np.unique(index2):
ind = np.where(index2 == q)
self.lengthscale.gradient[q] = tmp[:, ind].sum()
tmpB = dL_dK*gB
tmp = dL_dK*gSdq
for d in np.unique(index):
ind = np.where(index == d)
self.decay.gradient[d] = tmpB[ind, :].sum()
for q in np.unique(index2):
ind2 = np.where(index2 == q)
self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum()
def update_gradients_diag(self, dL_dKdiag, X):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
self.decay.gradient = np.zeros(self.decay.shape)
self.W.gradient = np.zeros(self.W.shape)
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
index = np.int_(X[:, 1])
index = index.reshape(index.size,)
glq, gS, gB = self._gkdiag(X, index)
if dL_dKdiag.size == X.shape[0]:
dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
tmp = dL_dKdiag*glq
self.lengthscale.gradient = tmp.sum(0)
tmpB = dL_dKdiag*gB
tmp = dL_dKdiag*gS
for d in np.unique(index):
ind = np.where(index == d)
self.decay.gradient[d] = tmpB[ind, :].sum()
self.W.gradient[d, :] = tmp[ind].sum(0)
def gradients_X(self, dL_dK, X, X2=None):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
#If input_dim == 1, use this
#gX = np.zeros((X.shape[0], 1))
#Cheat to allow gradient for input_dim==2
gX = np.zeros(X.shape)
if X2 is None: #Kuu or Kmm
if X_flag:
index -= self.output_dim
gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0)
return gX
else:
raise NotImplementedError
else: #Kuf or Kmn
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z
index -= self.output_dim
gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1)
return gX
else:
raise NotImplementedError
#---------------------------------------#
# Helper functions #
#---------------------------------------#
#Evaluation of squared exponential for LFM
def _Kuu(self, X, index):
index = index.reshape(index.size,)
t = X[:, 0].reshape(X.shape[0],)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
kuu = np.zeros((t.size, t.size))
#Assign 1. to diagonal terms
kuu[np.diag_indices(t.size)] = 1.
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1)
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
#Calculation of covariance function
kuu[indr, indc] = np.exp(-r2/lq2[index[indr]])
#Completion of lower triangular part
kuu[indc, indr] = kuu[indr, indc]
return kuu
def _Kusu(self, X, index, X2, index2):
index = index.reshape(index.size,)
index2 = index2.reshape(index2.size,)
t = X[:, 0].reshape(X.shape[0],1)
t2 = X2[:, 0].reshape(1,X2.shape[0])
lq = self.lengthscale.values.reshape(self.rank,)
#Covariance matrix initialization
kuu = np.zeros((t.size, t2.size))
for q in range(self.rank):
ind1 = index == q
ind2 = index2 == q
r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
r2 = r*r
#Calculation of covariance function
kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
return kuu
#Evaluation of cross-covariance function
def _Kfu(self, X, index, X2, index2):
#terms that move along t
t = X[:, 0].reshape(X.shape[0], 1)
d = np.unique(index) #Output Indexes
B = self.decay.values[d]
S = self.W.values[d, :]
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#Output related variables must be column-wise
B = B.reshape(B.size, 1)
#Input related variables must be row-wise
z = X2[:, 0].reshape(1, X2.shape[0])
lq = self.lengthscale.values.reshape((1, self.rank))
kfu = np.empty((t.size, z.size))
#DxQ terms
c0 = S*((.5*np.sqrt(np.pi))*lq)
nu = B*(.5*lq)
nu2 = nu**2
#1xM terms
z_lq = z/lq[0, index2]
#NxM terms
tz = t-z
tz_lq = tz/lq[0, index2]
# Upsilon Calculations
fullind = np.ix_(index, index2)
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind]))
upsi[t[:, 0] == 0, :] = 0.
#Covariance calculation
kfu = c0[fullind]*upsi
return kfu
#Gradient of Kuu wrt lengthscale
def _gkuu_lq(self, X, index):
t = X[:, 0].reshape(X.shape[0],)
index = index.reshape(X.shape[0],)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
glq = np.zeros((t.size, t.size))
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1)
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
r2_lq2 = r2/lq2[index[indr]]
#Calculation of covariance function
er2_lq2 = np.exp(-r2_lq2)
#Gradient wrt lq
c = 2.*r2_lq2/lq[index[indr]]
glq[indr, indc] = er2_lq2*c
#Complete the lower triangular
glq[indc, indr] = glq[indr, indc]
return glq
#Careful: this derivative needs to be transposed
def _gkuu_X(self, X, index): #Diagonal terms are always zero
t = X[:, 0].reshape(X.shape[0],)
index = index.reshape(index.size,)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
gt = np.zeros((t.size, t.size))
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
r2_lq2 = r2/(-lq2[index[indr]])
#Calculation of covariance function
er2_lq2 = np.exp(r2_lq2)
#Gradient wrt t
c = 2.*r/lq2[index[indr]]
gt[indr, indc] = er2_lq2*c
#Complete the lower triangular
gt[indc, indr] = -gt[indr, indc]
return gt
#Gradients for Diagonal Kff
def _gkdiag(self, X, index):
index = index.reshape(index.size,)
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#Output related variables must be column-wise
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
S2 = S*S
#Input related variables must be row-wise
lq = self.lengthscale.values.reshape(1, self.rank)
gB = np.empty((t.size,))
glq = np.empty((t.size, lq.size))
gS = np.empty((t.size, lq.size))
#Dx1 terms
c0 = S2*lq*np.sqrt(np.pi)
#DxQ terms
nu = (.5*lq)*B
nu2 = nu*nu
#Nx1 terms
gamt = -B[index]*t
egamt = np.exp(gamt)
e2gamt = egamt*egamt
#NxQ terms
t_lq = t/lq
t2_lq2 = -t_lq*t_lq
etlq2gamt = np.exp(t2_lq2 + gamt) #NXQ
##Upsilon calculations
#erfnu = erf(nu) #TODO: This can be improved
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :], t_lq + nu[index, :]) )
upm[t[:, 0] == 0, :] = 0.
upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) ) #egamt*upv
upv[t[:, 0] == 0, :] = 0.
#Gradient wrt S
c0_S = (S/B)*(lq*np.sqrt(np.pi))
gS = c0_S[index]*(upm - upv)
#For B
CB1 = (.5*lq)**2 - .5/B**2 #DXQ
lq2_2B = (.5*lq**2)*(S2/B) #DXQ
CB2 = 2.*etlq2gamt - e2gamt - 1. #NxQ
# gradient wrt B NxZ
gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \
lq2_2B[index, :]*CB2
#Gradient wrt lengthscale
#DxQ terms
c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1.+.5*(lq*B)**2)
Clq1 = S2*(lq*.5)
glq = c0[index]*(upm - upv) + Clq1[index]*CB2
return glq, gS, gB
def _gkfu(self, X, index, Z, index2):
index = index.reshape(index.size,)
#TODO: reduce memory usage
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#t column
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
#z row
z = Z[:, 0].reshape(1, Z.shape[0])
index2 = index2.reshape(index2.size,)
lq = self.lengthscale.values.reshape((1, self.rank))
#kfu = np.empty((t.size, z.size))
glq = np.empty((t.size, z.size))
gSdq = np.empty((t.size, z.size))
gB = np.empty((t.size, z.size))
#Dx1 terms
B_2 = B*.5
S_pi = S*(.5*np.sqrt(np.pi))
#DxQ terms
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
nu = B*lq*.5
nu2 = nu*nu
#1xM terms
z_lq = z/lq[0, index2]
#NxM terms
tz = t-z
tz_lq = tz/lq[0, index2]
etz_lq2 = -np.exp(-tz_lq*tz_lq)
ez_lq_Bt = np.exp(-z_lq*z_lq -B[index]*t)
# Upsilon calculations
fullind = np.ix_(index, index2)
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind] ) )
upsi[t[:, 0] == 0., :] = 0.
#Gradient wrt S
#DxQ term
Sa1 = lq*(.5*np.sqrt(np.pi))
gSdq = Sa1[0,index2]*upsi
#Gradient wrt lq
la1 = S_pi*(1. + 2.*nu2)
Slq = S*lq
uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index])
uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index])
glq = la1[fullind]*upsi
glq += Slq[fullind]*uplq
#Gradient wrt B
Slq = Slq*lq
nulq = nu*lq
upBd = etz_lq2 + ez_lq_Bt
gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd
return glq, gSdq, gB
#TODO: reduce memory usage
def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z)
index = index.reshape(index.size,)
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#t column
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
#z row
z = Z[:, 0].reshape(1, Z.shape[0])
index2 = index2.reshape(index2.size,)
lq = self.lengthscale.values.reshape((1, self.rank))
#kfu = np.empty((t.size, z.size))
gz = np.empty((t.size, z.size))
#Dx1 terms
S_pi =S*(.5*np.sqrt(np.pi))
#DxQ terms
#Slq = S*lq
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
nu = (.5*lq)*B
nu2 = nu*nu
#1xM terms
z_lq = z/lq[0, index2]
z_lq2 = -z_lq*z_lq
#NxQ terms
t_lq = t/lq
#NxM terms
zt_lq = z_lq - t_lq[:, index2]
zt_lq2 = -zt_lq*zt_lq
# Upsilon calculations
fullind = np.ix_(index, index2)
z2 = z_lq + nu[fullind]
z1 = z2 - t_lq[:, index2]
upsi = np.exp(nu2[fullind] - B[index]*(t-z) + lnDifErf(z1,z2) )
upsi[t[:, 0] == 0., :] = 0.
#Gradient wrt z
za1 = c0*B
#za2 = S_w
gz = za1[fullind]*upsi + S[fullind]*( np.exp(z_lq2 - B[index]*t) -np.exp(zt_lq2) )
return gz
def lnDifErf(z1,z2):
#Z2 is always positive
logdiferf = np.zeros(z1.shape)
ind = np.where(z1>0.)
ind2 = np.where(z1<=0.)
if ind[0].size > 0:
z1i = z1[ind]
z12 = z1i*z1i
z2i = z2[ind]
logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12-z2i**2))
if ind2[0].size > 0:
z1i = z1[ind2]
z2i = z2[ind2]
logdiferf[ind2] = np.log(erf(z2i) - erf(z1i))
return logdiferf
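
A minimal usage sketch (not part of the file above) of the input convention the kernel assumes: column 0 carries time, column 1 an integer index, and indices >= output_dim select latent forces (index = output_dim + q).

import numpy as np
import GPy

t = np.linspace(0, 5, 10)[:, None]
X_out = np.hstack([t, np.zeros_like(t)])    # rows of output 0
X_lat = np.hstack([t, 2*np.ones_like(t)])   # rows of latent force 0
k = GPy.kern.EQ_ODE1(input_dim=2, output_dim=2, rank=1)
Kuu = k.K(X_lat)            # latent-latent covariance
Kfu = k.K(X_out, X_lat)     # output-latent cross-covariance
kff = k.Kdiag(X_out)        # diagonal of the output covariance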

GPy/kern/src/eq_ode2.py
View file

@@ -44,7 +44,7 @@ class EQ_ODE2(Kern):
             lengthscale = np.asarray(lengthscale)
             assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
             if lengthscale.size != self.rank:
-                lengthscale = np.ones(self.input_dim)*lengthscale
+                lengthscale = np.ones(self.rank)*lengthscale
         if W is None:
             #W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
@@ -71,7 +71,7 @@ class EQ_ODE2(Kern):
         #index = index.reshape(index.size,)
         if hasattr(X, 'values'):
             X = X.values
-        index = np.int_(X[:, 1])
+        index = np.int_(np.round(X[:, 1]))
         index = index.reshape(index.size,)
         X_flag = index[0] >= self.output_dim
         if X2 is None:
@@ -79,7 +79,7 @@ class EQ_ODE2(Kern):
                 #Calculate covariance function for the latent functions
                 index -= self.output_dim
                 return self._Kuu(X, index)
-            else:
+            else: #Kff full
                 raise NotImplementedError
         else:
             #This way is not working, indexes are lost after using k._slice_X
@@ -87,19 +87,40 @@ class EQ_ODE2(Kern):
             #index2 = index2.reshape(index2.size,)
             if hasattr(X2, 'values'):
                 X2 = X2.values
-            index2 = np.int_(X2[:, 1])
+            index2 = np.int_(np.round(X2[:, 1]))
             index2 = index2.reshape(index2.size,)
             X2_flag = index2[0] >= self.output_dim
             #Calculate cross-covariance function
             if not X_flag and X2_flag:
                 index2 -= self.output_dim
                 return self._Kfu(X, index, X2, index2) #Kfu
-            else:
+            elif X_flag and not X2_flag:
                 index -= self.output_dim
                 return self._Kfu(X2, index2, X, index).T #Kuf
+            elif X_flag and X2_flag:
+                index -= self.output_dim
+                index2 -= self.output_dim
+                return self._Kusu(X, index, X2, index2) #Ku_s u
+            else:
+                raise NotImplementedError #Kf_s f

     #Calculate the covariance function for diag(Kff(X,X))
     def Kdiag(self, X):
+        if hasattr(X, 'values'):
+            index = np.int_(np.round(X[:, 1].values))
+        else:
+            index = np.int_(np.round(X[:, 1]))
+        index = index.reshape(index.size,)
+        X_flag = index[0] >= self.output_dim
+        if X_flag: #Kuudiag
+            return np.ones(X[:,0].shape)
+        else: #Kffdiag
+            kdiag = self._Kdiag(X)
+            return np.sum(kdiag, axis=1)
+
+    #Calculate the covariance function for diag(Kff(X,X))
+    def _Kdiag(self, X):
         #This way is not working, indexes are lost after using k._slice_X
         #index = np.asarray(X, dtype=np.int)
         #index = index.reshape(index.size,)
@@ -132,7 +153,7 @@ class EQ_ODE2(Kern):
         #Terms that move along q
         lq = self.lengthscale.values.reshape(1, self.lengthscale.size)
         S2 = S*S
-        kdiag = np.empty((t.size, ))
+        kdiag = np.empty((t.size, lq.size))
         indD = np.arange(B.size)

         #(1) When wd is real
@@ -187,8 +208,8 @@ class EQ_ODE2(Kern):
             upv[t1[:, 0] == 0, :] = 0.

             #Covariance calculation
-            kdiag[ind3t] = np.sum(np.real(K01[ind]*upm), axis=1)
-            kdiag[ind3t] += np.sum(np.real((c0[ind]*ec)*upv), axis=1)
+            kdiag[ind3t] = np.real(K01[ind]*upm)
+            kdiag[ind3t] += np.real((c0[ind]*ec)*upv)

         #(2) When w_d is complex
         if np.any(wbool):
@@ -265,7 +286,7 @@ class EQ_ODE2(Kern):
             upvc[t1[:, 0] == 0, :] = 0.

             #Covariance calculation
-            kdiag[ind2t] = np.sum(K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc, axis=1)
+            kdiag[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc
         return kdiag

     def update_gradients_full(self, dL_dK, X, X2 = None):
@@ -336,16 +357,17 @@ class EQ_ODE2(Kern):
         index = index.reshape(index.size,)
         glq, gS, gB, gC = self._gkdiag(X, index)

-        tmp = dL_dKdiag.reshape(index.size, 1)*glq
+        if dL_dKdiag.size == X.shape[0]:
+            dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
+        tmp = dL_dKdiag*glq
         self.lengthscale.gradient = tmp.sum(0)
-        #TODO: Avoid the reshape by a priori knowing the shape of dL_dKdiag
-        tmpB = dL_dKdiag*gB.reshape(dL_dKdiag.shape)
-        tmpC = dL_dKdiag*gC.reshape(dL_dKdiag.shape)
-        tmp = dL_dKdiag.reshape(index.size, 1)*gS
+        tmpB = dL_dKdiag*gB
+        tmpC = dL_dKdiag*gC
+        tmp = dL_dKdiag*gS
         for d in np.unique(index):
             ind = np.where(index == d)
-            self.B.gradient[d] = tmpB[ind].sum()
-            self.C.gradient[d] = tmpC[ind].sum()
+            self.B.gradient[d] = tmpB[ind, :].sum()
+            self.C.gradient[d] = tmpC[ind, :].sum()
             self.W.gradient[d, :] = tmp[ind].sum(0)

     def gradients_X(self, dL_dK, X, X2=None):
@@ -410,6 +432,23 @@ class EQ_ODE2(Kern):
         kuu[indc, indr] = kuu[indr, indc]
         return kuu

+    def _Kusu(self, X, index, X2, index2):
+        index = index.reshape(index.size,)
+        index2 = index2.reshape(index2.size,)
+        t = X[:, 0].reshape(X.shape[0],1)
+        t2 = X2[:, 0].reshape(1,X2.shape[0])
+        lq = self.lengthscale.values.reshape(self.rank,)
+        #Covariance matrix initialization
+        kuu = np.zeros((t.size, t2.size))
+        for q in range(self.rank):
+            ind1 = index == q
+            ind2 = index2 == q
+            r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
+            r2 = r*r
+            #Calculation of covariance function
+            kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
+        return kuu
+
     #Evaluation of cross-covariance function
     def _Kfu(self, X, index, X2, index2):
         #terms that move along t
@@ -632,8 +671,8 @@ class EQ_ODE2(Kern):
         lq = self.lengthscale.values.reshape(1, self.rank)
         lq2 = lq*lq

-        gB = np.empty((t.size,))
-        gC = np.empty((t.size,))
+        gB = np.empty((t.size, lq.size))
+        gC = np.empty((t.size, lq.size))
         glq = np.empty((t.size, lq.size))
         gS = np.empty((t.size, lq.size))
@@ -723,8 +762,8 @@ class EQ_ODE2(Kern):
             Ba4_1 = (S2lq*lq)*dgam_dB/w2
             Ba4 = Ba4_1*c

-            gB[ind3t] = np.sum(np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
-                + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd), axis=1)
+            gB[ind3t] = np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
+                + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd)

             # gradient wrt C
             dw_dC = - alphad*dw_dB
@@ -738,8 +777,8 @@ class EQ_ODE2(Kern):
             Ca4_1 = (S2lq*lq)*dgam_dC/w2
             Ca4 = Ca4_1*c

-            gC[ind3t] = np.sum(np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
-                + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd), axis=1)
+            gC[ind3t] = np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
+                + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd)

             #Gradient wrt lengthscale
             #DxQ terms
@@ -868,10 +907,10 @@ class EQ_ODE2(Kern):
             Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc))
             Ba2_2c = c0*dgamc_dB/gamc

-            gB[ind2t] = np.sum(Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
+            gB[ind2t] = Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
                 + Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\
                 + Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\
-                + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc, axis=1)
+                + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc

             ##Gradient wrt C
             dw_dC = 0.5*alphad/w
@@ -895,10 +934,10 @@ class EQ_ODE2(Kern):
             Ca4_1c = S2lq2*(dgamc_dC/w2)
             Ca4c = Ca4_1c*c2

-            gC[ind2t] = np.sum(Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
+            gC[ind2t] = Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
                 + Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\
                 + Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\
-                + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc, axis=1)
+                + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc

             #Gradient wrt lengthscale
             #DxQ terms

GPy/kern/src/static.py
View file

@@ -181,7 +181,7 @@ class Fixed(Static):
         self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)

     def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
+        self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))

     def psi2(self, Z, variational_posterior):
         return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
@@ -192,3 +192,53 @@ class Fixed(Static):
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         self.variance.gradient = dL_dpsi0.sum()
+class Precomputed(Fixed):
+    def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='precomputed'):
+        """
+        Class for precomputed kernels, indexed by columns in X
+
+        Usage example:
+
+        import numpy as np
+        from GPy.models import GPClassification
+        from GPy.kern import Precomputed
+        from sklearn.cross_validation import LeaveOneOut
+
+        n = 10
+        d = 100
+        X = np.arange(n).reshape((n,1))         # column vector of indices
+        y = 2*np.random.binomial(1,0.5,(n,1))-1
+        X0 = np.random.randn(n,d)
+        k = np.dot(X0,X0.T)
+        kern = Precomputed(1,k)                 # k is a n x n covariance matrix
+
+        cv = LeaveOneOut(n)
+        ypred = y.copy()
+        for train, test in cv:
+            m = GPClassification(X[train], y[train], kernel=kern)
+            m.optimize()
+            ypred[test] = 2*(m.predict(X[test])[0]>0.5)-1
+
+        :param input_dim: the number of input dimensions
+        :type input_dim: int
+        :param variance: the variance of the kernel
+        :type variance: float
+        """
+        super(Precomputed, self).__init__(input_dim, covariance_matrix, variance, active_dims, name)
+
+    def K(self, X, X2=None):
+        if X2 is None:
+            return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')]
+        else:
+            return self.variance * self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')]
+
+    def Kdiag(self, X):
+        return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')].diagonal()
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        if X2 is None:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
+        else:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')])
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
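
A self-contained variant of the docstring example above (hypothetical data, without the now-deprecated sklearn.cross_validation dependency): inputs are row indices into the precomputed matrix.

import numpy as np
import GPy

n = 20
X0 = np.random.randn(n, 5)
K = X0.dot(X0.T)                                 # n x n precomputed covariance
idx = np.arange(n, dtype=float).reshape(-1, 1)   # inputs index rows of K
y = np.random.randn(n, 1)
m = GPy.models.GPRegression(idx, y, kernel=GPy.kern.Precomputed(1, K))
m.optimize()
mu, var = m.predict(idx)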

GPy/kern/src/stationary.py
View file

@@ -315,10 +315,10 @@ class Exponential(Stationary):
         super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

     def K_of_r(self, r):
-        return self.variance * np.exp(-0.5 * r)
+        return self.variance * np.exp(-r)

     def dK_dr(self, r):
-        return -0.5*self.K_of_r(r)
+        return -self.K_of_r(r)

 #    def sde(self):
 #        """

GPy/models/__init__.py
View file

@@ -24,3 +24,5 @@ from .one_vs_all_sparse_classification import OneVsAllSparseClassification
 from .dpgplvm import DPBayesianGPLVM
 from .state_space_model import StateSpace
+
+from .ibp_lfm import IBPLFM

GPy/models/ibp_lfm.py (new file, 535 lines)
View file

@@ -0,0 +1,535 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import kern
from ..util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv
from ..util import diag
from ..core.parameterization import Param
from ..likelihoods import Gaussian
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
from ..inference.latent_function_inference.posterior import Posterior
from GPy.core.parameterization.variational import VariationalPrior
from ..core.parameterization.parameterized import Parameterized
from paramz.transformations import Logexp, Logistic, __fixed__
log_2_pi = np.log(2*np.pi)
class VarDTC_minibatch_IBPLFM(VarDTC_minibatch):
'''
Modifications of VarDTC_minibatch for IBP LFM
'''
def __init__(self, batchsize=None, limit=3, mpi_comm=None):
super(VarDTC_minibatch_IBPLFM, self).__init__(batchsize, limit, mpi_comm)
def gatherPsiStat(self, kern, X, Z, Y, beta, Zp):
het_noise = beta.size > 1
assert beta.size == 1
trYYT = self.get_trYYT(Y)
if self.Y_speedup and not het_noise:
Y = self.get_YYTfactor(Y)
num_inducing = Z.shape[0]
num_data, output_dim = Y.shape
batchsize = num_data if self.batchsize is None else self.batchsize
psi2_full = np.zeros((num_inducing, num_inducing)) # MxM
psi1Y_full = np.zeros((output_dim, num_inducing)) # DxM
psi0_full = 0.
YRY_full = 0.
for n_start in range(0, num_data, batchsize):
n_end = min(batchsize+n_start, num_data)
if batchsize == num_data:
Y_slice = Y
X_slice = X
else:
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if het_noise:
b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b
else:
b = beta
psi0 = kern._Kdiag(X_slice) #Kff^q
psi1 = kern.K(X_slice, Z) #Kfu
indX = X_slice.values
indX = np.int_(np.round(indX[:, -1]))
Zp = Zp.gamma.values
# Extend Zp across columns
indZ = Z.values
indZ = np.int_(np.round(indZ[:, -1])) - Zp.shape[0]
Zpq = Zp[:, indZ]
for d in np.unique(indX):
indd = indX == d
psi1d = psi1[indd, :]
Zpd = Zp[d, :]
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
psi2_full += (np.dot(psi1d.T, psi1d)*Zp2[np.ix_(indZ, indZ)])*b #Zp2*Kufd*Kfud*beta
psi0_full += np.sum(psi0*Zp[indX, :])*b
psi1Y_full += np.dot(Y_slice.T, psi1*Zpq[indX, :])*b
if not het_noise:
YRY_full = trYYT*beta
if self.mpi_comm is not None:
from mpi4py import MPI
psi0_all = np.array(psi0_full)
psi1Y_all = psi1Y_full.copy()
psi2_all = psi2_full.copy()
YRY_all = np.array(YRY_full)
self.mpi_comm.Allreduce([psi0_full, MPI.DOUBLE], [psi0_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi1Y_full, MPI.DOUBLE], [psi1Y_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE])
return psi0_all, psi1Y_all, psi2_all, YRY_all
return psi0_full, psi1Y_full, psi2_full, YRY_full
def inference_likelihood(self, kern, X, Z, likelihood, Y, Zp):
"""
The first phase of inference:
Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv,
"""
num_data, output_dim = Y.shape
input_dim = Z.shape[0]
if self.mpi_comm is not None:
from mpi4py import MPI
num_data_all = np.array(num_data,dtype=np.int32)
self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
num_data = num_data_all
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
if het_noise:
self.batchsize = 1
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, Zp)
#======================================================================
# Compute Common Components
#======================================================================
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
if not np.isfinite(Kmm).all():
print(Kmm)
Lm = jitchol(Kmm)
LmInv = dtrtri(Lm)
LmInvPsi2LmInvT = np.dot(LmInv, np.dot(psi2_full, LmInv.T))
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda)
LLInv = dtrtri(LL)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
LmLLInv = np.dot(LLInv, LmInv)
b = np.dot(psi1Y_full, LmLLInv.T)
bbt = np.sum(np.square(b))
v = np.dot(b, LmLLInv).T
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
tmp = -np.dot(np.dot(LLInv.T, LLinvPsi1TYYTPsi1LLinvT + output_dim*np.eye(input_dim)), LLInv)
dL_dpsi2R = .5*np.dot(np.dot(LmInv.T, tmp + output_dim*np.eye(input_dim)), LmInv)
# Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v
#======================================================================
# Compute log-likelihood
#======================================================================
if het_noise:
logL_R = -np.sum(np.log(beta))
else:
logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)*.5 - output_dim*logdet_L*.5
#======================================================================
# Compute dL_dKmm
#======================================================================
dL_dKmm = dL_dpsi2R - .5*output_dim*np.dot(np.dot(LmInv.T, LmInvPsi2LmInvT), LmInv)
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)
#======================================================================
if not self.Y_speedup or het_noise:
wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
else:
post = None
#======================================================================
# Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
#======================================================================
if not het_noise:
dL_dthetaL = .5*(YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta) - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
self.midRes['dL_dthetaL'] = dL_dthetaL
return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y, Zp):
"""
The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd)
"""
num_data, output_dim = Y.shape
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y)
if self.Y_speedup and not het_noise:
YYT_factor = self.get_YYTfactor(Y)
else:
YYT_factor = Y
n_start = self.batch_pos
batchsize = num_data if self.batchsize is None else self.batchsize
n_end = min(batchsize+n_start, num_data)
if n_end == num_data:
isEnd = True
self.batch_pos = 0
else:
isEnd = False
self.batch_pos = n_end
if batchsize == num_data:
Y_slice = YYT_factor
X_slice = X
else:
Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end]
psi0 = kern._Kdiag(X_slice) #Kffdiag
psi1 = kern.K(X_slice, Z) #Kfu
betapsi1 = np.einsum('n,nm->nm', beta, psi1)
X_slice = X_slice.values
Z = Z.values
Zp = Zp.gamma.values
indX = np.int_(X_slice[:, -1])
indZ = np.int_(Z[:, -1]) - Zp.shape[0]
betaY = beta*Y_slice
#======================================================================
# Load Intermediate Results
#======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v']
#======================================================================
# Compute dL_dpsi
#======================================================================
dL_dpsi0 = -.5*output_dim*(beta * Zp[indX, :]) #XxQ #TODO: Check this gradient
dL_dpsi1 = np.dot(betaY, v.T)
dL_dEZp = psi1*dL_dpsi1
dL_dpsi1 = Zp[np.ix_(indX, indZ)]*dL_dpsi1
dL_dgamma = np.zeros(Zp.shape)
for d in np.unique(indX):
indd = indX == d
betapsi1d = betapsi1[indd, :]
psi1d = psi1[indd, :]
Zpd = Zp[d, :]
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
dL_dpsi1[indd, :] += np.dot(betapsi1d, Zp2[np.ix_(indZ, indZ)] * dL_dpsi2R)*2.
dL_EZp2 = dL_dpsi2R * (np.dot(psi1d.T, psi1d) * beta)*2. # Zpd*Kufd*Kfud*beta
#Gradient of Likelihood wrt gamma is calculated here
EZ = Zp[d, indZ]
for q in range(Zp.shape[1]):
EZt = EZ.copy()
indq = indZ == q
EZt[indq] = .5
dL_dgamma[d, q] = np.sum(dL_dEZp[np.ix_(indd, indq)]) + np.sum(dL_EZp2[:, indq]*EZt[:, None]) -\
.5*beta*(np.sum(psi0[indd, q]))
#======================================================================
# Compute dL_dthetaL
#======================================================================
if isEnd:
dL_dthetaL = self.midRes['dL_dthetaL']
else:
dL_dthetaL = 0.
grad_dict = {'dL_dKdiag': dL_dpsi0,
'dL_dKnm': dL_dpsi1,
'dL_dthetaL': dL_dthetaL,
'dL_dgamma': dL_dgamma}
return isEnd, (n_start, n_end), grad_dict
def update_gradients(model, mpi_comm=None):
if mpi_comm is None:
Y = model.Y
X = model.X
else:
Y = model.Y_local
X = model.X[model.N_range[0]:model.N_range[1]]
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
het_noise = model.likelihood.variance.size > 1
if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],))
else:
dL_dthetaL = np.float64(0.)
kern_grad = model.kern.gradient.copy()
kern_grad[:] = 0.
model.Z.gradient = 0.
gamma_gradient = model.Zp.gamma.copy()
gamma_gradient[:] = 0.
isEnd = False
while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
if (n_range[1]-n_range[0]) == X.shape[0]:
X_slice = X
elif mpi_comm is None:
X_slice = model.X[n_range[0]:n_range[1]]
else:
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
#gradients w.r.t. kernel
model.kern.update_gradients_diag(grad_dict['dL_dKdiag'], X_slice)
kern_grad += model.kern.gradient
model.kern.update_gradients_full(grad_dict['dL_dKnm'], X_slice, model.Z)
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(grad_dict['dL_dKnm'].T, model.Z, X_slice)
#gradients w.r.t. posterior parameters of Zp
gamma_gradient += grad_dict['dL_dgamma']
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes
if mpi_comm is not None:
from mpi4py import MPI
if het_noise:
raise "het_noise not implemented!"
kern_grad_all = kern_grad.copy()
Z_grad_all = model.Z.gradient.copy()
gamma_grad_all = gamma_gradient.copy()
mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([gamma_gradient, MPI.DOUBLE], [gamma_grad_all, MPI.DOUBLE])
kern_grad = kern_grad_all
model.Z.gradient = Z_grad_all
gamma_gradient = gamma_grad_all
#gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
model.kern.gradient += kern_grad
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
#gradient w.r.t. gamma
model.Zp.gamma.gradient = gamma_gradient
# Update Log-likelihood
KL_div = model.variational_prior.KL_divergence(model.Zp)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.Zp)
model._log_marginal_likelihood += KL_div
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)
class IBPPosterior(Parameterized):
'''
The IBP distribution for variational approximations.
'''
def __init__(self, binary_prob, tau=None, name='Sensitivity space', *a, **kw):
"""
binary_prob : the probability of including a latent function over an output.
"""
super(IBPPosterior, self).__init__(name=name, *a, **kw)
self.gamma = Param("binary_prob", binary_prob, Logistic(1e-10, 1. - 1e-10))
self.link_parameter(self.gamma)
if tau is not None:
assert tau.size == 2*self.gamma.shape[1]
self.tau = Param("tau", tau, Logexp())
else:
self.tau = Param("tau", np.ones((2, self.gamma.shape[1])), Logexp())
self.link_parameter(self.tau)
def set_gradients(self, grad):
self.gamma.gradient, self.tau.gradient = grad
def __getitem__(self, s):
pass
# if isinstance(s, (int, slice, tuple, list, np.ndarray)):
# import copy
# n = self.__new__(self.__class__, self.name)
# dc = self.__dict__.copy()
# dc['binary_prob'] = self.binary_prob[s]
# dc['tau'] = self.tau
# dc['parameters'] = copy.copy(self.parameters)
# n.__dict__.update(dc)
# n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
# n.parameters[dc['tau']._parent_index_] = dc['tau']
# n._gradient_array_ = None
# oversize = self.size - self.gamma.size - self.tau.size
# n.size = n.gamma.size + n.tau.size + oversize
# return n
# else:
# return super(IBPPosterior, self).__getitem__(s)
class IBPPrior(VariationalPrior):
def __init__(self, rank, alpha=2., name='IBPPrior', **kw):
super(IBPPrior, self).__init__(name=name, **kw)
from paramz.transformations import __fixed__
self.rank = rank
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def KL_divergence(self, variational_posterior):
from scipy.special import gamma, psi
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
sum_eta = np.sum(eta, axis=0) #sum_d gamma(d,q)
D_seta = eta.shape[0] - sum_eta
ad = self.alpha/eta.shape[1]
psitau1 = psi(tau[0, :])
psitau2 = psi(tau[1, :])
sumtau = np.sum(tau, axis=0)
psitau = psi(sumtau)
# E[log p(z)]
part1 = np.sum(sum_eta*psitau1 + D_seta*psitau2 - eta.shape[0]*psitau)
# E[log p(pi)]
part1 += (ad - 1.)*np.sum(psitau1 - psitau) + eta.shape[1]*np.log(ad)
#H(z)
part2 = np.sum(-(1.-eta)*np.log(1.-eta) - eta*np.log(eta))
#H(pi)
part2 += np.sum(np.log(gamma(tau[0, :])*gamma(tau[1, :])/gamma(sumtau))-(tau[0, :]-1.)*psitau1-(tau[1, :]-1.)*psitau2\
+ (sumtau-2.)*psitau)
return part1+part2
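    # Note: despite the method name, the value returned above is
    # E_q[log p(Z)] + E_q[log p(pi)] + H[q(Z)] + H[q(pi)], i.e. the *negative*
    # KL divergence -KL(q || p). This is why callers add it to the log
    # marginal likelihood instead of subtracting it.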
def update_gradients_KL(self, variational_posterior):
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
from scipy.special import psi, polygamma
dgamma = np.log(1. - eta) - np.log(eta) + psi(tau[0, :]) - psi(tau[1, :])
variational_posterior.gamma.gradient += dgamma
ad = self.alpha/self.rank
sumeta = np.sum(eta, axis=0)
sumtau = np.sum(tau, axis=0)
common = (-eta.shape[0] - (ad - 1.) + (sumtau - 2.))*polygamma(1, sumtau)
variational_posterior.tau.gradient[0, :] = (sumeta + ad - tau[0, :])*polygamma(1, tau[0, :]) + common
variational_posterior.tau.gradient[1, :] = ((eta.shape[0] - sumeta) - (tau[1, :] - 1.))*polygamma(1, tau[1, :])\
+ common
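
# A finite-difference sanity check sketch (illustrative; not part of the
# original file): perturb one entry of gamma and compare the resulting change
# in KL_divergence with the analytic gradient from update_gradients_KL.
def _check_gamma_gradient(prior, post, d=0, q=0, eps=1e-6):
    post.gamma.gradient[:] = 0.
    post.tau.gradient[:] = 0.
    prior.update_gradients_KL(post)
    analytic = post.gamma.gradient[d, q]
    base = prior.KL_divergence(post)
    post.gamma[d, q] += eps
    numeric = (prior.KL_divergence(post) - base) / eps
    post.gamma[d, q] -= eps
    return analytic, numeric  # should agree up to finite-difference error
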
class IBPLFM(SparseGP_MPI):
"""
Indian Buffet Process for Latent Force Models
:param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray | GPy.likelihood instance
:param X: input data (np.ndarray) [X:values, X:index], where the index column gives the number of the output
:type X: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param rank: number of latent functions
:type rank: int
"""
def __init__(self, X, Y, input_dim=2, output_dim=1, rank=1, Gamma=None, num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='IBP for LFM', alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, normalizer=False, variational_prior=None,**kwargs):
if kernel is None:
kernel = kern.EQ_ODE2(input_dim, output_dim, rank)
if Gamma is None:
gamma = np.empty((output_dim, rank)) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(output_dim, rank)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
else:
gamma = Gamma.copy()
#TODO: create a vector of inducing points
if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if likelihood is None:
likelihood = Gaussian()
if inference_method is None:
inference_method = VarDTC_minibatch_IBPLFM(mpi_comm=mpi_comm)
#Definition of variational terms
self.variational_prior = IBPPrior(rank=rank, alpha=alpha) if variational_prior is None else variational_prior
self.Zp = IBPPosterior(gamma, tau=tau)
super(IBPLFM, self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
self.link_parameter(self.Zp, index=0)
def set_Zp_gradients(self, Zp, Zp_grad):
"""Set the gradients of the posterior distribution of Zp in its specific form."""
Zp.gamma.gradient = Zp_grad
def get_Zp_gradients(self, Zp):
"""Get the gradients of the posterior distribution of Zp in its specific form."""
return Zp.gamma.gradient
def _propogate_Zp_val(self):
pass
def parameters_changed(self):
#super(IBPLFM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch_IBPLFM):
update_gradients(self, mpi_comm=self.mpi_comm)
return
# Add the KL divergence term
self._log_marginal_likelihood += self.variational_prior.KL_divergence(self.Zp)
#TODO Change the following according to this variational distribution
#self.Zp.gamma.gradient = self.
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.Zp)
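
# A minimal usage sketch (illustrative; assumes the class is exposed as
# GPy.models.IBPLFM). X stacks the input values with an output-index column,
# as described in the docstring above; the kernel and likelihood fall back
# to their defaults (EQ_ODE2 and Gaussian).
def _ibplfm_demo():
    import numpy as np
    import GPy
    D, N = 3, 40  # number of outputs, points per output
    t = np.linspace(0., 5., N)
    X = np.column_stack((np.tile(t, D), np.repeat(np.arange(D), N)))
    Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(N * D, 1)
    m = GPy.models.IBPLFM(X, Y, input_dim=2, output_dim=D, rank=2, num_inducing=10)
    m.optimize(max_iters=10, messages=False)
    return m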

View file

@@ -3237,6 +3237,7 @@ class ContDescrStateSpace(DescreteStateSpace):
AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n)))) AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n))))
Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T) Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T)
Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T)
# The covariance matrix Q by matrix fraction decomposition <- # The covariance matrix Q by matrix fraction decomposition <-
if compute_derivatives: if compute_derivatives:
@@ -3276,7 +3277,8 @@ class ContDescrStateSpace(DescreteStateSpace):
else: else:
dA = None dA = None
dQ = None dQ = None
Q_noise = Q_noise_1 Q_noise = Q_noise_2
# Inaccuracies have been observed when Q_noise_1 was used.
#Q_noise = Q_noise_1 #Q_noise = Q_noise_1

View file

@@ -15,19 +15,10 @@
# #
import numpy as np import numpy as np
from scipy import linalg
from scipy import stats from scipy import stats
from ..core import Model
from .. import kern
#from GPy.plotting.matplot_dep.models_plots import gpplot
#from GPy.plotting.matplot_dep.base_plots import x_frame1D
#from GPy.plotting.matplot_dep import Tango
#import pylab as pb
from GPy.core.parameterization.param import Param
import GPy
from .. import likelihoods from .. import likelihoods
#from . import state_space_setup as ss_setup
from ..core import Model
from . import state_space_main as ssm from . import state_space_main as ssm
from . import state_space_setup as ss_setup from . import state_space_setup as ss_setup
@@ -37,12 +28,12 @@ class StateSpace(Model):
if len(X.shape) == 1: if len(X.shape) == 1:
X = np.atleast_2d(X).T X = np.atleast_2d(X).T
self.num_data, input_dim = X.shape self.num_data, self.input_dim = X.shape
if len(Y.shape) == 1: if len(Y.shape) == 1:
Y = np.atleast_2d(Y).T Y = np.atleast_2d(Y).T
assert input_dim==1, "State space methods are only for 1D data" assert self.input_dim==1, "State space methods are only for 1D data"
if len(Y.shape)==2: if len(Y.shape)==2:
num_data_Y, self.output_dim = Y.shape num_data_Y, self.output_dim = Y.shape
@@ -168,7 +159,7 @@ class StateSpace(Model):
def log_likelihood(self): def log_likelihood(self):
return self._log_marginal_likelihood return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False): def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
""" """
Performs the actual prediction for new X points. Performs the actual prediction for new X points.
Inner function. It is called only from inside this class. Inner function. It is called only from inside this class.
@@ -270,22 +261,23 @@ class StateSpace(Model):
# Return the posterior of the state # Return the posterior of the state
return (m, V) return (m, V)
def predict(self, Xnew=None, filteronly=False): def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
# Run the Kalman filter to get the state # Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly) (m, V) = self._raw_predict(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance # Add the noise variance to the state variance
V += float(self.Gaussian_noise.variance) if include_likelihood:
V += float(self.likelihood.variance)
# Lower and upper bounds # Lower and upper bounds
lower = m - 2*np.sqrt(V) #lower = m - 2*np.sqrt(V)
upper = m + 2*np.sqrt(V) #upper = m + 2*np.sqrt(V)
# Return mean and variance # Return mean and variance
return (m, V, lower, upper) return m, V
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5)): def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
mu, var = self._raw_predict(Xnew) mu, var = self._raw_predict(Xnew)
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles] return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
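
Since predict now returns only the mean and variance, callers that previously unpacked four values can recompute the 2-sigma bounds themselves or use predict_quantiles (a sketch against the new signature; model and Xnew are illustrative names):

    import numpy as np
    m, V = model.predict(Xnew)  # V includes the likelihood variance by default
    lower, upper = m - 2 * np.sqrt(V), m + 2 * np.sqrt(V)
    # or equivalently via the quantile helper:
    lower, upper = model.predict_quantiles(Xnew, quantiles=(2.5, 97.5))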

View file

@@ -52,6 +52,17 @@ def inject_plotting():
GP.plot_f = gpy_plot.gp_plots.plot_f GP.plot_f = gpy_plot.gp_plots.plot_f
GP.plot_magnification = gpy_plot.latent_plots.plot_magnification GP.plot_magnification = gpy_plot.latent_plots.plot_magnification
from ..models import StateSpace
StateSpace.plot_data = gpy_plot.data_plots.plot_data
StateSpace.plot_data_error = gpy_plot.data_plots.plot_data_error
StateSpace.plot_errorbars_trainset = gpy_plot.data_plots.plot_errorbars_trainset
StateSpace.plot_mean = gpy_plot.gp_plots.plot_mean
StateSpace.plot_confidence = gpy_plot.gp_plots.plot_confidence
StateSpace.plot_density = gpy_plot.gp_plots.plot_density
StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples
StateSpace.plot = gpy_plot.gp_plots.plot
StateSpace.plot_f = gpy_plot.gp_plots.plot_f
from ..core import SparseGP from ..core import SparseGP
SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing
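
With the lines added above, StateSpace models pick up the same plotting entry points as GP models; for example (a sketch, assuming 1D inputs, a state-space-capable kernel, and a configured plotting backend):

    import numpy as np
    import GPy
    X = np.linspace(0., 10., 100)[:, None]
    Y = np.sin(X) + 0.1 * np.random.randn(100, 1)
    m = GPy.models.StateSpace(X, Y, kernel=GPy.kern.Matern32(1))
    m.plot()  # now injected onto StateSpace, as on GP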

View file

@@ -158,7 +158,7 @@ def _plot_data_error(self, canvas, which_data_rows='all',
return plots return plots
def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **plot_kwargs): def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', legend=True, **plot_kwargs):
""" """
Plot the inducing inputs of a sparse gp model Plot the inducing inputs of a sparse gp model
@@ -167,7 +167,7 @@ def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **
""" """
canvas, kwargs = pl().new_canvas(projection=projection, **plot_kwargs) canvas, kwargs = pl().new_canvas(projection=projection, **plot_kwargs)
plots = _plot_inducing(self, canvas, visible_dims, projection, label, **kwargs) plots = _plot_inducing(self, canvas, visible_dims, projection, label, **kwargs)
return pl().add_to_canvas(canvas, plots, legend=label is not None) return pl().add_to_canvas(canvas, plots, legend=legend)
def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs): def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs):
if visible_dims is None: if visible_dims is None:
@@ -175,7 +175,7 @@ def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs)
visible_dims = [i for i in sig_dims if i is not None] visible_dims = [i for i in sig_dims if i is not None]
free_dims = get_free_dims(self, visible_dims, None) free_dims = get_free_dims(self, visible_dims, None)
Z = self.Z[:, free_dims] Z = self.Z.values
plots = {} plots = {}
#one dimensional plotting #one dimensional plotting

View file

@@ -112,28 +112,29 @@ def plot_latent_inducing(self,
which_indices=None, which_indices=None,
legend=False, legend=False,
plot_limits=None, plot_limits=None,
marker='^', marker=None,
num_samples=1000,
projection='2d', projection='2d',
**kwargs): **kwargs):
""" """
Plot a scatter plot of the inducing inputs. Plot a scatter plot of the inducing inputs.
:param array-like labels: a label for each data point (row) of the inputs :param [int] which_indices: which input dimensions to plot against each other
:param (int, int) which_indices: which input dimensions to plot against each other
:param bool legend: whether to plot the legend on the figure :param bool legend: whether to plot the legend on the figure
:param plot_limits: the plot limits for the plot :param plot_limits: the plot limits for the plot
:type plot_limits: (xmin, xmax, ymin, ymax) or ((xmin, xmax), (ymin, ymax)) :type plot_limits: (xmin, xmax, ymin, ymax) or ((xmin, xmax), (ymin, ymax))
:param str marker: markers to use - cycle if more labels then markers are given :param str marker: marker to use [default is custom arrow like]
:param kwargs: the kwargs for the scatter plots :param kwargs: the kwargs for the scatter plots
:param str projection: for now 2d or 3d projection (other projections can be implemented, see developer documentation)
""" """
canvas, projection, kwargs, sig_dims = _new_canvas(self, projection, kwargs, which_indices) canvas, projection, kwargs, sig_dims = _new_canvas(self, projection, kwargs, which_indices)
Z = self.Z.values if legend: label = 'inducing'
labels = np.array(['inducing'] * Z.shape[0]) else: label = None
if marker is not None:
kwargs['marker'] = marker kwargs['marker'] = marker
update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable
scatters = _plot_latent_scatter(canvas, Z, sig_dims, labels, num_samples=num_samples, projection=projection, **kwargs) from .data_plots import _plot_inducing
scatters = _plot_inducing(self, canvas, sig_dims[:2], projection, label, **kwargs)
return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend) return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend)

View file

@@ -190,6 +190,7 @@ def scatter_label_generator(labels, X, visible_dims, marker=None):
x = X[index, input_1] x = X[index, input_1]
y = X[index, input_2] y = X[index, input_2]
z = X[index, input_3] z = X[index, input_3]
yield x, y, z, this_label, index, m yield x, y, z, this_label, index, m
def subsample_X(X, labels, num_samples=1000): def subsample_X(X, labels, num_samples=1000):

View file

@@ -45,7 +45,7 @@ it gives back an empty default, when defaults are not defined.
# Data plots: # Data plots:
data_1d = dict(lw=1.5, marker='x', color='k') data_1d = dict(lw=1.5, marker='x', color='k')
data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5) data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5)
inducing_1d = dict(lw=0, s=500, facecolors=Tango.colorsHex['darkRed']) inducing_1d = dict(lw=0, s=500, color=Tango.colorsHex['darkRed'])
inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^') inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^')
inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k') inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k')
xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5) xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5)

View file

@@ -106,7 +106,7 @@ class MatplotlibPlots(AbstractPlottingLibrary):
return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs) return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs)
return ax.plot(X, Y, color=color, label=label, **kwargs) return ax.plot(X, Y, color=color, label=label, **kwargs)
def plot_axis_lines(self, ax, X, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): def plot_axis_lines(self, ax, X, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
from matplotlib import transforms from matplotlib import transforms
from matplotlib.path import Path from matplotlib.path import Path
if 'marker' not in kwargs: if 'marker' not in kwargs:
@@ -126,14 +126,14 @@ class MatplotlibPlots(AbstractPlottingLibrary):
bottom=bottom, label=label, color=color, bottom=bottom, label=label, color=color,
**kwargs) **kwargs)
def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
if not('linestyle' in kwargs or 'ls' in kwargs): if not('linestyle' in kwargs or 'ls' in kwargs):
kwargs['ls'] = 'none' kwargs['ls'] = 'none'
#if Z is not None: #if Z is not None:
# return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs) # return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs)
return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs) return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs)
def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
if not('linestyle' in kwargs or 'ls' in kwargs): if not('linestyle' in kwargs or 'ls' in kwargs):
kwargs['ls'] = 'none' kwargs['ls'] = 'none'
#if Z is not None: #if Z is not None:

View file

@@ -131,14 +131,15 @@ class PlotlyPlots(AbstractPlottingLibrary):
#not matplotlib marker #not matplotlib marker
pass pass
marker_kwargs = marker_kwargs or {} marker_kwargs = marker_kwargs or {}
marker_kwargs.setdefault('symbol', marker) if 'symbol' not in marker_kwargs:
marker_kwargs['symbol'] = marker
if Z is not None: if Z is not None:
return Scatter3d(x=X, y=Y, z=Z, mode='markers', return Scatter3d(x=X, y=Y, z=Z, mode='markers',
showlegend=label is not None, showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs), marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs) name=label, **kwargs)
return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None, return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs or {}), marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs) name=label, **kwargs)
def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs): def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs):

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 186 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 182 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more