From 65af6ee35e7800f380293d8339e2cd5e3ac33394 Mon Sep 17 00:00:00 2001 From: Martin Bubel Date: Mon, 16 Oct 2023 21:20:17 +0200 Subject: [PATCH] replace np.int by int --- GPy/kern/src/coregionalize.py | 97 +- GPy/kern/src/eq_ode1.py | 726 ++--- GPy/kern/src/eq_ode2.py | 1756 ++++++----- GPy/kern/src/todo/eq_ode1.py | 6 +- .../sparse_gp_coregionalized_regression.py | 66 +- GPy/models/ss_mrd.py | 406 ++- GPy/models/state_space_main.py | 2569 +++++++++++------ GPy/plotting/matplot_dep/base_plots.py | 139 +- GPy/plotting/matplot_dep/plot_definitions.py | 375 ++- GPy/testing/test_ep_likelihood.py | 2 +- GPy/testing/test_likelihood.py | 2 +- GPy/testing/test_model.py | 4 +- GPy/testing/test_pickle.py | 8 +- GPy/util/classification.py | 23 +- GPy/util/multioutput.py | 85 +- 15 files changed, 3889 insertions(+), 2375 deletions(-) diff --git a/GPy/kern/src/coregionalize.py b/GPy/kern/src/coregionalize.py index d05f5c6a..7f92d4f7 100644 --- a/GPy/kern/src/coregionalize.py +++ b/GPy/kern/src/coregionalize.py @@ -5,13 +5,16 @@ from .kern import Kern import numpy as np from ...core.parameterization import Param from paramz.transformations import Logexp -from ...util.config import config # for assesing whether to use cython +from ...util.config import config # for assesing whether to use cython try: from . import coregionalize_cython - use_coregionalize_cython = config.getboolean('cython', 'working') + + use_coregionalize_cython = config.getboolean("cython", "working") except ImportError: - print('warning in coregionalize: failed to import cython module: falling back to numpy') + print( + "warning in coregionalize: failed to import cython module: falling back to numpy" + ) use_coregionalize_cython = False @@ -43,22 +46,34 @@ class Coregionalize(Kern): .. note: see coregionalization examples in GPy.examples.regression for some usage. """ - def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, active_dims=None, name='coregion'): + + def __init__( + self, + input_dim, + output_dim, + rank=1, + W=None, + kappa=None, + active_dims=None, + name="coregion", + ): super(Coregionalize, self).__init__(input_dim, active_dims, name=name) self.output_dim = output_dim self.rank = rank - if self.rank>output_dim: - print("Warning: Unusual choice of rank, it should normally be less than the output_dim.") + if self.rank > output_dim: + print( + "Warning: Unusual choice of rank, it should normally be less than the output_dim." + ) if W is None: - W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) + W = 0.5 * np.random.randn(self.output_dim, self.rank) / np.sqrt(self.rank) else: - assert W.shape==(self.output_dim, self.rank) - self.W = Param('W', W) + assert W.shape == (self.output_dim, self.rank) + self.W = Param("W", W) if kappa is None: - kappa = 0.5*np.ones(self.output_dim) + kappa = 0.5 * np.ones(self.output_dim) else: - assert kappa.shape==(self.output_dim, ) - self.kappa = Param('kappa', kappa, Logexp()) + assert kappa.shape == (self.output_dim,) + self.kappa = Param("kappa", kappa, Logexp()) self.link_parameters(self.W, self.kappa) def parameters_changed(self): @@ -70,63 +85,69 @@ class Coregionalize(Kern): else: return self._K_numpy(X, X2) - def _K_numpy(self, X, X2=None): - index = np.asarray(X, dtype=np.int) + index = np.asarray(X, dtype=int) if X2 is None: - return self.B[index,index.T] + return self.B[index, index.T] else: - index2 = np.asarray(X2, dtype=np.int) - return self.B[index,index2.T] + index2 = np.asarray(X2, dtype=int) + return self.B[index, index2.T] def _K_cython(self, X, X2=None): if X2 is None: - return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0]) - return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0]) - + return coregionalize_cython.K_symmetric( + self.B, np.asarray(X, dtype=np.int64)[:, 0] + ) + return coregionalize_cython.K_asymmetric( + self.B, + np.asarray(X, dtype=np.int64)[:, 0], + np.asarray(X2, dtype=np.int64)[:, 0], + ) def Kdiag(self, X): - return np.diag(self.B)[np.asarray(X, dtype=np.int).flatten()] + return np.diag(self.B)[np.asarray(X, dtype=int).flatten()] def update_gradients_full(self, dL_dK, X, X2=None): - index = np.asarray(X, dtype=np.int) + index = np.asarray(X, dtype=int) if X2 is None: index2 = index else: - index2 = np.asarray(X2, dtype=np.int) + index2 = np.asarray(X2, dtype=int) - #attempt to use cython for a nasty double indexing loop: fall back to numpy + # attempt to use cython for a nasty double indexing loop: fall back to numpy if use_coregionalize_cython: dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2) else: dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2) - dkappa = np.diag(dL_dK_small).copy() dL_dK_small += dL_dK_small.T - dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0) + dW = (self.W[:, None, :] * dL_dK_small[:, :, None]).sum(0) self.W.gradient = dW self.kappa.gradient = dkappa def _gradient_reduce_numpy(self, dL_dK, index, index2): - index, index2 = index[:,0], index2[:,0] + index, index2 = index[:, 0], index2[:, 0] dL_dK_small = np.zeros_like(self.B) for i in range(self.output_dim): - tmp1 = dL_dK[index==i] + tmp1 = dL_dK[index == i] for j in range(self.output_dim): - dL_dK_small[j,i] = tmp1[:,index2==j].sum() + dL_dK_small[j, i] = tmp1[:, index2 == j].sum() return dL_dK_small def _gradient_reduce_cython(self, dL_dK, index, index2): - index, index2 = np.int64(index[:,0]), np.int64(index2[:,0]) - return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2) - + index, index2 = np.int64(index[:, 0]), np.int64(index2[:, 0]) + return coregionalize_cython.gradient_reduce( + self.B.shape[0], dL_dK, index, index2 + ) def update_gradients_diag(self, dL_dKdiag, X): - index = np.asarray(X, dtype=np.int).flatten() - dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)]) - self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None] + index = np.asarray(X, dtype=int).flatten() + dL_dKdiag_small = np.array( + [dL_dKdiag[index == i].sum() for i in range(self.output_dim)] + ) + self.W.gradient = 2.0 * self.W * dL_dKdiag_small[:, None] self.kappa.gradient = dL_dKdiag_small def gradients_X(self, dL_dK, X, X2=None): @@ -154,8 +175,8 @@ class Coregionalize(Kern): @staticmethod def _build_from_input_dict(kernel_class, input_dict): - useGPU = input_dict.pop('useGPU', None) + useGPU = input_dict.pop("useGPU", None) # W and kappa must be converted back to numpy arrays - input_dict['W'] = np.array(input_dict['W']) - input_dict['kappa'] = np.array(input_dict['kappa']) + input_dict["W"] = np.array(input_dict["W"]) + input_dict["kappa"] = np.array(input_dict["kappa"]) return Coregionalize(**input_dict) diff --git a/GPy/kern/src/eq_ode1.py b/GPy/kern/src/eq_ode1.py index 9c19bead..4361ec23 100644 --- a/GPy/kern/src/eq_ode1.py +++ b/GPy/kern/src/eq_ode1.py @@ -8,6 +8,7 @@ from ...core.parameterization import Param from paramz.transformations import Logexp from paramz.caching import Cache_this + class EQ_ODE1(Kern): """ Covariance function for first order differential equation driven by an exponentiated quadratic covariance. @@ -17,210 +18,236 @@ class EQ_ODE1(Kern): \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t) where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance. - + :param output_dim: number of outputs driven by latent function. :type output_dim: int - :param W: sensitivities of each output to the latent driving function. + :param W: sensitivities of each output to the latent driving function. :type W: ndarray (output_dim x rank). :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance. :type rank: int - :param decay: decay rates for the first order system. + :param decay: decay rates for the first order system. :type decay: array of length output_dim. :param delay: delay between latent force and output response. :type delay: array of length output_dim. :param kappa: diagonal term that allows each latent output to have an independent component to the response. :type kappa: array of length output_dim. - + .. Note: see first order differential equation examples in GPy.examples.regression for some usage. """ - def __init__(self, input_dim=2, output_dim=1, rank=1, W = None, lengthscale=None, decay=None, active_dims=None, name='eq_ode1'): + + def __init__( + self, + input_dim=2, + output_dim=1, + rank=1, + W=None, + lengthscale=None, + decay=None, + active_dims=None, + name="eq_ode1", + ): assert input_dim == 2, "only defined for 1 input dims" - super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name) + super(EQ_ODE1, self).__init__( + input_dim=input_dim, active_dims=active_dims, name=name + ) self.rank = rank self.output_dim = output_dim if lengthscale is None: - lengthscale = .5 + np.random.rand(self.rank) + lengthscale = 0.5 + np.random.rand(self.rank) else: lengthscale = np.asarray(lengthscale) assert lengthscale.size in [1, self.rank], "Bad number of lengthscales" if lengthscale.size != self.rank: - lengthscale = np.ones(self.rank)*lengthscale - + lengthscale = np.ones(self.rank) * lengthscale + if W is None: - W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) + W = 0.5 * np.random.randn(self.output_dim, self.rank) / np.sqrt(self.rank) else: assert W.shape == (self.output_dim, self.rank) - + if decay is None: decay = np.ones(self.output_dim) else: decay = np.asarray(decay) assert decay.size in [1, self.output_dim], "Bad number of decay" if decay.size != self.output_dim: - decay = np.ones(self.output_dim)*decay + decay = np.ones(self.output_dim) * decay -# if kappa is None: -# self.kappa = np.ones(self.output_dim) -# else: -# kappa = np.asarray(kappa) -# assert kappa.size in [1, self.output_dim], "Bad number of kappa" -# if decay.size != self.output_dim: -# decay = np.ones(self.output_dim)*kappa + # if kappa is None: + # self.kappa = np.ones(self.output_dim) + # else: + # kappa = np.asarray(kappa) + # assert kappa.size in [1, self.output_dim], "Bad number of kappa" + # if decay.size != self.output_dim: + # decay = np.ones(self.output_dim)*kappa - #self.kappa = Param('kappa', kappa, Logexp()) - #self.delay = Param('delay', delay, Logexp()) - #self.is_normalized = True - #self.is_stationary = False - #self.gaussian_initial = False + # self.kappa = Param('kappa', kappa, Logexp()) + # self.delay = Param('delay', delay, Logexp()) + # self.is_normalized = True + # self.is_stationary = False + # self.gaussian_initial = False - self.lengthscale = Param('lengthscale', lengthscale, Logexp()) - self.decay = Param('decay', decay, Logexp()) - self.W = Param('W', W) + self.lengthscale = Param("lengthscale", lengthscale, Logexp()) + self.decay = Param("decay", decay, Logexp()) + self.W = Param("W", W) self.link_parameters(self.lengthscale, self.decay, self.W) @Cache_this(limit=3) def K(self, X, X2=None): - #This way is not working, indexes are lost after using k._slice_X - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # This way is not working, indexes are lost after using k._slice_X + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values index = np.int_(np.round(X[:, 1])) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim if X2 is None: if X_flag: - #Calculate covariance function for the latent functions + # Calculate covariance function for the latent functions index -= self.output_dim return self._Kuu(X, index) else: raise NotImplementedError else: - #This way is not working, indexes are lost after using k._slice_X - #index2 = np.asarray(X2, dtype=np.int) - #index2 = index2.reshape(index2.size,) - if hasattr(X2, 'values'): + # This way is not working, indexes are lost after using k._slice_X + # index2 = np.asarray(X2, dtype=int) + # index2 = index2.reshape(index2.size,) + if hasattr(X2, "values"): X2 = X2.values index2 = np.int_(np.round(X2[:, 1])) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) X2_flag = index2[0] >= self.output_dim - #Calculate cross-covariance function + # Calculate cross-covariance function if not X_flag and X2_flag: index2 -= self.output_dim - return self._Kfu(X, index, X2, index2) #Kfu + return self._Kfu(X, index, X2, index2) # Kfu elif X_flag and not X2_flag: index -= self.output_dim - return self._Kfu(X2, index2, X, index).T #Kuf + return self._Kfu(X2, index2, X, index).T # Kuf elif X_flag and X2_flag: index -= self.output_dim index2 -= self.output_dim - return self._Kusu(X, index, X2, index2) #Ku_s u + return self._Kusu(X, index, X2, index2) # Ku_s u else: - raise NotImplementedError #Kf_s f + raise NotImplementedError # Kf_s f - #Calculate the covariance function for diag(Kff(X,X)) + # Calculate the covariance function for diag(Kff(X,X)) def Kdiag(self, X): - if hasattr(X, 'values'): + if hasattr(X, "values"): index = np.int_(np.round(X[:, 1].values)) else: index = np.int_(np.round(X[:, 1])) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim - - if X_flag: #Kuudiag - return np.ones(X[:,0].shape) - else: #Kffdiag + + if X_flag: # Kuudiag + return np.ones(X[:, 0].shape) + else: # Kffdiag kdiag = self._Kdiag(X) return np.sum(kdiag, axis=1) - + def _Kdiag(self, X): - #This way is not working, indexes are lost after using k._slice_X - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # This way is not working, indexes are lost after using k._slice_X + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values index = np.int_(X[:, 1]) - index = index.reshape(index.size,) - - #terms that move along t + index = index.reshape( + index.size, + ) + + # terms that move along t t = X[:, 0].reshape(X.shape[0], 1) - d = np.unique(index) #Output Indexes + d = np.unique(index) # Output Indexes B = self.decay.values[d] S = self.W.values[d, :] - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - + B = B.reshape(B.size, 1) - #Terms that move along q + # Terms that move along q lq = self.lengthscale.values.reshape(1, self.rank) - S2 = S*S - kdiag = np.empty((t.size, )) + S2 = S * S + kdiag = np.empty((t.size,)) - #Dx1 terms - c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq) + # Dx1 terms + c0 = (S2 / B) * ((0.5 * np.sqrt(np.pi)) * lq) - #DxQ terms - nu = lq*(B*.5) - nu2 = nu*nu - #Nx1 terms - gamt = -2.*B - gamt = gamt[index]*t + # DxQ terms + nu = lq * (B * 0.5) + nu2 = nu * nu + # Nx1 terms + gamt = -2.0 * B + gamt = gamt[index] * t - #NxQ terms - t_lq = t/lq + # NxQ terms + t_lq = t / lq # Upsilon Calculations # Using wofz - #erfnu = erf(nu) - - upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :] ,t_lq+nu[index,:] )) - upm[t[:, 0] == 0, :] = 0. + # erfnu = erf(nu) - - upv = np.exp(nu2[index, :] + gamt + lnDifErf( -t_lq+nu[index,:], nu[index, :] ) ) - upv[t[:, 0] == 0, :] = 0. + upm = np.exp(nu2[index, :] + lnDifErf(nu[index, :], t_lq + nu[index, :])) + upm[t[:, 0] == 0, :] = 0.0 - #Covariance calculation - #kdiag = np.sum(c0[index, :]*(upm-upv), axis=1) - kdiag = c0[index, :]*(upm-upv) + upv = np.exp( + nu2[index, :] + gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) + ) + upv[t[:, 0] == 0, :] = 0.0 + + # Covariance calculation + # kdiag = np.sum(c0[index, :]*(upm-upv), axis=1) + kdiag = c0[index, :] * (upm - upv) return kdiag - def update_gradients_full(self, dL_dK, X, X2 = None): - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + def update_gradients_full(self, dL_dK, X, X2=None): + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values self.decay.gradient = np.zeros(self.decay.shape) self.W.gradient = np.zeros(self.W.shape) self.lengthscale.gradient = np.zeros(self.lengthscale.shape) index = np.int_(np.round(X[:, 1])) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim if X2 is None: - if X_flag: #Kuu or Kmm + if X_flag: # Kuu or Kmm index -= self.output_dim - tmp = dL_dK*self._gkuu_lq(X, index) + tmp = dL_dK * self._gkuu_lq(X, index) for q in np.unique(index): ind = np.where(index == q) self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum() else: raise NotImplementedError - else: #Kfu or Knm - #index2 = np.asarray(X2, dtype=np.int) - #index2 = index2.reshape(index2.size,) - if hasattr(X2, 'values'): + else: # Kfu or Knm + # index2 = np.asarray(X2, dtype=int) + # index2 = index2.reshape(index2.size,) + if hasattr(X2, "values"): X2 = X2.values index2 = np.int_(np.round(X2[:, 1])) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) X2_flag = index2[0] >= self.output_dim - if not X_flag and X2_flag: #Kfu + if not X_flag and X2_flag: # Kfu index2 -= self.output_dim - else: #Kuf - dL_dK = dL_dK.T #so we obtaing dL_Kfu + else: # Kuf + dL_dK = dL_dK.T # so we obtaing dL_Kfu indtemp = index - self.output_dim Xtemp = X X = X2 @@ -228,12 +255,12 @@ class EQ_ODE1(Kern): index = index2 index2 = indtemp glq, gSdq, gB = self._gkfu(X, index, X2, index2) - tmp = dL_dK*glq + tmp = dL_dK * glq for q in np.unique(index2): ind = np.where(index2 == q) self.lengthscale.gradient[q] = tmp[:, ind].sum() - tmpB = dL_dK*gB - tmp = dL_dK*gSdq + tmpB = dL_dK * gB + tmp = dL_dK * gSdq for d in np.unique(index): ind = np.where(index == d) self.decay.gradient[d] = tmpB[ind, :].sum() @@ -242,408 +269,463 @@ class EQ_ODE1(Kern): self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum() def update_gradients_diag(self, dL_dKdiag, X): - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values self.decay.gradient = np.zeros(self.decay.shape) self.W.gradient = np.zeros(self.W.shape) self.lengthscale.gradient = np.zeros(self.lengthscale.shape) index = np.int_(X[:, 1]) - index = index.reshape(index.size,) - + index = index.reshape( + index.size, + ) + glq, gS, gB = self._gkdiag(X, index) if dL_dKdiag.size == X.shape[0]: dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1)) - tmp = dL_dKdiag*glq + tmp = dL_dKdiag * glq self.lengthscale.gradient = tmp.sum(0) - tmpB = dL_dKdiag*gB - tmp = dL_dKdiag*gS + tmpB = dL_dKdiag * gB + tmp = dL_dKdiag * gS for d in np.unique(index): ind = np.where(index == d) self.decay.gradient[d] = tmpB[ind, :].sum() self.W.gradient[d, :] = tmp[ind].sum(0) def gradients_X(self, dL_dK, X, X2=None): - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values index = np.int_(np.round(X[:, 1])) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim - #If input_dim == 1, use this - #gX = np.zeros((X.shape[0], 1)) - #Cheat to allow gradient for input_dim==2 + # If input_dim == 1, use this + # gX = np.zeros((X.shape[0], 1)) + # Cheat to allow gradient for input_dim==2 gX = np.zeros(X.shape) - if X2 is None: #Kuu or Kmm + if X2 is None: # Kuu or Kmm if X_flag: index -= self.output_dim - gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0) + gX[:, 0] = 2.0 * (dL_dK * self._gkuu_X(X, index)).sum(0) return gX else: raise NotImplementedError - else: #Kuf or Kmn - #index2 = np.asarray(X2, dtype=np.int) - #index2 = index2.reshape(index2.size,) - if hasattr(X2, 'values'): + else: # Kuf or Kmn + # index2 = np.asarray(X2, dtype=int) + # index2 = index2.reshape(index2.size,) + if hasattr(X2, "values"): X2 = X2.values index2 = np.int_(np.round(X2[:, 1])) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) X2_flag = index2[0] >= self.output_dim - if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z + if X_flag and not X2_flag: # gradient of Kuf(Z, X) wrt Z index -= self.output_dim - gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1) + gX[:, 0] = (dL_dK * self._gkfu_z(X2, index2, X, index).T).sum(1) return gX else: raise NotImplementedError - #---------------------------------------# + # ---------------------------------------# # Helper functions # - #---------------------------------------# + # ---------------------------------------# - #Evaluation of squared exponential for LFM + # Evaluation of squared exponential for LFM def _Kuu(self, X, index): - index = index.reshape(index.size,) - t = X[:, 0].reshape(X.shape[0],) - lq = self.lengthscale.values.reshape(self.rank,) - lq2 = lq*lq - #Covariance matrix initialization + index = index.reshape( + index.size, + ) + t = X[:, 0].reshape( + X.shape[0], + ) + lq = self.lengthscale.values.reshape( + self.rank, + ) + lq2 = lq * lq + # Covariance matrix initialization kuu = np.zeros((t.size, t.size)) - #Assign 1. to diagonal terms - kuu[np.diag_indices(t.size)] = 1. - #Upper triangular indices + # Assign 1. to diagonal terms + kuu[np.diag_indices(t.size)] = 1.0 + # Upper triangular indices indtri1, indtri2 = np.triu_indices(t.size, 1) - #Block Diagonal indices among Upper Triangular indices + # Block Diagonal indices among Upper Triangular indices ind = np.where(index[indtri1] == index[indtri2]) indr = indtri1[ind] indc = indtri2[ind] r = t[indr] - t[indc] - r2 = r*r - #Calculation of covariance function - kuu[indr, indc] = np.exp(-r2/lq2[index[indr]]) - #Completion of lower triangular part + r2 = r * r + # Calculation of covariance function + kuu[indr, indc] = np.exp(-r2 / lq2[index[indr]]) + # Completion of lower triangular part kuu[indc, indr] = kuu[indr, indc] return kuu def _Kusu(self, X, index, X2, index2): - index = index.reshape(index.size,) - index2 = index2.reshape(index2.size,) - t = X[:, 0].reshape(X.shape[0],1) - t2 = X2[:, 0].reshape(1,X2.shape[0]) - lq = self.lengthscale.values.reshape(self.rank,) - #Covariance matrix initialization + index = index.reshape( + index.size, + ) + index2 = index2.reshape( + index2.size, + ) + t = X[:, 0].reshape(X.shape[0], 1) + t2 = X2[:, 0].reshape(1, X2.shape[0]) + lq = self.lengthscale.values.reshape( + self.rank, + ) + # Covariance matrix initialization kuu = np.zeros((t.size, t2.size)) for q in range(self.rank): ind1 = index == q ind2 = index2 == q - r = t[ind1]/lq[q] - t2[0,ind2]/lq[q] - r2 = r*r - #Calculation of covariance function + r = t[ind1] / lq[q] - t2[0, ind2] / lq[q] + r2 = r * r + # Calculation of covariance function kuu[np.ix_(ind1, ind2)] = np.exp(-r2) return kuu - #Evaluation of cross-covariance function + # Evaluation of cross-covariance function def _Kfu(self, X, index, X2, index2): - #terms that move along t + # terms that move along t t = X[:, 0].reshape(X.shape[0], 1) - d = np.unique(index) #Output Indexes + d = np.unique(index) # Output Indexes B = self.decay.values[d] S = self.W.values[d, :] - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #Output related variables must be column-wise + # Output related variables must be column-wise B = B.reshape(B.size, 1) - #Input related variables must be row-wise + # Input related variables must be row-wise z = X2[:, 0].reshape(1, X2.shape[0]) lq = self.lengthscale.values.reshape((1, self.rank)) kfu = np.empty((t.size, z.size)) - #DxQ terms - c0 = S*((.5*np.sqrt(np.pi))*lq) - nu = B*(.5*lq) + # DxQ terms + c0 = S * ((0.5 * np.sqrt(np.pi)) * lq) + nu = B * (0.5 * lq) nu2 = nu**2 - #1xM terms - z_lq = z/lq[0, index2] - #NxM terms - tz = t-z - tz_lq = tz/lq[0, index2] + # 1xM terms + z_lq = z / lq[0, index2] + # NxM terms + tz = t - z + tz_lq = tz / lq[0, index2] # Upsilon Calculations fullind = np.ix_(index, index2) - upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind])) - upsi[t[:, 0] == 0, :] = 0. - #Covariance calculation - kfu = c0[fullind]*upsi + upsi = np.exp( + nu2[fullind] + - B[index] * tz + + lnDifErf(-tz_lq + nu[fullind], z_lq + nu[fullind]) + ) + upsi[t[:, 0] == 0, :] = 0.0 + # Covariance calculation + kfu = c0[fullind] * upsi return kfu - #Gradient of Kuu wrt lengthscale + # Gradient of Kuu wrt lengthscale def _gkuu_lq(self, X, index): - t = X[:, 0].reshape(X.shape[0],) - index = index.reshape(X.shape[0],) - lq = self.lengthscale.values.reshape(self.rank,) - lq2 = lq*lq - #Covariance matrix initialization + t = X[:, 0].reshape( + X.shape[0], + ) + index = index.reshape( + X.shape[0], + ) + lq = self.lengthscale.values.reshape( + self.rank, + ) + lq2 = lq * lq + # Covariance matrix initialization glq = np.zeros((t.size, t.size)) - #Upper triangular indices + # Upper triangular indices indtri1, indtri2 = np.triu_indices(t.size, 1) - #Block Diagonal indices among Upper Triangular indices + # Block Diagonal indices among Upper Triangular indices ind = np.where(index[indtri1] == index[indtri2]) indr = indtri1[ind] indc = indtri2[ind] r = t[indr] - t[indc] - r2 = r*r - r2_lq2 = r2/lq2[index[indr]] - #Calculation of covariance function + r2 = r * r + r2_lq2 = r2 / lq2[index[indr]] + # Calculation of covariance function er2_lq2 = np.exp(-r2_lq2) - #Gradient wrt lq - c = 2.*r2_lq2/lq[index[indr]] - glq[indr, indc] = er2_lq2*c - #Complete the lower triangular + # Gradient wrt lq + c = 2.0 * r2_lq2 / lq[index[indr]] + glq[indr, indc] = er2_lq2 * c + # Complete the lower triangular glq[indc, indr] = glq[indr, indc] return glq - #Be careful this derivative should be transpose it - def _gkuu_X(self, X, index): #Diagonal terms are always zero - t = X[:, 0].reshape(X.shape[0],) - index = index.reshape(index.size,) - lq = self.lengthscale.values.reshape(self.rank,) - lq2 = lq*lq - #Covariance matrix initialization + # Be careful this derivative should be transpose it + def _gkuu_X(self, X, index): # Diagonal terms are always zero + t = X[:, 0].reshape( + X.shape[0], + ) + index = index.reshape( + index.size, + ) + lq = self.lengthscale.values.reshape( + self.rank, + ) + lq2 = lq * lq + # Covariance matrix initialization gt = np.zeros((t.size, t.size)) - #Upper triangular indices - indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal - #Block Diagonal indices among Upper Triangular indices + # Upper triangular indices + indtri1, indtri2 = np.triu_indices(t.size, 1) # Offset of 1 from the diagonal + # Block Diagonal indices among Upper Triangular indices ind = np.where(index[indtri1] == index[indtri2]) indr = indtri1[ind] indc = indtri2[ind] r = t[indr] - t[indc] - r2 = r*r - r2_lq2 = r2/(-lq2[index[indr]]) - #Calculation of covariance function + r2 = r * r + r2_lq2 = r2 / (-lq2[index[indr]]) + # Calculation of covariance function er2_lq2 = np.exp(r2_lq2) - #Gradient wrt t - c = 2.*r/lq2[index[indr]] - gt[indr, indc] = er2_lq2*c - #Complete the lower triangular + # Gradient wrt t + c = 2.0 * r / lq2[index[indr]] + gt[indr, indc] = er2_lq2 * c + # Complete the lower triangular gt[indc, indr] = -gt[indr, indc] return gt - #Gradients for Diagonal Kff + # Gradients for Diagonal Kff def _gkdiag(self, X, index): - index = index.reshape(index.size,) - #terms that move along t + index = index.reshape( + index.size, + ) + # terms that move along t d = np.unique(index) B = self.decay[d].values S = self.W[d, :].values - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #Output related variables must be column-wise + # Output related variables must be column-wise t = X[:, 0].reshape(X.shape[0], 1) B = B.reshape(B.size, 1) - S2 = S*S + S2 = S * S - #Input related variables must be row-wise + # Input related variables must be row-wise lq = self.lengthscale.values.reshape(1, self.rank) gB = np.empty((t.size,)) glq = np.empty((t.size, lq.size)) gS = np.empty((t.size, lq.size)) - #Dx1 terms - c0 = S2*lq*np.sqrt(np.pi) + # Dx1 terms + c0 = S2 * lq * np.sqrt(np.pi) - #DxQ terms - nu = (.5*lq)*B - nu2 = nu*nu - - #Nx1 terms - gamt = -B[index]*t + # DxQ terms + nu = (0.5 * lq) * B + nu2 = nu * nu + + # Nx1 terms + gamt = -B[index] * t egamt = np.exp(gamt) - e2gamt = egamt*egamt + e2gamt = egamt * egamt - #NxQ terms - t_lq = t/lq - t2_lq2 = -t_lq*t_lq + # NxQ terms + t_lq = t / lq + t2_lq2 = -t_lq * t_lq - etlq2gamt = np.exp(t2_lq2 + gamt) #NXQ + etlq2gamt = np.exp(t2_lq2 + gamt) # NXQ ##Upsilon calculations - #erfnu = erf(nu) #TODO: This can be improved + # erfnu = erf(nu) #TODO: This can be improved - upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :], t_lq + nu[index, :]) ) - upm[t[:, 0] == 0, :] = 0. + upm = np.exp(nu2[index, :] + lnDifErf(nu[index, :], t_lq + nu[index, :])) + upm[t[:, 0] == 0, :] = 0.0 - upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) ) #egamt*upv - upv[t[:, 0] == 0, :] = 0. + upv = np.exp( + nu2[index, :] + 2.0 * gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) + ) # egamt*upv + upv[t[:, 0] == 0, :] = 0.0 - #Gradient wrt S - c0_S = (S/B)*(lq*np.sqrt(np.pi)) + # Gradient wrt S + c0_S = (S / B) * (lq * np.sqrt(np.pi)) - gS = c0_S[index]*(upm - upv) + gS = c0_S[index] * (upm - upv) + + # For B + CB1 = (0.5 * lq) ** 2 - 0.5 / B**2 # DXQ + lq2_2B = (0.5 * lq**2) * (S2 / B) # DXQ + CB2 = 2.0 * etlq2gamt - e2gamt - 1.0 # NxQ - #For B - CB1 = (.5*lq)**2 - .5/B**2 #DXQ - lq2_2B = (.5*lq**2)*(S2/B) #DXQ - CB2 = 2.*etlq2gamt - e2gamt - 1. #NxQ - # gradient wrt B NxZ - gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \ - lq2_2B[index, :]*CB2 + gB = ( + c0[index, :] * (CB1[index, :] * upm - (CB1[index, :] - t / B[index]) * upv) + + lq2_2B[index, :] * CB2 + ) - #Gradient wrt lengthscale - #DxQ terms - c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1.+.5*(lq*B)**2) - Clq1 = S2*(lq*.5) - glq = c0[index]*(upm - upv) + Clq1[index]*CB2 + # Gradient wrt lengthscale + # DxQ terms + c0 = (0.5 * np.sqrt(np.pi)) * (S2 / B) * (1.0 + 0.5 * (lq * B) ** 2) + Clq1 = S2 * (lq * 0.5) + glq = c0[index] * (upm - upv) + Clq1[index] * CB2 return glq, gS, gB def _gkfu(self, X, index, Z, index2): - index = index.reshape(index.size,) - #TODO: reduce memory usage - #terms that move along t + index = index.reshape( + index.size, + ) + # TODO: reduce memory usage + # terms that move along t d = np.unique(index) B = self.decay[d].values S = self.W[d, :].values - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #t column + # t column t = X[:, 0].reshape(X.shape[0], 1) B = B.reshape(B.size, 1) - #z row + # z row z = Z[:, 0].reshape(1, Z.shape[0]) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) lq = self.lengthscale.values.reshape((1, self.rank)) - #kfu = np.empty((t.size, z.size)) + # kfu = np.empty((t.size, z.size)) glq = np.empty((t.size, z.size)) gSdq = np.empty((t.size, z.size)) gB = np.empty((t.size, z.size)) - #Dx1 terms - B_2 = B*.5 - S_pi = S*(.5*np.sqrt(np.pi)) - #DxQ terms - c0 = S_pi*lq #lq*Sdq*sqrt(pi) - nu = B*lq*.5 - nu2 = nu*nu + # Dx1 terms + B_2 = B * 0.5 + S_pi = S * (0.5 * np.sqrt(np.pi)) + # DxQ terms + c0 = S_pi * lq # lq*Sdq*sqrt(pi) + nu = B * lq * 0.5 + nu2 = nu * nu + + # 1xM terms + z_lq = z / lq[0, index2] + + # NxM terms + tz = t - z + tz_lq = tz / lq[0, index2] + etz_lq2 = -np.exp(-tz_lq * tz_lq) + ez_lq_Bt = np.exp(-z_lq * z_lq - B[index] * t) - #1xM terms - z_lq = z/lq[0, index2] - - #NxM terms - tz = t-z - tz_lq = tz/lq[0, index2] - etz_lq2 = -np.exp(-tz_lq*tz_lq) - ez_lq_Bt = np.exp(-z_lq*z_lq -B[index]*t) - # Upsilon calculations fullind = np.ix_(index, index2) - upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind] ) ) - upsi[t[:, 0] == 0., :] = 0. + upsi = np.exp( + nu2[fullind] + - B[index] * tz + + lnDifErf(-tz_lq + nu[fullind], z_lq + nu[fullind]) + ) + upsi[t[:, 0] == 0.0, :] = 0.0 - #Gradient wrt S - #DxQ term - Sa1 = lq*(.5*np.sqrt(np.pi)) + # Gradient wrt S + # DxQ term + Sa1 = lq * (0.5 * np.sqrt(np.pi)) - gSdq = Sa1[0,index2]*upsi + gSdq = Sa1[0, index2] * upsi - #Gradient wrt lq - la1 = S_pi*(1. + 2.*nu2) - Slq = S*lq - uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index]) - uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index]) + # Gradient wrt lq + la1 = S_pi * (1.0 + 2.0 * nu2) + Slq = S * lq + uplq = etz_lq2 * (tz_lq / lq[0, index2] + B_2[index]) + uplq += ez_lq_Bt * (-z_lq / lq[0, index2] + B_2[index]) - glq = la1[fullind]*upsi - glq += Slq[fullind]*uplq + glq = la1[fullind] * upsi + glq += Slq[fullind] * uplq - #Gradient wrt B - Slq = Slq*lq - nulq = nu*lq + # Gradient wrt B + Slq = Slq * lq + nulq = nu * lq upBd = etz_lq2 + ez_lq_Bt - gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd + gB = c0[fullind] * (nulq[fullind] - tz) * upsi + 0.5 * Slq[fullind] * upBd return glq, gSdq, gB - #TODO: reduce memory usage - def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z) - index = index.reshape(index.size,) - #terms that move along t + # TODO: reduce memory usage + def _gkfu_z(self, X, index, Z, index2): # Kfu(t,z) + index = index.reshape( + index.size, + ) + # terms that move along t d = np.unique(index) B = self.decay[d].values S = self.W[d, :].values - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #t column + # t column t = X[:, 0].reshape(X.shape[0], 1) B = B.reshape(B.size, 1) - #z row + # z row z = Z[:, 0].reshape(1, Z.shape[0]) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) lq = self.lengthscale.values.reshape((1, self.rank)) - #kfu = np.empty((t.size, z.size)) + # kfu = np.empty((t.size, z.size)) gz = np.empty((t.size, z.size)) - #Dx1 terms - S_pi =S*(.5*np.sqrt(np.pi)) - #DxQ terms - #Slq = S*lq - c0 = S_pi*lq #lq*Sdq*sqrt(pi) - nu = (.5*lq)*B - nu2 = nu*nu + # Dx1 terms + S_pi = S * (0.5 * np.sqrt(np.pi)) + # DxQ terms + # Slq = S*lq + c0 = S_pi * lq # lq*Sdq*sqrt(pi) + nu = (0.5 * lq) * B + nu2 = nu * nu - #1xM terms - z_lq = z/lq[0, index2] - z_lq2 = -z_lq*z_lq - #NxQ terms - t_lq = t/lq - #NxM terms + # 1xM terms + z_lq = z / lq[0, index2] + z_lq2 = -z_lq * z_lq + # NxQ terms + t_lq = t / lq + # NxM terms zt_lq = z_lq - t_lq[:, index2] - zt_lq2 = -zt_lq*zt_lq + zt_lq2 = -zt_lq * zt_lq # Upsilon calculations fullind = np.ix_(index, index2) z2 = z_lq + nu[fullind] z1 = z2 - t_lq[:, index2] - upsi = np.exp(nu2[fullind] - B[index]*(t-z) + lnDifErf(z1,z2) ) - upsi[t[:, 0] == 0., :] = 0. + upsi = np.exp(nu2[fullind] - B[index] * (t - z) + lnDifErf(z1, z2)) + upsi[t[:, 0] == 0.0, :] = 0.0 - #Gradient wrt z - za1 = c0*B - #za2 = S_w - gz = za1[fullind]*upsi + S[fullind]*( np.exp(z_lq2 - B[index]*t) -np.exp(zt_lq2) ) + # Gradient wrt z + za1 = c0 * B + # za2 = S_w + gz = za1[fullind] * upsi + S[fullind] * ( + np.exp(z_lq2 - B[index] * t) - np.exp(zt_lq2) + ) return gz - -def lnDifErf(z1,z2): - #Z2 is always positive - logdiferf = np.zeros(z1.shape) - ind = np.where(z1>0.) - ind2 = np.where(z1<=0.) + + +def lnDifErf(z1, z2): + # Z2 is always positive + logdiferf = np.zeros(z1.shape) + ind = np.where(z1 > 0.0) + ind2 = np.where(z1 <= 0.0) if ind[0].shape > 0: z1i = z1[ind] - z12 = z1i*z1i + z12 = z1i * z1i z2i = z2[ind] - logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12-z2i**2)) - + logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i) * np.exp(z12 - z2i**2)) + if ind2[0].shape > 0: z1i = z1[ind2] z2i = z2[ind2] logdiferf[ind2] = np.log(erf(z2i) - erf(z1i)) - - return logdiferf \ No newline at end of file + + return logdiferf diff --git a/GPy/kern/src/eq_ode2.py b/GPy/kern/src/eq_ode2.py index 0166c511..27b15b87 100644 --- a/GPy/kern/src/eq_ode2.py +++ b/GPy/kern/src/eq_ode2.py @@ -8,6 +8,7 @@ from ...core.parameterization import Param from paramz.transformations import Logexp from paramz.caching import Cache_this + class EQ_ODE2(Kern): """ Covariance function for second order differential equation driven by an exponentiated quadratic covariance. @@ -30,24 +31,38 @@ class EQ_ODE2(Kern): :type B: array of length output_dim. """ - #This code will only work for the sparseGP model, due to limitations in models for this kernel - def __init__(self, input_dim=2, output_dim=1, rank=1, W=None, lengthscale=None, C=None, B=None, active_dims=None, name='eq_ode2'): - #input_dim should be 1, but kern._slice_X is not returning index information required to evaluate kernels + + # This code will only work for the sparseGP model, due to limitations in models for this kernel + def __init__( + self, + input_dim=2, + output_dim=1, + rank=1, + W=None, + lengthscale=None, + C=None, + B=None, + active_dims=None, + name="eq_ode2", + ): + # input_dim should be 1, but kern._slice_X is not returning index information required to evaluate kernels assert input_dim == 2, "only defined for 1 input dims" - super(EQ_ODE2, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name) + super(EQ_ODE2, self).__init__( + input_dim=input_dim, active_dims=active_dims, name=name + ) self.rank = rank self.output_dim = output_dim if lengthscale is None: - lengthscale = .5+np.random.rand(self.rank) + lengthscale = 0.5 + np.random.rand(self.rank) else: lengthscale = np.asarray(lengthscale) assert lengthscale.size in [1, self.rank], "Bad number of lengthscales" if lengthscale.size != self.rank: - lengthscale = np.ones(self.rank)*lengthscale + lengthscale = np.ones(self.rank) * lengthscale if W is None: - #W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) + # W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) W = np.ones((self.output_dim, self.rank)) else: assert W.shape == (self.output_dim, self.rank) @@ -58,270 +73,294 @@ class EQ_ODE2(Kern): if B is None: B = np.ones(self.output_dim) - self.C = Param('C', C, Logexp()) - self.B = Param('B', B, Logexp()) - self.lengthscale = Param('lengthscale', lengthscale, Logexp()) - self.W = Param('W', W) + self.C = Param("C", C, Logexp()) + self.B = Param("B", B, Logexp()) + self.lengthscale = Param("lengthscale", lengthscale, Logexp()) + self.W = Param("W", W) self.link_parameters(self.lengthscale, self.C, self.B, self.W) @Cache_this(limit=3) def K(self, X, X2=None): - #This way is not working, indexes are lost after using k._slice_X - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # This way is not working, indexes are lost after using k._slice_X + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values index = np.int_(np.round(X[:, 1])) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim if X2 is None: if X_flag: - #Calculate covariance function for the latent functions + # Calculate covariance function for the latent functions index -= self.output_dim return self._Kuu(X, index) - else: #Kff full + else: # Kff full raise NotImplementedError else: - #This way is not working, indexes are lost after using k._slice_X - #index2 = np.asarray(X2, dtype=np.int) - #index2 = index2.reshape(index2.size,) - if hasattr(X2, 'values'): + # This way is not working, indexes are lost after using k._slice_X + # index2 = np.asarray(X2, dtype=int) + # index2 = index2.reshape(index2.size,) + if hasattr(X2, "values"): X2 = X2.values index2 = np.int_(np.round(X2[:, 1])) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) X2_flag = index2[0] >= self.output_dim - #Calculate cross-covariance function + # Calculate cross-covariance function if not X_flag and X2_flag: index2 -= self.output_dim - return self._Kfu(X, index, X2, index2) #Kfu + return self._Kfu(X, index, X2, index2) # Kfu elif X_flag and not X2_flag: index -= self.output_dim - return self._Kfu(X2, index2, X, index).T #Kuf + return self._Kfu(X2, index2, X, index).T # Kuf elif X_flag and X2_flag: index -= self.output_dim index2 -= self.output_dim - return self._Kusu(X, index, X2, index2) #Ku_s u + return self._Kusu(X, index, X2, index2) # Ku_s u else: - raise NotImplementedError #Kf_s f + raise NotImplementedError # Kf_s f - #Calculate the covariance function for diag(Kff(X,X)) + # Calculate the covariance function for diag(Kff(X,X)) def Kdiag(self, X): - if hasattr(X, 'values'): + if hasattr(X, "values"): index = np.int_(np.round(X[:, 1].values)) else: index = np.int_(np.round(X[:, 1])) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim - - if X_flag: #Kuudiag - return np.ones(X[:,0].shape) - else: #Kffdiag + + if X_flag: # Kuudiag + return np.ones(X[:, 0].shape) + else: # Kffdiag kdiag = self._Kdiag(X) return np.sum(kdiag, axis=1) - #Calculate the covariance function for diag(Kff(X,X)) + # Calculate the covariance function for diag(Kff(X,X)) def _Kdiag(self, X): - #This way is not working, indexes are lost after using k._slice_X - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # This way is not working, indexes are lost after using k._slice_X + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values index = np.int_(X[:, 1]) - index = index.reshape(index.size,) - - #terms that move along t + index = index.reshape( + index.size, + ) + + # terms that move along t t = X[:, 0].reshape(X.shape[0], 1) - d = np.unique(index) #Output Indexes + d = np.unique(index) # Output Indexes B = self.B.values[d] C = self.C.values[d] S = self.W.values[d, :] - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #Check where wd becomes complex - wbool = C*C >= 4.*B + # Check where wd becomes complex + wbool = C * C >= 4.0 * B B = B.reshape(B.size, 1) C = C.reshape(C.size, 1) - alpha = .5*C - C2 = C*C + alpha = 0.5 * C + C2 = C * C wbool2 = wbool[index] ind2t = np.where(wbool2) ind3t = np.where(np.logical_not(wbool2)) - #Terms that move along q + # Terms that move along q lq = self.lengthscale.values.reshape(1, self.lengthscale.size) - S2 = S*S + S2 = S * S kdiag = np.empty((t.size, lq.size)) indD = np.arange(B.size) - #(1) When wd is real + # (1) When wd is real if np.any(np.logical_not(wbool)): - #Indexes of index and t related to (2) + # Indexes of index and t related to (2) t1 = t[ind3t] ind = index[ind3t] - d = np.asarray(np.where(np.logical_not(wbool))[0]) #Selection of outputs + d = np.asarray(np.where(np.logical_not(wbool))[0]) # Selection of outputs indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - S2lq = S2[d]*(.5*lq) - c0 = S2lq*np.sqrt(np.pi) - w = .5*np.sqrt(4.*B[d] - C2[d]) + # Dx1 terms + S2lq = S2[d] * (0.5 * lq) + c0 = S2lq * np.sqrt(np.pi) + w = 0.5 * np.sqrt(4.0 * B[d] - C2[d]) alphad = alpha[d] - w2 = w*w - gam = alphad + 1j*w - gamc = alphad - 1j*w - c1 = .5/(alphad*w2) - c2 = .5/(gam*w2) + w2 = w * w + gam = alphad + 1j * w + gamc = alphad - 1j * w + c1 = 0.5 / (alphad * w2) + c2 = 0.5 / (gam * w2) c = c1 - c2 - #DxQ terms - nu = lq*(gam*.5) - K01 = c0*c - #Nx1 terms - gamt = -gam[ind]*t1 - gamct = -gamc[ind]*t1 + # DxQ terms + nu = lq * (gam * 0.5) + K01 = c0 * c + # Nx1 terms + gamt = -gam[ind] * t1 + gamct = -gamc[ind] * t1 egamt = np.exp(gamt) - ec = egamt*c2[ind] - np.exp(gamct)*c1[ind] - #NxQ terms - t_lq = t1/lq + ec = egamt * c2[ind] - np.exp(gamct) * c1[ind] + # NxQ terms + t_lq = t1 / lq # Upsilon Calculations # Using wofz - wnu = wofz(1j*nu) + wnu = wofz(1j * nu) lwnu = np.log(wnu) - t2_lq2 = -t_lq*t_lq - upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind])))) - upm[t1[:, 0] == 0, :] = 0. + t2_lq2 = -t_lq * t_lq + upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind])))) + upm[t1[:, 0] == 0, :] = 0.0 - nu2 = nu*nu + nu2 = nu * nu z1 = nu[ind] - t_lq - indv1 = np.where(z1.real >= 0.) - indv2 = np.where(z1.real < 0.) + indv1 = np.where(z1.real >= 0.0) + indv2 = np.where(z1.real < 0.0) upv = -np.exp(lwnu[ind] + gamt) if indv1[0].shape > 0: - upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]))) + upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]))) if indv2[0].shape > 0: - upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.))\ - - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2]))) - upv[t1[:, 0] == 0, :] = 0. + upv[indv2] += np.exp( + nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0) + ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]))) + upv[t1[:, 0] == 0, :] = 0.0 - #Covariance calculation - kdiag[ind3t] = np.real(K01[ind]*upm) - kdiag[ind3t] += np.real((c0[ind]*ec)*upv) + # Covariance calculation + kdiag[ind3t] = np.real(K01[ind] * upm) + kdiag[ind3t] += np.real((c0[ind] * ec) * upv) - #(2) When w_d is complex + # (2) When w_d is complex if np.any(wbool): t1 = t[ind2t] ind = index[ind2t] - #Index transformation + # Index transformation d = np.asarray(np.where(wbool)[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - S2lq = S2[d]*(lq*.25) - c0 = S2lq*np.sqrt(np.pi) - w = .5*np.sqrt(C2[d] - 4.*B[d]) + # Dx1 terms + S2lq = S2[d] * (lq * 0.25) + c0 = S2lq * np.sqrt(np.pi) + w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d]) alphad = alpha[d] gam = alphad - w gamc = alphad + w - w2 = -w*w - c1 = .5/(alphad*w2) - c21 = .5/(gam*w2) - c22 = .5/(gamc*w2) + w2 = -w * w + c1 = 0.5 / (alphad * w2) + c21 = 0.5 / (gam * w2) + c22 = 0.5 / (gamc * w2) c = c1 - c21 c2 = c1 - c22 - #DxQ terms - K011 = c0*c - K012 = c0*c2 - nu = lq*(.5*gam) - nuc = lq*(.5*gamc) - #Nx1 terms - gamt = -gam[ind]*t1 - gamct = -gamc[ind]*t1 + # DxQ terms + K011 = c0 * c + K012 = c0 * c2 + nu = lq * (0.5 * gam) + nuc = lq * (0.5 * gamc) + # Nx1 terms + gamt = -gam[ind] * t1 + gamct = -gamc[ind] * t1 egamt = np.exp(gamt) egamct = np.exp(gamct) - ec = egamt*c21[ind] - egamct*c1[ind] - ec2 = egamct*c22[ind] - egamt*c1[ind] - #NxQ terms - t_lq = t1/lq + ec = egamt * c21[ind] - egamct * c1[ind] + ec2 = egamct * c22[ind] - egamt * c1[ind] + # NxQ terms + t_lq = t1 / lq - #Upsilon Calculations using wofz - t2_lq2 = -t_lq*t_lq #Required when using wofz - wnu = wofz(1j*nu).real + # Upsilon Calculations using wofz + t2_lq2 = -t_lq * t_lq # Required when using wofz + wnu = wofz(1j * nu).real lwnu = np.log(wnu) - upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind])).real)) - upm[t1[:, 0] == 0., :] = 0. + upm = wnu[ind] - np.exp( + t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind])).real) + ) + upm[t1[:, 0] == 0.0, :] = 0.0 - nu2 = nu*nu + nu2 = nu * nu z1 = nu[ind] - t_lq - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) upv = -np.exp(lwnu[ind] + gamt) if indv1[0].shape > 0: - upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)) if indv2[0].shape > 0: - upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.))\ - - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) - upv[t1[:, 0] == 0, :] = 0. + upv[indv2] += np.exp( + nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0) + ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upv[t1[:, 0] == 0, :] = 0.0 - wnuc = wofz(1j*nuc).real + wnuc = wofz(1j * nuc).real lwnuc = np.log(wnuc) - upmc = wnuc[ind] - np.exp(t2_lq2 + gamct + np.log(wofz(1j*(t_lq + nuc[ind])).real)) - upmc[t1[:, 0] == 0., :] = 0. + upmc = wnuc[ind] - np.exp( + t2_lq2 + gamct + np.log(wofz(1j * (t_lq + nuc[ind])).real) + ) + upmc[t1[:, 0] == 0.0, :] = 0.0 - nuc2 = nuc*nuc + nuc2 = nuc * nuc z1 = nuc[ind] - t_lq - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) - upvc = - np.exp(lwnuc[ind] + gamct) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) + upvc = -np.exp(lwnuc[ind] + gamct) if indv1[0].shape > 0: - upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)) if indv2[0].shape > 0: - upvc[indv2] += np.exp(nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.))\ - - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) - upvc[t1[:, 0] == 0, :] = 0. + upvc[indv2] += np.exp( + nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.0) + ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upvc[t1[:, 0] == 0, :] = 0.0 - #Covariance calculation - kdiag[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc + # Covariance calculation + kdiag[ind2t] = ( + K011[ind] * upm + + K012[ind] * upmc + + (c0[ind] * ec) * upv + + (c0[ind] * ec2) * upvc + ) return kdiag - def update_gradients_full(self, dL_dK, X, X2 = None): - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + def update_gradients_full(self, dL_dK, X, X2=None): + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values self.B.gradient = np.zeros(self.B.shape) self.C.gradient = np.zeros(self.C.shape) self.W.gradient = np.zeros(self.W.shape) self.lengthscale.gradient = np.zeros(self.lengthscale.shape) index = np.int_(X[:, 1]) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim if X2 is None: - if X_flag: #Kuu or Kmm + if X_flag: # Kuu or Kmm index -= self.output_dim - tmp = dL_dK*self._gkuu_lq(X, index) + tmp = dL_dK * self._gkuu_lq(X, index) for q in np.unique(index): ind = np.where(index == q) self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum() else: raise NotImplementedError - else: #Kfu or Knm - #index2 = np.asarray(X2, dtype=np.int) - #index2 = index2.reshape(index2.size,) - if hasattr(X2, 'values'): + else: # Kfu or Knm + # index2 = np.asarray(X2, dtype=int) + # index2 = index2.reshape(index2.size,) + if hasattr(X2, "values"): X2 = X2.values index2 = np.int_(X2[:, 1]) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) X2_flag = index2[0] >= self.output_dim if not X_flag and X2_flag: index2 -= self.output_dim else: - dL_dK = dL_dK.T #so we obtaing dL_Kfu + dL_dK = dL_dK.T # so we obtaing dL_Kfu indtemp = index - self.output_dim Xtemp = X X = X2 @@ -329,13 +368,13 @@ class EQ_ODE2(Kern): index = index2 index2 = indtemp glq, gSdq, gB, gC = self._gkfu(X, index, X2, index2) - tmp = dL_dK*glq + tmp = dL_dK * glq for q in np.unique(index2): ind = np.where(index2 == q) self.lengthscale.gradient[q] = tmp[:, ind].sum() - tmpB = dL_dK*gB - tmpC = dL_dK*gC - tmp = dL_dK*gSdq + tmpB = dL_dK * gB + tmpC = dL_dK * gC + tmp = dL_dK * gSdq for d in np.unique(index): ind = np.where(index == d) self.B.gradient[d] = tmpB[ind, :].sum() @@ -345,25 +384,27 @@ class EQ_ODE2(Kern): self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum() def update_gradients_diag(self, dL_dKdiag, X): - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values self.B.gradient = np.zeros(self.B.shape) self.C.gradient = np.zeros(self.C.shape) self.W.gradient = np.zeros(self.W.shape) self.lengthscale.gradient = np.zeros(self.lengthscale.shape) index = np.int_(X[:, 1]) - index = index.reshape(index.size,) - + index = index.reshape( + index.size, + ) + glq, gS, gB, gC = self._gkdiag(X, index) if dL_dKdiag.size == X.shape[0]: dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1)) - tmp = dL_dKdiag*glq + tmp = dL_dKdiag * glq self.lengthscale.gradient = tmp.sum(0) - tmpB = dL_dKdiag*gB - tmpC = dL_dKdiag*gC - tmp = dL_dKdiag*gS + tmpB = dL_dKdiag * gB + tmpC = dL_dKdiag * gC + tmp = dL_dKdiag * gS for d in np.unique(index): ind = np.where(index == d) self.B.gradient[d] = tmpB[ind, :].sum() @@ -371,107 +412,123 @@ class EQ_ODE2(Kern): self.W.gradient[d, :] = tmp[ind].sum(0) def gradients_X(self, dL_dK, X, X2=None): - #index = np.asarray(X, dtype=np.int) - #index = index.reshape(index.size,) - if hasattr(X, 'values'): + # index = np.asarray(X, dtype=int) + # index = index.reshape(index.size,) + if hasattr(X, "values"): X = X.values index = np.int_(X[:, 1]) - index = index.reshape(index.size,) + index = index.reshape( + index.size, + ) X_flag = index[0] >= self.output_dim - #If input_dim == 1, use this - #gX = np.zeros((X.shape[0], 1)) - #Cheat to allow gradient for input_dim==2 + # If input_dim == 1, use this + # gX = np.zeros((X.shape[0], 1)) + # Cheat to allow gradient for input_dim==2 gX = np.zeros(X.shape) - if X2 is None: #Kuu or Kmm + if X2 is None: # Kuu or Kmm if X_flag: index -= self.output_dim - gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0) + gX[:, 0] = 2.0 * (dL_dK * self._gkuu_X(X, index)).sum(0) return gX else: raise NotImplementedError - else: #Kuf or Kmn - #index2 = np.asarray(X2, dtype=np.int) - #index2 = index2.reshape(index2.size,) - if hasattr(X2, 'values'): + else: # Kuf or Kmn + # index2 = np.asarray(X2, dtype=int) + # index2 = index2.reshape(index2.size,) + if hasattr(X2, "values"): X2 = X2.values index2 = np.int_(X2[:, 1]) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) X2_flag = index2[0] >= self.output_dim - if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z + if X_flag and not X2_flag: # gradient of Kuf(Z, X) wrt Z index -= self.output_dim - gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1) + gX[:, 0] = (dL_dK * self._gkfu_z(X2, index2, X, index).T).sum(1) return gX else: raise NotImplementedError - #---------------------------------------# + # ---------------------------------------# # Helper functions # - #---------------------------------------# + # ---------------------------------------# - #Evaluation of squared exponential for LFM + # Evaluation of squared exponential for LFM def _Kuu(self, X, index): - index = index.reshape(index.size,) - t = X[:, 0].reshape(X.shape[0],) - lq = self.lengthscale.values.reshape(self.rank,) - lq2 = lq*lq - #Covariance matrix initialization + index = index.reshape( + index.size, + ) + t = X[:, 0].reshape( + X.shape[0], + ) + lq = self.lengthscale.values.reshape( + self.rank, + ) + lq2 = lq * lq + # Covariance matrix initialization kuu = np.zeros((t.size, t.size)) - #Assign 1. to diagonal terms - kuu[np.diag_indices(t.size)] = 1. - #Upper triangular indices + # Assign 1. to diagonal terms + kuu[np.diag_indices(t.size)] = 1.0 + # Upper triangular indices indtri1, indtri2 = np.triu_indices(t.size, 1) - #Block Diagonal indices among Upper Triangular indices + # Block Diagonal indices among Upper Triangular indices ind = np.where(index[indtri1] == index[indtri2]) indr = indtri1[ind] indc = indtri2[ind] r = t[indr] - t[indc] - r2 = r*r - #Calculation of covariance function - kuu[indr, indc] = np.exp(-r2/lq2[index[indr]]) - #Completation of lower triangular part + r2 = r * r + # Calculation of covariance function + kuu[indr, indc] = np.exp(-r2 / lq2[index[indr]]) + # Completation of lower triangular part kuu[indc, indr] = kuu[indr, indc] return kuu def _Kusu(self, X, index, X2, index2): - index = index.reshape(index.size,) - index2 = index2.reshape(index2.size,) - t = X[:, 0].reshape(X.shape[0],1) - t2 = X2[:, 0].reshape(1,X2.shape[0]) - lq = self.lengthscale.values.reshape(self.rank,) - #Covariance matrix initialization + index = index.reshape( + index.size, + ) + index2 = index2.reshape( + index2.size, + ) + t = X[:, 0].reshape(X.shape[0], 1) + t2 = X2[:, 0].reshape(1, X2.shape[0]) + lq = self.lengthscale.values.reshape( + self.rank, + ) + # Covariance matrix initialization kuu = np.zeros((t.size, t2.size)) for q in range(self.rank): ind1 = index == q ind2 = index2 == q - r = t[ind1]/lq[q] - t2[0,ind2]/lq[q] - r2 = r*r - #Calculation of covariance function + r = t[ind1] / lq[q] - t2[0, ind2] / lq[q] + r2 = r * r + # Calculation of covariance function kuu[np.ix_(ind1, ind2)] = np.exp(-r2) return kuu - #Evaluation of cross-covariance function + # Evaluation of cross-covariance function def _Kfu(self, X, index, X2, index2): - #terms that move along t + # terms that move along t t = X[:, 0].reshape(X.shape[0], 1) - d = np.unique(index) #Output Indexes + d = np.unique(index) # Output Indexes B = self.B.values[d] C = self.C.values[d] S = self.W.values[d, :] - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #Check where wd becomes complex - wbool = C*C >= 4.*B - #Output related variables must be column-wise + # Check where wd becomes complex + wbool = C * C >= 4.0 * B + # Output related variables must be column-wise C = C.reshape(C.size, 1) B = B.reshape(B.size, 1) - C2 = C*C - #Input related variables must be row-wise + C2 = C * C + # Input related variables must be row-wise z = X2[:, 0].reshape(1, X2.shape[0]) lq = self.lengthscale.values.reshape((1, self.rank)) - #print np.max(z), np.max(z/lq[0, index2]) - alpha = .5*C + # print np.max(z), np.max(z/lq[0, index2]) + alpha = 0.5 * C wbool2 = wbool[index] ind2t = np.where(wbool2) @@ -480,196 +537,214 @@ class EQ_ODE2(Kern): kfu = np.empty((t.size, z.size)) indD = np.arange(B.size) - #(1) when wd is real + # (1) when wd is real if np.any(np.logical_not(wbool)): - #Indexes of index and t related to (2) + # Indexes of index and t related to (2) t1 = t[ind3t] ind = index[ind3t] - #Index transformation + # Index transformation d = np.asarray(np.where(np.logical_not(wbool))[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - w = .5*np.sqrt(4.*B[d] - C2[d]) + # Dx1 terms + w = 0.5 * np.sqrt(4.0 * B[d] - C2[d]) alphad = alpha[d] - gam = alphad - 1j*w + gam = alphad - 1j * w - #DxQ terms - Slq = (S[d]/w)*(.5*lq) - c0 = Slq*np.sqrt(np.pi) - nu = gam*(.5*lq) - #1xM terms - z_lq = z/lq[0, index2] - #NxQ terms - t_lq = t1/lq - #NxM terms + # DxQ terms + Slq = (S[d] / w) * (0.5 * lq) + c0 = Slq * np.sqrt(np.pi) + nu = gam * (0.5 * lq) + # 1xM terms + z_lq = z / lq[0, index2] + # NxQ terms + t_lq = t1 / lq + # NxM terms zt_lq = z_lq - t_lq[:, index2] # Upsilon Calculations - #Using wofz - tz = t1-z + # Using wofz + tz = t1 - z fullind = np.ix_(ind, index2) - zt_lq2 = -zt_lq*zt_lq - z_lq2 = -z_lq*z_lq - gamt = -gam[ind]*t1 + zt_lq2 = -zt_lq * zt_lq + z_lq2 = -z_lq * z_lq + gamt = -gam[ind] * t1 - upsi = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])))) + upsi = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])))) z1 = zt_lq + nu[fullind] - indv1 = np.where(z1.real >= 0.) - indv2 = np.where(z1.real < 0.) + indv1 = np.where(z1.real >= 0.0) + indv2 = np.where(z1.real < 0.0) if indv1[0].shape > 0: - upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]))) + upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]))) if indv2[0].shape > 0: - nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2 - upsi[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]))) - upsi[t1[:, 0] == 0., :] = 0. + nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi[indv2] += np.exp( + nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]))) + upsi[t1[:, 0] == 0.0, :] = 0.0 - #Covariance calculation - kfu[ind3t] = c0[fullind]*upsi.imag + # Covariance calculation + kfu[ind3t] = c0[fullind] * upsi.imag - #(2) when wd is complex + # (2) when wd is complex if np.any(wbool): - #Indexes of index and t related to (2) + # Indexes of index and t related to (2) t1 = t[ind2t] ind = index[ind2t] - #Index transformation + # Index transformation d = np.asarray(np.where(wbool)[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - w = .5*np.sqrt(C2[d] - 4.*B[d]) + # Dx1 terms + w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d]) alphad = alpha[d] gam = alphad - w gamc = alphad + w - #DxQ terms - Slq = S[d]*(lq*.25) - c0 = -Slq*(np.sqrt(np.pi)/w) - nu = gam*(lq*.5) - nuc = gamc*(lq*.5) - #1xM terms - z_lq = z/lq[0, index2] - #NxQ terms - t_lq = t1/lq[0, index2] - #NxM terms + # DxQ terms + Slq = S[d] * (lq * 0.25) + c0 = -Slq * (np.sqrt(np.pi) / w) + nu = gam * (lq * 0.5) + nuc = gamc * (lq * 0.5) + # 1xM terms + z_lq = z / lq[0, index2] + # NxQ terms + t_lq = t1 / lq[0, index2] + # NxM terms zt_lq = z_lq - t_lq # Upsilon Calculations - tz = t1-z - z_lq2 = -z_lq*z_lq - zt_lq2 = -zt_lq*zt_lq - gamt = -gam[ind]*t1 - gamct = -gamc[ind]*t1 + tz = t1 - z + z_lq2 = -z_lq * z_lq + zt_lq2 = -zt_lq * zt_lq + gamt = -gam[ind] * t1 + gamct = -gamc[ind] * t1 fullind = np.ix_(ind, index2) - upsi = np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])).real))\ - - np.exp(z_lq2 + gamct + np.log(wofz(1j*(z_lq + nuc[fullind])).real)) + upsi = np.exp( + z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])).real) + ) - np.exp(z_lq2 + gamct + np.log(wofz(1j * (z_lq + nuc[fullind])).real)) z1 = zt_lq + nu[fullind] - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) if indv1[0].shape > 0: - upsi[indv1] -= np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upsi[indv1] -= np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)) if indv2[0].shape > 0: - nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2 - upsi[indv2] -= np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) + nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi[indv2] -= np.exp( + nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) z1 = zt_lq + nuc[fullind] - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) if indv1[0].shape > 0: - upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)) if indv2[0].shape > 0: - nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]]**2 - upsi[indv2] += np.exp(nuac2 - gamc[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) - upsi[t1[:, 0] == 0., :] = 0. + nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi[indv2] += np.exp( + nuac2 - gamc[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upsi[t1[:, 0] == 0.0, :] = 0.0 - kfu[ind2t] = c0[np.ix_(ind, index2)]*upsi + kfu[ind2t] = c0[np.ix_(ind, index2)] * upsi return kfu - #Gradient of Kuu wrt lengthscale + # Gradient of Kuu wrt lengthscale def _gkuu_lq(self, X, index): - t = X[:, 0].reshape(X.shape[0],) - index = index.reshape(X.shape[0],) - lq = self.lengthscale.values.reshape(self.rank,) - lq2 = lq*lq - #Covariance matrix initialization + t = X[:, 0].reshape( + X.shape[0], + ) + index = index.reshape( + X.shape[0], + ) + lq = self.lengthscale.values.reshape( + self.rank, + ) + lq2 = lq * lq + # Covariance matrix initialization glq = np.zeros((t.size, t.size)) - #Upper triangular indices + # Upper triangular indices indtri1, indtri2 = np.triu_indices(t.size, 1) - #Block Diagonal indices among Upper Triangular indices + # Block Diagonal indices among Upper Triangular indices ind = np.where(index[indtri1] == index[indtri2]) indr = indtri1[ind] indc = indtri2[ind] r = t[indr] - t[indc] - r2 = r*r - r2_lq2 = r2/lq2[index[indr]] - #Calculation of covariance function + r2 = r * r + r2_lq2 = r2 / lq2[index[indr]] + # Calculation of covariance function er2_lq2 = np.exp(-r2_lq2) - #Gradient wrt lq - c = 2.*r2_lq2/lq[index[indr]] - glq[indr, indc] = er2_lq2*c - #Complete the lower triangular + # Gradient wrt lq + c = 2.0 * r2_lq2 / lq[index[indr]] + glq[indr, indc] = er2_lq2 * c + # Complete the lower triangular glq[indc, indr] = glq[indr, indc] return glq - #Be careful this derivative should be transpose it - def _gkuu_X(self, X, index): #Diagonal terms are always zero - t = X[:, 0].reshape(X.shape[0],) - index = index.reshape(index.size,) - lq = self.lengthscale.values.reshape(self.rank,) - lq2 = lq*lq - #Covariance matrix initialization + # Be careful this derivative should be transpose it + def _gkuu_X(self, X, index): # Diagonal terms are always zero + t = X[:, 0].reshape( + X.shape[0], + ) + index = index.reshape( + index.size, + ) + lq = self.lengthscale.values.reshape( + self.rank, + ) + lq2 = lq * lq + # Covariance matrix initialization gt = np.zeros((t.size, t.size)) - #Upper triangular indices - indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal - #Block Diagonal indices among Upper Triangular indices + # Upper triangular indices + indtri1, indtri2 = np.triu_indices(t.size, 1) # Offset of 1 from the diagonal + # Block Diagonal indices among Upper Triangular indices ind = np.where(index[indtri1] == index[indtri2]) indr = indtri1[ind] indc = indtri2[ind] r = t[indr] - t[indc] - r2 = r*r - r2_lq2 = r2/(-lq2[index[indr]]) - #Calculation of covariance function + r2 = r * r + r2_lq2 = r2 / (-lq2[index[indr]]) + # Calculation of covariance function er2_lq2 = np.exp(r2_lq2) - #Gradient wrt t - c = 2.*r/lq2[index[indr]] - gt[indr, indc] = er2_lq2*c - #Complete the lower triangular + # Gradient wrt t + c = 2.0 * r / lq2[index[indr]] + gt[indr, indc] = er2_lq2 * c + # Complete the lower triangular gt[indc, indr] = -gt[indr, indc] return gt - #Gradients for Diagonal Kff + # Gradients for Diagonal Kff def _gkdiag(self, X, index): - index = index.reshape(index.size,) - #terms that move along t + index = index.reshape( + index.size, + ) + # terms that move along t d = np.unique(index) B = self.B[d].values C = self.C[d].values S = self.W[d, :].values - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #Check where wd becomes complex - wbool = C*C >= 4.*B - #Output related variables must be column-wise + # Check where wd becomes complex + wbool = C * C >= 4.0 * B + # Output related variables must be column-wise t = X[:, 0].reshape(X.shape[0], 1) B = B.reshape(B.size, 1) C = C.reshape(C.size, 1) - alpha = .5*C - C2 = C*C - S2 = S*S + alpha = 0.5 * C + C2 = C * C + S2 = S * S wbool2 = wbool[index] ind2t = np.where(wbool2) ind3t = np.where(np.logical_not(wbool2)) - #Input related variables must be row-wise + # Input related variables must be row-wise lq = self.lengthscale.values.reshape(1, self.rank) - lq2 = lq*lq + lq2 = lq * lq gB = np.empty((t.size, lq.size)) gC = np.empty((t.size, lq.size)) @@ -677,694 +752,851 @@ class EQ_ODE2(Kern): gS = np.empty((t.size, lq.size)) indD = np.arange(B.size) - #(1) When wd is real + # (1) When wd is real if np.any(np.logical_not(wbool)): - #Indexes of index and t related to (1) + # Indexes of index and t related to (1) t1 = t[ind3t] ind = index[ind3t] - #Index transformation + # Index transformation d = np.asarray(np.where(np.logical_not(wbool))[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - S2lq = S2[d]*(.5*lq) - c0 = S2lq*np.sqrt(np.pi) + # Dx1 terms + S2lq = S2[d] * (0.5 * lq) + c0 = S2lq * np.sqrt(np.pi) - w = .5*np.sqrt(4.*B[d] - C2[d]) + w = 0.5 * np.sqrt(4.0 * B[d] - C2[d]) alphad = alpha[d] - alpha2 = alphad*alphad - w2 = w*w - gam = alphad + 1j*w - gam2 = gam*gam - gamc = alphad - 1j*w - c1 = 0.5/alphad - c2 = 0.5/gam + alpha2 = alphad * alphad + w2 = w * w + gam = alphad + 1j * w + gam2 = gam * gam + gamc = alphad - 1j * w + c1 = 0.5 / alphad + c2 = 0.5 / gam c = c1 - c2 - #DxQ terms - c0 = c0/w2 - nu = (.5*lq)*gam - #Nx1 terms - gamt = -gam[ind]*t1 - gamct = -gamc[ind]*t1 + # DxQ terms + c0 = c0 / w2 + nu = (0.5 * lq) * gam + # Nx1 terms + gamt = -gam[ind] * t1 + gamct = -gamc[ind] * t1 egamt = np.exp(gamt) egamct = np.exp(gamct) - ec = egamt*c2[ind] - egamct*c1[ind] + ec = egamt * c2[ind] - egamct * c1[ind] - #NxQ terms - t_lq = t1/lq - t2_lq2 = -t_lq*t_lq - t_lq2 = t_lq/lq + # NxQ terms + t_lq = t1 / lq + t2_lq2 = -t_lq * t_lq + t_lq2 = t_lq / lq et2_lq2 = np.exp(t2_lq2) etlq2gamt = np.exp(t2_lq2 + gamt) ##Upsilon calculations - #Using wofz - wnu = wofz(1j*nu) + # Using wofz + wnu = wofz(1j * nu) lwnu = np.log(wnu) - t2_lq2 = -t_lq*t_lq - upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind])))) - upm[t1[:, 0] == 0, :] = 0. + t2_lq2 = -t_lq * t_lq + upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind])))) + upm[t1[:, 0] == 0, :] = 0.0 - nu2 = nu*nu + nu2 = nu * nu z1 = nu[ind] - t_lq - indv1 = np.where(z1.real >= 0.) - indv2 = np.where(z1.real < 0.) + indv1 = np.where(z1.real >= 0.0) + indv2 = np.where(z1.real < 0.0) upv = -np.exp(lwnu[ind] + gamt) if indv1[0].shape > 0: - upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]))) + upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]))) if indv2[0].shape > 0: - upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.))\ - - np.exp(t2_lq2[indv2] + np.log(wofz(-1j*z1[indv2]))) - upv[t1[:, 0] == 0, :] = 0. + upv[indv2] += np.exp( + nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0) + ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]))) + upv[t1[:, 0] == 0, :] = 0.0 - #Gradient wrt S - Slq = S[d]*lq #For grad wrt S - c0_S = Slq*np.sqrt(np.pi)/w2 - K01 = c0_S*c + # Gradient wrt S + Slq = S[d] * lq # For grad wrt S + c0_S = Slq * np.sqrt(np.pi) / w2 + K01 = c0_S * c - gS[ind3t] = np.real(K01[ind]*upm) + np.real((c0_S[ind]*ec)*upv) + gS[ind3t] = np.real(K01[ind] * upm) + np.real((c0_S[ind] * ec) * upv) - #For B and C - upmd = etlq2gamt - 1. + # For B and C + upmd = etlq2gamt - 1.0 upvd = egamt - et2_lq2 # gradient wrt B - dw_dB = 0.5/w - dgam_dB = 1j*dw_dB + dw_dB = 0.5 / w + dgam_dB = 1j * dw_dB - Ba1 = c0*(0.5*dgam_dB/gam2 + (0.5*lq2*gam*dgam_dB - 2.*dw_dB/w)*c) - Ba2_1 = c0*(dgam_dB*(0.5/gam2 - 0.25*lq2) + dw_dB/(w*gam)) - Ba2_2 = c0*dgam_dB/gam - Ba3 = c0*(-0.25*lq2*gam*dgam_dB/alphad + dw_dB/(w*alphad)) - Ba4_1 = (S2lq*lq)*dgam_dB/w2 - Ba4 = Ba4_1*c + Ba1 = c0 * ( + 0.5 * dgam_dB / gam2 + (0.5 * lq2 * gam * dgam_dB - 2.0 * dw_dB / w) * c + ) + Ba2_1 = c0 * (dgam_dB * (0.5 / gam2 - 0.25 * lq2) + dw_dB / (w * gam)) + Ba2_2 = c0 * dgam_dB / gam + Ba3 = c0 * (-0.25 * lq2 * gam * dgam_dB / alphad + dw_dB / (w * alphad)) + Ba4_1 = (S2lq * lq) * dgam_dB / w2 + Ba4 = Ba4_1 * c - gB[ind3t] = np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\ - + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd) + gB[ind3t] = ( + np.real(Ba1[ind] * upm) + - np.real( + ((Ba2_1[ind] + Ba2_2[ind] * t1) * egamt - Ba3[ind] * egamct) * upv + ) + + np.real(Ba4[ind] * upmd) + + np.real((Ba4_1[ind] * ec) * upvd) + ) # gradient wrt C - dw_dC = - alphad*dw_dB - dgam_dC = 0.5 + 1j*dw_dC + dw_dC = -alphad * dw_dB + dgam_dC = 0.5 + 1j * dw_dC - Ca1 = c0*(-0.25/alpha2 + 0.5*dgam_dC/gam2 + (0.5*lq2*gam*dgam_dC - 2.*dw_dC/w)*c) - Ca2_1 = c0*(dgam_dC*(0.5/gam2 - 0.25*lq2) + dw_dC/(w*gam)) - Ca2_2 = c0*dgam_dC/gam - Ca3_1 = c0*(0.25/alpha2 - 0.25*lq2*gam*dgam_dC/alphad + dw_dC/(w*alphad)) - Ca3_2 = 0.5*c0/alphad - Ca4_1 = (S2lq*lq)*dgam_dC/w2 - Ca4 = Ca4_1*c + Ca1 = c0 * ( + -0.25 / alpha2 + + 0.5 * dgam_dC / gam2 + + (0.5 * lq2 * gam * dgam_dC - 2.0 * dw_dC / w) * c + ) + Ca2_1 = c0 * (dgam_dC * (0.5 / gam2 - 0.25 * lq2) + dw_dC / (w * gam)) + Ca2_2 = c0 * dgam_dC / gam + Ca3_1 = c0 * ( + 0.25 / alpha2 + - 0.25 * lq2 * gam * dgam_dC / alphad + + dw_dC / (w * alphad) + ) + Ca3_2 = 0.5 * c0 / alphad + Ca4_1 = (S2lq * lq) * dgam_dC / w2 + Ca4 = Ca4_1 * c - gC[ind3t] = np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\ - + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd) + gC[ind3t] = ( + np.real(Ca1[ind] * upm) + - np.real( + ( + (Ca2_1[ind] + Ca2_2[ind] * t1) * egamt + - (Ca3_1[ind] + Ca3_2[ind] * t1) * egamct + ) + * upv + ) + + np.real(Ca4[ind] * upmd) + + np.real((Ca4_1[ind] * ec) * upvd) + ) - #Gradient wrt lengthscale - #DxQ terms - la = (1./lq + nu*gam)*c0 - la1 = la*c + # Gradient wrt lengthscale + # DxQ terms + la = (1.0 / lq + nu * gam) * c0 + la1 = la * c - c0l = (S2[d]/w2)*lq - la3 = c0l*c - gam_2 = .5*gam - glq[ind3t] = (la1[ind]*upm).real + ((la[ind]*ec)*upv).real\ - + (la3[ind]*(-gam_2[ind] + etlq2gamt*(-t_lq2 + gam_2[ind]))).real\ - + ((c0l[ind]*ec)*(-et2_lq2*(t_lq2 + gam_2[ind]) + egamt*gam_2[ind])).real + c0l = (S2[d] / w2) * lq + la3 = c0l * c + gam_2 = 0.5 * gam + glq[ind3t] = ( + (la1[ind] * upm).real + + ((la[ind] * ec) * upv).real + + (la3[ind] * (-gam_2[ind] + etlq2gamt * (-t_lq2 + gam_2[ind]))).real + + ( + (c0l[ind] * ec) + * (-et2_lq2 * (t_lq2 + gam_2[ind]) + egamt * gam_2[ind]) + ).real + ) - #(2) When w_d is complex + # (2) When w_d is complex if np.any(wbool): t1 = t[ind2t] ind = index[ind2t] - #Index transformation + # Index transformation d = np.asarray(np.where(wbool)[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - S2lq = S2[d]*(.25*lq) - c0 = S2lq*np.sqrt(np.pi) - w = .5*np.sqrt(C2[d]-4.*B[d]) - w2 = -w*w + # Dx1 terms + S2lq = S2[d] * (0.25 * lq) + c0 = S2lq * np.sqrt(np.pi) + w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d]) + w2 = -w * w alphad = alpha[d] - alpha2 = alphad*alphad + alpha2 = alphad * alphad gam = alphad - w gamc = alphad + w - gam2 = gam*gam - gamc2 = gamc*gamc - c1 = .5/alphad - c21 = .5/gam - c22 = .5/gamc + gam2 = gam * gam + gamc2 = gamc * gamc + c1 = 0.5 / alphad + c21 = 0.5 / gam + c22 = 0.5 / gamc c = c1 - c21 c2 = c1 - c22 - #DxQ terms - c0 = c0/w2 - nu = .5*lq*gam - nuc = .5*lq*gamc + # DxQ terms + c0 = c0 / w2 + nu = 0.5 * lq * gam + nuc = 0.5 * lq * gamc - #Nx1 terms - gamt = -gam[ind]*t1 - gamct = -gamc[ind]*t1 + # Nx1 terms + gamt = -gam[ind] * t1 + gamct = -gamc[ind] * t1 egamt = np.exp(gamt) egamct = np.exp(gamct) - ec = egamt*c21[ind] - egamct*c1[ind] - ec2 = egamct*c22[ind] - egamt*c1[ind] - #NxQ terms - t_lq = t1/lq - t2_lq2 = -t_lq*t_lq + ec = egamt * c21[ind] - egamct * c1[ind] + ec2 = egamct * c22[ind] - egamt * c1[ind] + # NxQ terms + t_lq = t1 / lq + t2_lq2 = -t_lq * t_lq et2_lq2 = np.exp(t2_lq2) etlq2gamct = np.exp(t2_lq2 + gamct) etlq2gamt = np.exp(t2_lq2 + gamt) - #Upsilon Calculations using wofz - t2_lq2 = -t_lq*t_lq #Required when using wofz - wnu = np.real(wofz(1j*nu)) + # Upsilon Calculations using wofz + t2_lq2 = -t_lq * t_lq # Required when using wofz + wnu = np.real(wofz(1j * nu)) lwnu = np.log(wnu) - upm = wnu[ind] - np.exp(t2_lq2 + gamt + np.log(wofz(1j*(t_lq + nu[ind])).real)) - upm[t1[:, 0] == 0., :] = 0. + upm = wnu[ind] - np.exp( + t2_lq2 + gamt + np.log(wofz(1j * (t_lq + nu[ind])).real) + ) + upm[t1[:, 0] == 0.0, :] = 0.0 - nu2 = nu*nu + nu2 = nu * nu z1 = nu[ind] - t_lq - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) upv = -np.exp(lwnu[ind] + gamt) if indv1[0].shape > 0: - upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upv[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)) if indv2[0].shape > 0: - upv[indv2] += np.exp(nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.)) - np.exp(t2_lq2[indv2]\ - + np.log(wofz(-1j*z1[indv2]).real)) - upv[t1[:, 0] == 0, :] = 0. + upv[indv2] += np.exp( + nu2[ind[indv2[0]], indv2[1]] + gamt[indv2[0], 0] + np.log(2.0) + ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upv[t1[:, 0] == 0, :] = 0.0 - wnuc = wofz(1j*nuc).real - upmc = wnuc[ind] - np.exp(t2_lq2 + gamct + np.log(wofz(1j*(t_lq + nuc[ind])).real)) - upmc[t1[:, 0] == 0., :] = 0. + wnuc = wofz(1j * nuc).real + upmc = wnuc[ind] - np.exp( + t2_lq2 + gamct + np.log(wofz(1j * (t_lq + nuc[ind])).real) + ) + upmc[t1[:, 0] == 0.0, :] = 0.0 lwnuc = np.log(wnuc) - nuc2 = nuc*nuc + nuc2 = nuc * nuc z1 = nuc[ind] - t_lq - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) upvc = -np.exp(lwnuc[ind] + gamct) if indv1[0].shape > 0: - upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upvc[indv1] += np.exp(t2_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real)) if indv2[0].shape > 0: - upvc[indv2] += np.exp(nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.)) - np.exp(t2_lq2[indv2]\ - + np.log(wofz(-1j*z1[indv2]).real)) - upvc[t1[:, 0] == 0, :] = 0. + upvc[indv2] += np.exp( + nuc2[ind[indv2[0]], indv2[1]] + gamct[indv2[0], 0] + np.log(2.0) + ) - np.exp(t2_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upvc[t1[:, 0] == 0, :] = 0.0 - #Gradient wrt S - #NxQ terms - c0_S = (S[d]/w2)*(lq*(np.sqrt(np.pi)*.5)) + # Gradient wrt S + # NxQ terms + c0_S = (S[d] / w2) * (lq * (np.sqrt(np.pi) * 0.5)) - K011 = c0_S*c - K012 = c0_S*c2 + K011 = c0_S * c + K012 = c0_S * c2 - gS[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0_S[ind]*ec)*upv + (c0_S[ind]*ec2)*upvc + gS[ind2t] = ( + K011[ind] * upm + + K012[ind] * upmc + + (c0_S[ind] * ec) * upv + + (c0_S[ind] * ec2) * upvc + ) - #Is required to cache this, C gradient also required them - upmd = -1. + etlq2gamt + # Is required to cache this, C gradient also required them + upmd = -1.0 + etlq2gamt upvd = -et2_lq2 + egamt - upmdc = -1. + etlq2gamct + upmdc = -1.0 + etlq2gamct upvdc = -et2_lq2 + egamct # Gradient wrt B - dgam_dB = 0.5/w + dgam_dB = 0.5 / w dgamc_dB = -dgam_dB - Ba1 = c0*(0.5*dgam_dB/gam2 + (0.5*lq2*gam*dgam_dB - 1./w2)*c) - Ba3 = c0*(-0.25*lq2*gam*dgam_dB/alphad + 0.5/(w2*alphad)) - Ba4_1 = (S2lq*lq)*dgam_dB/w2 - Ba4 = Ba4_1*c - Ba2_1 = c0*(dgam_dB*(0.5/gam2 - 0.25*lq2) + 0.5/(w2*gam)) - Ba2_2 = c0*dgam_dB/gam + Ba1 = c0 * ( + 0.5 * dgam_dB / gam2 + (0.5 * lq2 * gam * dgam_dB - 1.0 / w2) * c + ) + Ba3 = c0 * (-0.25 * lq2 * gam * dgam_dB / alphad + 0.5 / (w2 * alphad)) + Ba4_1 = (S2lq * lq) * dgam_dB / w2 + Ba4 = Ba4_1 * c + Ba2_1 = c0 * (dgam_dB * (0.5 / gam2 - 0.25 * lq2) + 0.5 / (w2 * gam)) + Ba2_2 = c0 * dgam_dB / gam - Ba1c = c0*(0.5*dgamc_dB/gamc2 + (0.5*lq2*gamc*dgamc_dB - 1./w2)*c2) - Ba3c = c0*(-0.25*lq2*gamc*dgamc_dB/alphad + 0.5/(w2*alphad)) - Ba4_1c = (S2lq*lq)*dgamc_dB/w2 - Ba4c = Ba4_1c*c2 - Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc)) - Ba2_2c = c0*dgamc_dB/gamc + Ba1c = c0 * ( + 0.5 * dgamc_dB / gamc2 + (0.5 * lq2 * gamc * dgamc_dB - 1.0 / w2) * c2 + ) + Ba3c = c0 * (-0.25 * lq2 * gamc * dgamc_dB / alphad + 0.5 / (w2 * alphad)) + Ba4_1c = (S2lq * lq) * dgamc_dB / w2 + Ba4c = Ba4_1c * c2 + Ba2_1c = c0 * (dgamc_dB * (0.5 / gamc2 - 0.25 * lq2) + 0.5 / (w2 * gamc)) + Ba2_2c = c0 * dgamc_dB / gamc - gB[ind2t] = Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\ - + Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\ - + Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\ - + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc + gB[ind2t] = ( + Ba1[ind] * upm + - ((Ba2_1[ind] + Ba2_2[ind] * t1) * egamt - Ba3[ind] * egamct) * upv + + Ba4[ind] * upmd + + (Ba4_1[ind] * ec) * upvd + + Ba1c[ind] * upmc + - ((Ba2_1c[ind] + Ba2_2c[ind] * t1) * egamct - Ba3c[ind] * egamt) * upvc + + Ba4c[ind] * upmdc + + (Ba4_1c[ind] * ec2) * upvdc + ) ##Gradient wrt C - dw_dC = 0.5*alphad/w + dw_dC = 0.5 * alphad / w dgam_dC = 0.5 - dw_dC dgamc_dC = 0.5 + dw_dC - S2lq2 = S2lq*lq + S2lq2 = S2lq * lq - Ca1 = c0*(-0.25/alpha2 + 0.5*dgam_dC/gam2 + (0.5*lq2*gam*dgam_dC + alphad/w2)*c) - Ca2_1 = c0*(dgam_dC*(0.5/gam2 - 0.25*lq2) - 0.5*alphad/(w2*gam)) - Ca2_2 = c0*dgam_dC/gam - Ca3_1 = c0*(0.25/alpha2 - 0.25*lq2*gam*dgam_dC/alphad - 0.5/w2) - Ca3_2 = 0.5*c0/alphad - Ca4_1 = S2lq2*(dgam_dC/w2) - Ca4 = Ca4_1*c + Ca1 = c0 * ( + -0.25 / alpha2 + + 0.5 * dgam_dC / gam2 + + (0.5 * lq2 * gam * dgam_dC + alphad / w2) * c + ) + Ca2_1 = c0 * ( + dgam_dC * (0.5 / gam2 - 0.25 * lq2) - 0.5 * alphad / (w2 * gam) + ) + Ca2_2 = c0 * dgam_dC / gam + Ca3_1 = c0 * ( + 0.25 / alpha2 - 0.25 * lq2 * gam * dgam_dC / alphad - 0.5 / w2 + ) + Ca3_2 = 0.5 * c0 / alphad + Ca4_1 = S2lq2 * (dgam_dC / w2) + Ca4 = Ca4_1 * c - Ca1c = c0*(-0.25/alpha2 + 0.5*dgamc_dC/gamc2 + (0.5*lq2*gamc*dgamc_dC + alphad/w2)*c2) - Ca2_1c = c0*(dgamc_dC*(0.5/gamc2 - 0.25*lq2) - 0.5*alphad/(w2*gamc)) - Ca2_2c = c0*dgamc_dC/gamc - Ca3_1c = c0*(0.25/alpha2 - 0.25*lq2*gamc*dgamc_dC/alphad - 0.5/w2) - Ca3_2c = 0.5*c0/alphad - Ca4_1c = S2lq2*(dgamc_dC/w2) - Ca4c = Ca4_1c*c2 + Ca1c = c0 * ( + -0.25 / alpha2 + + 0.5 * dgamc_dC / gamc2 + + (0.5 * lq2 * gamc * dgamc_dC + alphad / w2) * c2 + ) + Ca2_1c = c0 * ( + dgamc_dC * (0.5 / gamc2 - 0.25 * lq2) - 0.5 * alphad / (w2 * gamc) + ) + Ca2_2c = c0 * dgamc_dC / gamc + Ca3_1c = c0 * ( + 0.25 / alpha2 - 0.25 * lq2 * gamc * dgamc_dC / alphad - 0.5 / w2 + ) + Ca3_2c = 0.5 * c0 / alphad + Ca4_1c = S2lq2 * (dgamc_dC / w2) + Ca4c = Ca4_1c * c2 - gC[ind2t] = Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\ - + Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\ - + Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\ - + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc + gC[ind2t] = ( + Ca1[ind] * upm + - ( + (Ca2_1[ind] + Ca2_2[ind] * t1) * egamt + - (Ca3_1[ind] + Ca3_2[ind] * t1) * egamct + ) + * upv + + Ca4[ind] * upmd + + (Ca4_1[ind] * ec) * upvd + + Ca1c[ind] * upmc + - ( + (Ca2_1c[ind] + Ca2_2c[ind] * t1) * egamct + - (Ca3_1c[ind] + Ca3_2c[ind] * t1) * egamt + ) + * upvc + + Ca4c[ind] * upmdc + + (Ca4_1c[ind] * ec2) * upvdc + ) - #Gradient wrt lengthscale - #DxQ terms - la = (1./lq + nu*gam)*c0 - lac = (1./lq + nuc*gamc)*c0 - la1 = la*c - la1c = lac*c2 - t_lq2 = t_lq/lq - c0l = (S2[d]/w2)*(.5*lq) - la3 = c0l*c - la3c = c0l*c2 - gam_2 = .5*gam - gamc_2 = .5*gamc - glq[ind2t] = la1c[ind]*upmc + (lac[ind]*ec2)*upvc\ - + la3c[ind]*(-gamc_2[ind] + etlq2gamct*(-t_lq2 + gamc_2[ind]))\ - + (c0l[ind]*ec2)*(-et2_lq2*(t_lq2 + gamc_2[ind]) + egamct*gamc_2[ind])\ - + la1[ind]*upm + (la[ind]*ec)*upv\ - + la3[ind]*(-gam_2[ind] + etlq2gamt*(-t_lq2 + gam_2[ind]))\ - + (c0l[ind]*ec)*(-et2_lq2*(t_lq2 + gam_2[ind]) + egamt*gam_2[ind]) + # Gradient wrt lengthscale + # DxQ terms + la = (1.0 / lq + nu * gam) * c0 + lac = (1.0 / lq + nuc * gamc) * c0 + la1 = la * c + la1c = lac * c2 + t_lq2 = t_lq / lq + c0l = (S2[d] / w2) * (0.5 * lq) + la3 = c0l * c + la3c = c0l * c2 + gam_2 = 0.5 * gam + gamc_2 = 0.5 * gamc + glq[ind2t] = ( + la1c[ind] * upmc + + (lac[ind] * ec2) * upvc + + la3c[ind] * (-gamc_2[ind] + etlq2gamct * (-t_lq2 + gamc_2[ind])) + + (c0l[ind] * ec2) + * (-et2_lq2 * (t_lq2 + gamc_2[ind]) + egamct * gamc_2[ind]) + + la1[ind] * upm + + (la[ind] * ec) * upv + + la3[ind] * (-gam_2[ind] + etlq2gamt * (-t_lq2 + gam_2[ind])) + + (c0l[ind] * ec) + * (-et2_lq2 * (t_lq2 + gam_2[ind]) + egamt * gam_2[ind]) + ) return glq, gS, gB, gC def _gkfu(self, X, index, Z, index2): - index = index.reshape(index.size,) - #TODO: reduce memory usage - #terms that move along t + index = index.reshape( + index.size, + ) + # TODO: reduce memory usage + # terms that move along t d = np.unique(index) B = self.B[d].values C = self.C[d].values S = self.W[d, :].values - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #Check where wd becomes complex - wbool = C*C >= 4.*B - #t column + # Check where wd becomes complex + wbool = C * C >= 4.0 * B + # t column t = X[:, 0].reshape(X.shape[0], 1) C = C.reshape(C.size, 1) B = B.reshape(B.size, 1) - C2 = C*C - #z row + C2 = C * C + # z row z = Z[:, 0].reshape(1, Z.shape[0]) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) lq = self.lengthscale.values.reshape((1, self.rank)) - lq2 = lq*lq + lq2 = lq * lq - alpha = .5*C + alpha = 0.5 * C wbool2 = wbool[index] ind2t = np.where(wbool2) ind3t = np.where(np.logical_not(wbool2)) - #kfu = np.empty((t.size, z.size)) + # kfu = np.empty((t.size, z.size)) glq = np.empty((t.size, z.size)) gSdq = np.empty((t.size, z.size)) gB = np.empty((t.size, z.size)) gC = np.empty((t.size, z.size)) indD = np.arange(B.size) - #(1) when wd is real + # (1) when wd is real if np.any(np.logical_not(wbool)): - #Indexes of index and t related to (2) + # Indexes of index and t related to (2) t1 = t[ind3t] ind = index[ind3t] - #Index transformation + # Index transformation d = np.asarray(np.where(np.logical_not(wbool))[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - w = .5*np.sqrt(4.*B[d] - C2[d]) + # Dx1 terms + w = 0.5 * np.sqrt(4.0 * B[d] - C2[d]) alphad = alpha[d] - gam = alphad - 1j*w - gam_2 = .5*gam - S_w = S[d]/w - S_wpi = S_w*(.5*np.sqrt(np.pi)) - #DxQ terms - c0 = S_wpi*lq #lq*Sdq*sqrt(pi)/(2w) - nu = gam*lq - nu2 = 1.+.5*(nu*nu) - nu *= .5 + gam = alphad - 1j * w + gam_2 = 0.5 * gam + S_w = S[d] / w + S_wpi = S_w * (0.5 * np.sqrt(np.pi)) + # DxQ terms + c0 = S_wpi * lq # lq*Sdq*sqrt(pi)/(2w) + nu = gam * lq + nu2 = 1.0 + 0.5 * (nu * nu) + nu *= 0.5 - #1xM terms - z_lq = z/lq[0, index2] - z_lq2 = -z_lq*z_lq - #NxQ terms - t_lq = t1/lq - #DxM terms - gamt = -gam[ind]*t1 - #NxM terms + # 1xM terms + z_lq = z / lq[0, index2] + z_lq2 = -z_lq * z_lq + # NxQ terms + t_lq = t1 / lq + # DxM terms + gamt = -gam[ind] * t1 + # NxM terms zt_lq = z_lq - t_lq[:, index2] - zt_lq2 = -zt_lq*zt_lq + zt_lq2 = -zt_lq * zt_lq ezt_lq2 = -np.exp(zt_lq2) ezgamt = np.exp(z_lq2 + gamt) # Upsilon calculations fullind = np.ix_(ind, index2) - upsi = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])))) - tz = t1-z + upsi = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])))) + tz = t1 - z z1 = zt_lq + nu[fullind] - indv1 = np.where(z1.real >= 0.) - indv2 = np.where(z1.real < 0.) + indv1 = np.where(z1.real >= 0.0) + indv2 = np.where(z1.real < 0.0) if indv1[0].shape > 0: - upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]))) + upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]))) if indv2[0].shape > 0: - nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2 - upsi[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]))) - upsi[t1[:, 0] == 0., :] = 0. + nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi[indv2] += np.exp( + nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]))) + upsi[t1[:, 0] == 0.0, :] = 0.0 - #Gradient wrt S - #DxQ term - Sa1 = lq*(.5*np.sqrt(np.pi))/w + # Gradient wrt S + # DxQ term + Sa1 = lq * (0.5 * np.sqrt(np.pi)) / w - gSdq[ind3t] = Sa1[np.ix_(ind, index2)]*upsi.imag + gSdq[ind3t] = Sa1[np.ix_(ind, index2)] * upsi.imag - #Gradient wrt lq - la1 = S_wpi*nu2 - la2 = S_w*lq - uplq = ezt_lq2*(gam_2[ind]) - uplq += ezgamt*(-z_lq/lq[0, index2] + gam_2[ind]) + # Gradient wrt lq + la1 = S_wpi * nu2 + la2 = S_w * lq + uplq = ezt_lq2 * (gam_2[ind]) + uplq += ezgamt * (-z_lq / lq[0, index2] + gam_2[ind]) - glq[ind3t] = (la1[np.ix_(ind, index2)]*upsi).imag - glq[ind3t] += la2[np.ix_(ind, index2)]*uplq.imag + glq[ind3t] = (la1[np.ix_(ind, index2)] * upsi).imag + glq[ind3t] += la2[np.ix_(ind, index2)] * uplq.imag - #Gradient wrt B - #Dx1 terms - dw_dB = .5/w - dgam_dB = -1j*dw_dB - #DxQ terms - Ba1 = -c0*dw_dB/w #DXQ - Ba2 = c0*dgam_dB #DxQ - Ba3 = lq2*gam_2 #DxQ - Ba4 = (dgam_dB*S_w)*(.5*lq2) #DxQ + # Gradient wrt B + # Dx1 terms + dw_dB = 0.5 / w + dgam_dB = -1j * dw_dB + # DxQ terms + Ba1 = -c0 * dw_dB / w # DXQ + Ba2 = c0 * dgam_dB # DxQ + Ba3 = lq2 * gam_2 # DxQ + Ba4 = (dgam_dB * S_w) * (0.5 * lq2) # DxQ - gB[ind3t] = ((Ba1[np.ix_(ind, index2)] + Ba2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi).imag\ - + (Ba4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt)).imag + gB[ind3t] = ( + ( + Ba1[np.ix_(ind, index2)] + + Ba2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z)) + ) + * upsi + ).imag + (Ba4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt)).imag - #Gradient wrt C (it uses some calculations performed in B) - #Dx1 terms - dw_dC = -.5*alphad/w - dgam_dC = 0.5 - 1j*dw_dC - #DxQ terms - Ca1 = -c0*dw_dC/w #DXQ - Ca2 = c0*dgam_dC #DxQ - Ca4 = (dgam_dC*S_w)*(.5*lq2) #DxQ + # Gradient wrt C (it uses some calculations performed in B) + # Dx1 terms + dw_dC = -0.5 * alphad / w + dgam_dC = 0.5 - 1j * dw_dC + # DxQ terms + Ca1 = -c0 * dw_dC / w # DXQ + Ca2 = c0 * dgam_dC # DxQ + Ca4 = (dgam_dC * S_w) * (0.5 * lq2) # DxQ - gC[ind3t] = ((Ca1[np.ix_(ind, index2)] + Ca2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi).imag\ - + (Ca4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt)).imag + gC[ind3t] = ( + ( + Ca1[np.ix_(ind, index2)] + + Ca2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z)) + ) + * upsi + ).imag + (Ca4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt)).imag - #(2) when wd is complex + # (2) when wd is complex if np.any(wbool): - #Indexes of index and t related to (2) + # Indexes of index and t related to (2) t1 = t[ind2t] ind = index[ind2t] - #Index transformation + # Index transformation d = np.asarray(np.where(wbool)[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - w = .5*np.sqrt(C2[d] - 4.*B[d]) - w2 = w*w + # Dx1 terms + w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d]) + w2 = w * w alphad = alpha[d] gam = alphad - w gamc = alphad + w - #DxQ terms - S_w= -S[d]/w #minus is given by j*j - S_wpi = S_w*(.25*np.sqrt(np.pi)) + # DxQ terms + S_w = -S[d] / w # minus is given by j*j + S_wpi = S_w * (0.25 * np.sqrt(np.pi)) - c0 = S_wpi*lq - gam_2 = .5*gam - gamc_2 = .5*gamc - nu = gam*lq - nuc = gamc*lq - nu2 = 1.+.5*(nu*nu) - nuc2 = 1.+.5*(nuc*nuc) - nu *= .5 - nuc *= .5 - #1xM terms - z_lq = z/lq[0, index2] - z_lq2 = -z_lq*z_lq - #Nx1 - gamt = -gam[ind]*t1 - gamct = -gamc[ind]*t1 - #NxQ terms - t_lq = t1/lq[0, index2] - #NxM terms + c0 = S_wpi * lq + gam_2 = 0.5 * gam + gamc_2 = 0.5 * gamc + nu = gam * lq + nuc = gamc * lq + nu2 = 1.0 + 0.5 * (nu * nu) + nuc2 = 1.0 + 0.5 * (nuc * nuc) + nu *= 0.5 + nuc *= 0.5 + # 1xM terms + z_lq = z / lq[0, index2] + z_lq2 = -z_lq * z_lq + # Nx1 + gamt = -gam[ind] * t1 + gamct = -gamc[ind] * t1 + # NxQ terms + t_lq = t1 / lq[0, index2] + # NxM terms zt_lq = z_lq - t_lq - zt_lq2 = -zt_lq*zt_lq + zt_lq2 = -zt_lq * zt_lq ezt_lq2 = -np.exp(zt_lq2) ezgamt = np.exp(z_lq2 + gamt) ezgamct = np.exp(z_lq2 + gamct) # Upsilon calculations fullind = np.ix_(ind, index2) - upsi1 = - np.exp(z_lq2 + gamct + np.log(wofz(1j*(z_lq + nuc[fullind])).real)) - tz = t1-z + upsi1 = -np.exp( + z_lq2 + gamct + np.log(wofz(1j * (z_lq + nuc[fullind])).real) + ) + tz = t1 - z z1 = zt_lq + nuc[fullind] - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) if indv1[0].shape > 0: - upsi1[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upsi1[indv1] += np.exp( + zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real) + ) if indv2[0].shape > 0: - nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]]**2 - upsi1[indv2] += np.exp(nuac2 - gamc[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) - upsi1[t1[:, 0] == 0., :] = 0. + nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi1[indv2] += np.exp( + nuac2 - gamc[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upsi1[t1[:, 0] == 0.0, :] = 0.0 - upsi2 = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])).real)) + upsi2 = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])).real)) z1 = zt_lq + nu[fullind] - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) if indv1[0].shape > 0: - upsi2[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upsi2[indv1] += np.exp( + zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real) + ) if indv2[0].shape > 0: - nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2 - upsi2[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) - upsi2[t1[:, 0] == 0., :] = 0. + nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi2[indv2] += np.exp( + nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upsi2[t1[:, 0] == 0.0, :] = 0.0 - #Gradient wrt lq - la1 = S_wpi*nu2 - la1c = S_wpi*nuc2 - la2 = S_w*(.5*lq) - uplq = ezt_lq2*(gamc_2[ind]) + ezgamct*(-z_lq/lq[0, index2] + gamc_2[ind])\ - - ezt_lq2*(gam_2[ind]) - ezgamt*(-z_lq/lq[0, index2] + gam_2[ind]) + # Gradient wrt lq + la1 = S_wpi * nu2 + la1c = S_wpi * nuc2 + la2 = S_w * (0.5 * lq) + uplq = ( + ezt_lq2 * (gamc_2[ind]) + + ezgamct * (-z_lq / lq[0, index2] + gamc_2[ind]) + - ezt_lq2 * (gam_2[ind]) + - ezgamt * (-z_lq / lq[0, index2] + gam_2[ind]) + ) - glq[ind2t] = la1c[np.ix_(ind, index2)]*upsi1 - la1[np.ix_(ind, index2)]*upsi2\ - + la2[np.ix_(ind, index2)]*uplq + glq[ind2t] = ( + la1c[np.ix_(ind, index2)] * upsi1 + - la1[np.ix_(ind, index2)] * upsi2 + + la2[np.ix_(ind, index2)] * uplq + ) + # Gradient wrt S + Sa1 = (lq * (-0.25 * np.sqrt(np.pi))) / w - #Gradient wrt S - Sa1 = (lq*(-.25*np.sqrt(np.pi)))/w + gSdq[ind2t] = Sa1[np.ix_(ind, index2)] * (upsi1 - upsi2) - gSdq[ind2t] = Sa1[np.ix_(ind, index2)]*(upsi1 - upsi2) - - #Gradient wrt B - #Dx1 terms - dgam_dB = .5/w + # Gradient wrt B + # Dx1 terms + dgam_dB = 0.5 / w dgamc_dB = -dgam_dB - #DxQ terms - Ba1 = .5*(c0/w2) - Ba2 = c0*dgam_dB - Ba3 = lq2*gam_2 - Ba4 = (dgam_dB*S_w)*(.25*lq2) + # DxQ terms + Ba1 = 0.5 * (c0 / w2) + Ba2 = c0 * dgam_dB + Ba3 = lq2 * gam_2 + Ba4 = (dgam_dB * S_w) * (0.25 * lq2) - Ba2c = c0*dgamc_dB - Ba3c = lq2*gamc_2 - Ba4c = (dgamc_dB*S_w)*(.25*lq2) + Ba2c = c0 * dgamc_dB + Ba3c = lq2 * gamc_2 + Ba4c = (dgamc_dB * S_w) * (0.25 * lq2) - gB[ind2t] = (Ba1[np.ix_(ind, index2)] + Ba2c[np.ix_(ind, index2)]*(Ba3c[np.ix_(ind, index2)] - (t1-z)))*upsi1\ - + Ba4c[np.ix_(ind, index2)]*(ezt_lq2 + ezgamct)\ - - (Ba1[np.ix_(ind, index2)] + Ba2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi2\ - - Ba4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt) + gB[ind2t] = ( + ( + Ba1[np.ix_(ind, index2)] + + Ba2c[np.ix_(ind, index2)] * (Ba3c[np.ix_(ind, index2)] - (t1 - z)) + ) + * upsi1 + + Ba4c[np.ix_(ind, index2)] * (ezt_lq2 + ezgamct) + - ( + Ba1[np.ix_(ind, index2)] + + Ba2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z)) + ) + * upsi2 + - Ba4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt) + ) - #Gradient wrt C - #Dx1 terms - dgam_dC = 0.5 - .5*(alphad/w) - dgamc_dC = 0.5 + .5*(alphad/w) - #DxQ terms - Ca1 = -c0*(.5*alphad/w2) - Ca2 = c0*dgam_dC - Ca4 = (dgam_dC*S_w)*(.25*lq2) + # Gradient wrt C + # Dx1 terms + dgam_dC = 0.5 - 0.5 * (alphad / w) + dgamc_dC = 0.5 + 0.5 * (alphad / w) + # DxQ terms + Ca1 = -c0 * (0.5 * alphad / w2) + Ca2 = c0 * dgam_dC + Ca4 = (dgam_dC * S_w) * (0.25 * lq2) - Ca2c = c0*dgamc_dC - Ca4c = (dgamc_dC*S_w)*(.25*lq2) + Ca2c = c0 * dgamc_dC + Ca4c = (dgamc_dC * S_w) * (0.25 * lq2) - gC[ind2t] = (Ca1[np.ix_(ind, index2)] + Ca2c[np.ix_(ind, index2)]*(Ba3c[np.ix_(ind, index2)] - (t1-z)))*upsi1\ - + Ca4c[np.ix_(ind, index2)]*(ezt_lq2 + ezgamct)\ - - (Ca1[np.ix_(ind, index2)] + Ca2[np.ix_(ind, index2)]*(Ba3[np.ix_(ind, index2)] - (t1-z)))*upsi2\ - - Ca4[np.ix_(ind, index2)]*(ezt_lq2 + ezgamt) + gC[ind2t] = ( + ( + Ca1[np.ix_(ind, index2)] + + Ca2c[np.ix_(ind, index2)] * (Ba3c[np.ix_(ind, index2)] - (t1 - z)) + ) + * upsi1 + + Ca4c[np.ix_(ind, index2)] * (ezt_lq2 + ezgamct) + - ( + Ca1[np.ix_(ind, index2)] + + Ca2[np.ix_(ind, index2)] * (Ba3[np.ix_(ind, index2)] - (t1 - z)) + ) + * upsi2 + - Ca4[np.ix_(ind, index2)] * (ezt_lq2 + ezgamt) + ) return glq, gSdq, gB, gC - #TODO: reduce memory usage - def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z) - index = index.reshape(index.size,) - #terms that move along t + # TODO: reduce memory usage + def _gkfu_z(self, X, index, Z, index2): # Kfu(t,z) + index = index.reshape( + index.size, + ) + # terms that move along t d = np.unique(index) B = self.B[d].values C = self.C[d].values S = self.W[d, :].values - #Index transformation + # Index transformation indd = np.arange(self.output_dim) indd[d] = np.arange(d.size) index = indd[index] - #Check where wd becomes complex - wbool = C*C >= 4.*B + # Check where wd becomes complex + wbool = C * C >= 4.0 * B wbool2 = wbool[index] ind2t = np.where(wbool2) ind3t = np.where(np.logical_not(wbool2)) - #t column + # t column t = X[:, 0].reshape(X.shape[0], 1) C = C.reshape(C.size, 1) B = B.reshape(B.size, 1) - C2 = C*C - alpha = .5*C - #z row + C2 = C * C + alpha = 0.5 * C + # z row z = Z[:, 0].reshape(1, Z.shape[0]) - index2 = index2.reshape(index2.size,) + index2 = index2.reshape( + index2.size, + ) lq = self.lengthscale.values.reshape((1, self.rank)) - #kfu = np.empty((t.size, z.size)) + # kfu = np.empty((t.size, z.size)) gz = np.empty((t.size, z.size)) indD = np.arange(B.size) - #(1) when wd is real + # (1) when wd is real if np.any(np.logical_not(wbool)): - #Indexes of index and t related to (2) + # Indexes of index and t related to (2) t1 = t[ind3t] ind = index[ind3t] - #TODO: Find a better way of doing this - #Index transformation + # TODO: Find a better way of doing this + # Index transformation d = np.asarray(np.where(np.logical_not(wbool))[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - w = .5*np.sqrt(4.*B[d] - C2[d]) + # Dx1 terms + w = 0.5 * np.sqrt(4.0 * B[d] - C2[d]) alphad = alpha[d] - gam = alphad - 1j*w - S_w = S[d]/w - S_wpi =S_w*(.5*np.sqrt(np.pi)) - #DxQ terms - c0 = S_wpi*lq #lq*Sdq*sqrt(pi)/(2w) - nu = (.5*gam)*lq + gam = alphad - 1j * w + S_w = S[d] / w + S_wpi = S_w * (0.5 * np.sqrt(np.pi)) + # DxQ terms + c0 = S_wpi * lq # lq*Sdq*sqrt(pi)/(2w) + nu = (0.5 * gam) * lq - #1xM terms - z_lq = z/lq[0, index2] - z_lq2 = -z_lq*z_lq - #NxQ terms - t_lq = t1/lq - #DxM terms - gamt = -gam[ind]*t1 - #NxM terms + # 1xM terms + z_lq = z / lq[0, index2] + z_lq2 = -z_lq * z_lq + # NxQ terms + t_lq = t1 / lq + # DxM terms + gamt = -gam[ind] * t1 + # NxM terms zt_lq = z_lq - t_lq[:, index2] - zt_lq2 = -zt_lq*zt_lq - #ezt_lq2 = -np.exp(zt_lq2) + zt_lq2 = -zt_lq * zt_lq + # ezt_lq2 = -np.exp(zt_lq2) ezgamt = np.exp(z_lq2 + gamt) # Upsilon calculations fullind = np.ix_(ind, index2) - upsi = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])))) - tz = t1-z + upsi = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])))) + tz = t1 - z z1 = zt_lq + nu[fullind] - indv1 = np.where(z1.real >= 0.) - indv2 = np.where(z1.real < 0.) + indv1 = np.where(z1.real >= 0.0) + indv2 = np.where(z1.real < 0.0) if indv1[0].shape > 0: - upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]))) + upsi[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]))) if indv2[0].shape > 0: - nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2 - upsi[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]))) - upsi[t1[:, 0] == 0., :] = 0. + nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi[indv2] += np.exp( + nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]))) + upsi[t1[:, 0] == 0.0, :] = 0.0 - #Gradient wrt z - za1 = c0*gam - #za2 = S_w - gz[ind3t] = (za1[np.ix_(ind, index2)]*upsi).imag + S_w[np.ix_(ind, index2)]*ezgamt.imag + # Gradient wrt z + za1 = c0 * gam + # za2 = S_w + gz[ind3t] = (za1[np.ix_(ind, index2)] * upsi).imag + S_w[ + np.ix_(ind, index2) + ] * ezgamt.imag - #(2) when wd is complex + # (2) when wd is complex if np.any(wbool): - #Indexes of index and t related to (2) + # Indexes of index and t related to (2) t1 = t[ind2t] ind = index[ind2t] - #Index transformation + # Index transformation d = np.asarray(np.where(wbool)[0]) indd = indD.copy() indd[d] = np.arange(d.size) ind = indd[ind] - #Dx1 terms - w = .5*np.sqrt(C2[d] - 4.*B[d]) + # Dx1 terms + w = 0.5 * np.sqrt(C2[d] - 4.0 * B[d]) alphad = alpha[d] gam = alphad - w gamc = alphad + w - #DxQ terms - S_w = -S[d]/w #minus is given by j*j - S_wpi = S_w*(.25*np.sqrt(np.pi)) - c0 = S_wpi*lq - nu = .5*gam*lq - nuc = .5*gamc*lq + # DxQ terms + S_w = -S[d] / w # minus is given by j*j + S_wpi = S_w * (0.25 * np.sqrt(np.pi)) + c0 = S_wpi * lq + nu = 0.5 * gam * lq + nuc = 0.5 * gamc * lq - #1xM terms - z_lq = z/lq[0, index2] - z_lq2 = -z_lq*z_lq - #Nx1 - gamt = -gam[ind]*t1 - gamct = -gamc[ind]*t1 - #NxQ terms - t_lq = t1/lq - #NxM terms + # 1xM terms + z_lq = z / lq[0, index2] + z_lq2 = -z_lq * z_lq + # Nx1 + gamt = -gam[ind] * t1 + gamct = -gamc[ind] * t1 + # NxQ terms + t_lq = t1 / lq + # NxM terms zt_lq = z_lq - t_lq[:, index2] ezgamt = np.exp(z_lq2 + gamt) ezgamct = np.exp(z_lq2 + gamct) # Upsilon calculations - zt_lq2 = -zt_lq*zt_lq + zt_lq2 = -zt_lq * zt_lq fullind = np.ix_(ind, index2) - upsi1 = - np.exp(z_lq2 + gamct + np.log(wofz(1j*(z_lq + nuc[fullind])).real)) - tz = t1-z + upsi1 = -np.exp( + z_lq2 + gamct + np.log(wofz(1j * (z_lq + nuc[fullind])).real) + ) + tz = t1 - z z1 = zt_lq + nuc[fullind] - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) if indv1[0].shape > 0: - upsi1[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upsi1[indv1] += np.exp( + zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real) + ) if indv2[0].shape > 0: - nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]]**2 - upsi1[indv2] += np.exp(nuac2 - gamc[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) - upsi1[t1[:, 0] == 0., :] = 0. + nuac2 = nuc[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi1[indv2] += np.exp( + nuac2 - gamc[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upsi1[t1[:, 0] == 0.0, :] = 0.0 - upsi2 = - np.exp(z_lq2 + gamt + np.log(wofz(1j*(z_lq + nu[fullind])).real)) + upsi2 = -np.exp(z_lq2 + gamt + np.log(wofz(1j * (z_lq + nu[fullind])).real)) z1 = zt_lq + nu[fullind] - indv1 = np.where(z1 >= 0.) - indv2 = np.where(z1 < 0.) + indv1 = np.where(z1 >= 0.0) + indv2 = np.where(z1 < 0.0) if indv1[0].shape > 0: - upsi2[indv1] += np.exp(zt_lq2[indv1] + np.log(wofz(1j*z1[indv1]).real)) + upsi2[indv1] += np.exp( + zt_lq2[indv1] + np.log(wofz(1j * z1[indv1]).real) + ) if indv2[0].shape > 0: - nua2 = nu[ind[indv2[0]], index2[indv2[1]]]**2 - upsi2[indv2] += np.exp(nua2 - gam[ind[indv2[0]], 0]*tz[indv2] + np.log(2.))\ - - np.exp(zt_lq2[indv2] + np.log(wofz(-1j*z1[indv2]).real)) - upsi2[t1[:, 0] == 0., :] = 0. + nua2 = nu[ind[indv2[0]], index2[indv2[1]]] ** 2 + upsi2[indv2] += np.exp( + nua2 - gam[ind[indv2[0]], 0] * tz[indv2] + np.log(2.0) + ) - np.exp(zt_lq2[indv2] + np.log(wofz(-1j * z1[indv2]).real)) + upsi2[t1[:, 0] == 0.0, :] = 0.0 - #Gradient wrt z - za1 = c0*gam - za1c = c0*gamc - za2 = .5*S_w - gz[ind2t] = za1c[np.ix_(ind, index2)]*upsi1 - za1[np.ix_(ind, index2)]*upsi2\ - + za2[np.ix_(ind, index2)]*(ezgamct - ezgamt) + # Gradient wrt z + za1 = c0 * gam + za1c = c0 * gamc + za2 = 0.5 * S_w + gz[ind2t] = ( + za1c[np.ix_(ind, index2)] * upsi1 + - za1[np.ix_(ind, index2)] * upsi2 + + za2[np.ix_(ind, index2)] * (ezgamct - ezgamt) + ) return gz diff --git a/GPy/kern/src/todo/eq_ode1.py b/GPy/kern/src/todo/eq_ode1.py index bf0ca7e4..7104a8e9 100644 --- a/GPy/kern/src/todo/eq_ode1.py +++ b/GPy/kern/src/todo/eq_ode1.py @@ -121,7 +121,7 @@ class Eq_ode1(Kernpart): target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat)) def Kdiag(self,index,target): - #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] + #target += np.diag(self.B)[np.asarray(index,dtype=int).flatten()] pass def _param_grad_helper(self,dL_dK,X,X2,target): @@ -203,7 +203,7 @@ class Eq_ode1(Kernpart): self._t = X[:, 0] if not X.shape[1] == 2: raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices') - self._index = np.asarray(X[:, 1],dtype=np.int) + self._index = np.asarray(X[:, 1],dtype=int) # Sort indices so that outputs are in blocks for computational # convenience. self._order = self._index.argsort() @@ -220,7 +220,7 @@ class Eq_ode1(Kernpart): if not X2.shape[1] == 2: raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices') self._t2 = X2[:, 0] - self._index2 = np.asarray(X2[:, 1],dtype=np.int) + self._index2 = np.asarray(X2[:, 1],dtype=int) self._order2 = self._index2.argsort() self._index2 = self._index2[self._order2] self._t2 = self._t2[self._order2] diff --git a/GPy/models/sparse_gp_coregionalized_regression.py b/GPy/models/sparse_gp_coregionalized_regression.py index 2a19d52c..43e782bf 100644 --- a/GPy/models/sparse_gp_coregionalized_regression.py +++ b/GPy/models/sparse_gp_coregionalized_regression.py @@ -7,6 +7,7 @@ from ..inference.latent_function_inference import VarDTC from .. import kern from .. import util + class SparseGPCoregionalizedRegression(SparseGP): """ Sparse Gaussian Process model for heteroscedastic multioutput regression @@ -34,34 +35,65 @@ class SparseGPCoregionalizedRegression(SparseGP): :type kernel_name: string """ - def __init__(self, X_list, Y_list, Z_list=[], kernel=None, likelihoods_list=None, num_inducing=10, X_variance=None, name='SGPCR',W_rank=1,kernel_name='coreg'): - - #Input and Output - X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list) + def __init__( + self, + X_list, + Y_list, + Z_list=[], + kernel=None, + likelihoods_list=None, + num_inducing=10, + X_variance=None, + name="SGPCR", + W_rank=1, + kernel_name="coreg", + ): + # Input and Output + X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list) Ny = len(Y_list) - #Kernel + # Kernel if kernel is None: - kernel = kern.RBF(X.shape[1]-1) - - kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kernel, W_rank=W_rank, name=kernel_name) + kernel = kern.RBF(X.shape[1] - 1) - #Likelihood - likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list) + kernel = util.multioutput.ICM( + input_dim=X.shape[1] - 1, + num_outputs=Ny, + kernel=kernel, + W_rank=W_rank, + name=kernel_name, + ) - #Inducing inputs list + # Likelihood + likelihood = util.multioutput.build_likelihood( + Y_list, self.output_index, likelihoods_list + ) + + # Inducing inputs list if len(Z_list): - assert len(Z_list) == Ny, 'Number of outputs do not match length of inducing inputs list.' + assert ( + len(Z_list) == Ny + ), "Number of outputs do not match length of inducing inputs list." else: - if isinstance(num_inducing,np.int): + if isinstance(num_inducing, int): num_inducing = [num_inducing] * Ny num_inducing = np.asarray(num_inducing) - assert num_inducing.size == Ny, 'Number of outputs do not match length of inducing inputs list.' - for ni,Xi in zip(num_inducing,X_list): + assert ( + num_inducing.size == Ny + ), "Number of outputs do not match length of inducing inputs list." + for ni, Xi in zip(num_inducing, X_list): i = np.random.permutation(Xi.shape[0])[:ni] Z_list.append(Xi[i].copy()) Z, _, Iz = util.multioutput.build_XY(Z_list) - super(SparseGPCoregionalizedRegression, self).__init__(X, Y, Z, kernel, likelihood, inference_method=VarDTC(), Y_metadata={'output_index':self.output_index}) - self['.*inducing'][:,-1].fix() + super(SparseGPCoregionalizedRegression, self).__init__( + X, + Y, + Z, + kernel, + likelihood, + inference_method=VarDTC(), + Y_metadata={"output_index": self.output_index}, + ) + self[".*inducing"][:, -1].fix() diff --git a/GPy/models/ss_mrd.py b/GPy/models/ss_mrd.py index 0aa472c7..c4dbec78 100644 --- a/GPy/models/ss_mrd.py +++ b/GPy/models/ss_mrd.py @@ -5,52 +5,110 @@ The Maniforld Relevance Determination model with the spike-and-slab prior import numpy as np from ..core import Model from .ss_gplvm import SSGPLVM -from GPy.core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior +from GPy.core.parameterization.variational import ( + SpikeAndSlabPrior, + NormalPosterior, + VariationalPrior, +) from ..util.misc import param_to_array from ..kern import RBF from ..core import Param from numpy.linalg.linalg import LinAlgError + class SSMRD(Model): - - def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute', - num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True, - pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ): + def __init__( + self, + Ylist, + input_dim, + X=None, + X_variance=None, + Gammas=None, + initx="PCA_concat", + initz="permute", + num_inducing=10, + Zs=None, + kernels=None, + inference_methods=None, + likelihoods=None, + group_spike=True, + pi=0.5, + name="ss_mrd", + Ynames=None, + mpi_comm=None, + IBP=False, + alpha=2.0, + taus=None, + ): super(SSMRD, self).__init__(name) self.mpi_comm = mpi_comm self._PROPAGATE_ = False - + # initialize X for individual models - X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx) + X, X_variance, Gammas, fracs = self._init_X( + Ylist, input_dim, X, X_variance, Gammas, initx + ) self.X = NormalPosterior(means=X, variances=X_variance) - + if kernels is None: - kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in range(len(Ylist))] + kernels = [ + RBF(input_dim, lengthscale=1.0 / fracs, ARD=True) + for i in range(len(Ylist)) + ] if Zs is None: - Zs = [None]* len(Ylist) + Zs = [None] * len(Ylist) if likelihoods is None: - likelihoods = [None]* len(Ylist) + likelihoods = [None] * len(Ylist) if inference_methods is None: - inference_methods = [None]* len(Ylist) - + inference_methods = [None] * len(Ylist) + if IBP: - self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in range(len(Ylist))] + self.var_priors = [ + IBPPrior_SSMRD(len(Ylist), input_dim, alpha=alpha) + for i in range(len(Ylist)) + ] else: - self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in range(len(Ylist))] - self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike, - kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i], - name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)] - self.link_parameters(*(self.models+[self.X])) - + self.var_priors = [ + SpikeAndSlabPrior_SSMRD( + nModels=len(Ylist), pi=pi, learnPi=False, group_spike=group_spike + ) + for i in range(len(Ylist)) + ] + self.models = [ + SSGPLVM( + y, + input_dim, + X=X.copy(), + X_variance=X_variance.copy(), + Gamma=Gammas[i], + num_inducing=num_inducing, + Z=Zs[i], + learnPi=False, + group_spike=group_spike, + kernel=kernels[i], + inference_method=inference_methods[i], + likelihood=likelihoods[i], + variational_prior=self.var_priors[i], + IBP=IBP, + tau=None if taus is None else taus[i], + name="model_" + str(i), + mpi_comm=mpi_comm, + sharedX=True, + ) + for i, y in enumerate(Ylist) + ] + self.link_parameters(*(self.models + [self.X])) + def _propogate_X_val(self): - if self._PROPAGATE_: return + if self._PROPAGATE_: + return for m in self.models: m.X.mean.values[:] = self.X.mean.values m.X.variance.values[:] = self.X.variance.values varp_list = [m.X for m in self.models] [vp._update_inernal(varp_list) for vp in self.var_priors] - self._PROPAGATE_=True - + self._PROPAGATE_ = True + def _collate_X_gradient(self): self._PROPAGATE_ = False self.X.mean.gradient[:] = 0 @@ -58,86 +116,92 @@ class SSMRD(Model): for m in self.models: self.X.mean.gradient += m.X.mean.gradient self.X.variance.gradient += m.X.variance.gradient - + def parameters_changed(self): super(SSMRD, self).parameters_changed() [m.parameters_changed() for m in self.models] - self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models]) + self._log_marginal_likelihood = sum( + [m._log_marginal_likelihood for m in self.models] + ) self._collate_X_gradient() def log_likelihood(self): return self._log_marginal_likelihood - - def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'): - + + def _init_X( + self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx="PCA_concat" + ): # Divide latent dimensions - idx = np.empty((input_dim,),dtype=np.int) - residue = (input_dim)%(len(Ylist)) + idx = np.empty((input_dim,), dtype=int) + residue = (input_dim) % (len(Ylist)) for i in range(len(Ylist)): if i < residue: - size = input_dim/len(Ylist)+1 - idx[i*size:(i+1)*size] = i + size = input_dim / len(Ylist) + 1 + idx[i * size : (i + 1) * size] = i else: - size = input_dim/len(Ylist) - idx[i*size+residue:(i+1)*size+residue] = i - + size = input_dim / len(Ylist) + idx[i * size + residue : (i + 1) * size + residue] = i + if X is None: - if initx == 'PCA_concat': - X = np.empty((Ylist[0].shape[0],input_dim)) + if initx == "PCA_concat": + X = np.empty((Ylist[0].shape[0], input_dim)) fracs = np.empty((input_dim,)) from ..util.initialization import initialize_latent + for i in range(len(Ylist)): Y = Ylist[i] - dim = (idx==i).sum() - if dim>0: - x, fr = initialize_latent('PCA', dim, Y) - X[:,idx==i] = x - fracs[idx==i] = fr - elif initx=='PCA_joint': + dim = (idx == i).sum() + if dim > 0: + x, fr = initialize_latent("PCA", dim, Y) + X[:, idx == i] = x + fracs[idx == i] = fr + elif initx == "PCA_joint": y = np.hstack(Ylist) from ..util.initialization import initialize_latent - X, fracs = initialize_latent('PCA', input_dim, y) + + X, fracs = initialize_latent("PCA", input_dim, y) else: X = np.random.randn(Ylist[0].shape[0], input_dim) fracs = np.ones(input_dim) else: fracs = np.ones(input_dim) - - - if X_variance is None: # The variance of the variational approximation (S) - X_variance = np.random.uniform(0,.1,X.shape) - + + if X_variance is None: # The variance of the variational approximation (S) + X_variance = np.random.uniform(0, 0.1, X.shape) + if Gammas is None: Gammas = [] for x in X: - gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation + gamma = np.empty_like( + X + ) # The posterior probabilities of the binary variable in the variational approximation gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim) - gamma[gamma>1.-1e-9] = 1.-1e-9 - gamma[gamma<1e-9] = 1e-9 + gamma[gamma > 1.0 - 1e-9] = 1.0 - 1e-9 + gamma[gamma < 1e-9] = 1e-9 Gammas.append(gamma) return X, X_variance, Gammas, fracs @Model.optimizer_array.setter def optimizer_array(self, p): if self.mpi_comm != None: - if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0: - self.mpi_comm.Bcast(np.int32(1),root=0) - self.mpi_comm.Bcast(p, root=0) - Model.optimizer_array.fset(self,p) - + if self._IN_OPTIMIZATION_ and self.mpi_comm.rank == 0: + self.mpi_comm.Bcast(np.int32(1), root=0) + self.mpi_comm.Bcast(p, root=0) + Model.optimizer_array.fset(self, p) + def optimize(self, optimizer=None, start=None, **kwargs): self._IN_OPTIMIZATION_ = True - if self.mpi_comm==None: - super(SSMRD, self).optimize(optimizer,start,**kwargs) - elif self.mpi_comm.rank==0: - super(SSMRD, self).optimize(optimizer,start,**kwargs) - self.mpi_comm.Bcast(np.int32(-1),root=0) - elif self.mpi_comm.rank>0: + if self.mpi_comm == None: + super(SSMRD, self).optimize(optimizer, start, **kwargs) + elif self.mpi_comm.rank == 0: + super(SSMRD, self).optimize(optimizer, start, **kwargs) + self.mpi_comm.Bcast(np.int32(-1), root=0) + elif self.mpi_comm.rank > 0: x = self.optimizer_array.copy() - flag = np.empty(1,dtype=np.int32) + flag = np.empty(1, dtype=np.int32) while True: - self.mpi_comm.Bcast(flag,root=0) - if flag==1: + self.mpi_comm.Bcast(flag, root=0) + if flag == 1: try: self.optimizer_array = x self._fail_count = 0 @@ -145,29 +209,51 @@ class SSMRD(Model): if self._fail_count >= self._allowed_failures: raise self._fail_count += 1 - elif flag==-1: + elif flag == -1: break else: self._IN_OPTIMIZATION_ = False raise Exception("Unrecognizable flag for synchronization!") self._IN_OPTIMIZATION_ = False - + class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior): - def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw): + def __init__( + self, + nModels, + pi=0.5, + learnPi=False, + group_spike=True, + variance=1.0, + name="SSMRDPrior", + **kw + ): self.nModels = nModels self._b_prob_all = 0.5 - super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw) - + super(SpikeAndSlabPrior_SSMRD, self).__init__( + pi=pi, + learnPi=learnPi, + group_spike=group_spike, + variance=variance, + name=name, + **kw + ) + def _update_inernal(self, varp_list): """Make an update of the internal status by gathering the variational posteriors for all the individual models.""" # The probability for the binary variable for the same latent dimension of any of the models is on. if self.group_spike: - self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group) - [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]] + self._b_prob_all = 1.0 - param_to_array(varp_list[0].gamma_group) + [ + np.multiply(self._b_prob_all, 1.0 - vp.gamma_group, self._b_prob_all) + for vp in varp_list[1:] + ] else: - self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob) - [np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]] + self._b_prob_all = 1.0 - param_to_array(varp_list[0].binary_prob) + [ + np.multiply(self._b_prob_all, 1.0 - vp.binary_prob, self._b_prob_all) + for vp in varp_list[1:] + ] def KL_divergence(self, variational_posterior): mu = variational_posterior.mean @@ -176,16 +262,20 @@ class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior): gamma = variational_posterior.binary_prob[0] else: gamma = variational_posterior.binary_prob - if len(self.pi.shape)==2: - idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + if len(self.pi.shape) == 2: + idx = np.unique(gamma._raveled_index() / gamma.shape[-1]) pi = self.pi[idx] else: pi = self.pi - var_mean = np.square(mu)/self.variance - var_S = (S/self.variance - np.log(S)) - var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum() - return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels) + var_mean = np.square(mu) / self.variance + var_S = S / self.variance - np.log(S) + var_gamma = (gamma * np.log(gamma / pi)).sum() + ( + (1 - gamma) * np.log((1 - gamma) / (1 - pi)) + ).sum() + return var_gamma + ( + (1.0 - self._b_prob_all) * (np.log(self.variance) - 1.0 + var_mean + var_S) + ).sum() / (2.0 * self.nModels) def update_gradients_KL(self, variational_posterior): mu = variational_posterior.mean @@ -195,63 +285,141 @@ class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior): gamma = variational_posterior.binary_prob.values[0] else: gamma = variational_posterior.binary_prob.values - if len(self.pi.shape)==2: - idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + if len(self.pi.shape) == 2: + idx = np.unique(gamma._raveled_index() / gamma.shape[-1]) pi = self.pi[idx] else: pi = self.pi if self.group_spike: - tmp = self._b_prob_all/(1.-gamma) - variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. + tmp = self._b_prob_all / (1.0 - gamma) + variational_posterior.binary_prob.gradient -= ( + np.log((1 - pi) / pi * gamma / (1.0 - gamma)) / N + + tmp + * ( + (np.square(mu) + S) / self.variance + - np.log(S) + + np.log(self.variance) + - 1.0 + ) + / 2.0 + ) else: - variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. - mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels) - S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels) + variational_posterior.binary_prob.gradient -= ( + np.log((1 - pi) / pi * gamma / (1.0 - gamma)) + + ( + (np.square(mu) + S) / self.variance + - np.log(S) + + np.log(self.variance) + - 1.0 + ) + / 2.0 + ) + mu.gradient -= (1.0 - self._b_prob_all) * mu / (self.variance * self.nModels) + S.gradient -= ( + (1.0 / self.variance - 1.0 / S) + * (1.0 - self._b_prob_all) + / (2.0 * self.nModels) + ) if self.learnPi: - raise 'Not Supported!' + raise "Not Supported!" + class IBPPrior_SSMRD(VariationalPrior): - def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw): + def __init__(self, nModels, input_dim, alpha=2.0, tau=None, name="IBPPrior", **kw): super(IBPPrior_SSMRD, self).__init__(name=name, **kw) - from paramz.transformations import Logexp, __fixed__ + from paramz.transformations import Logexp, __fixed__ + self.nModels = nModels self._b_prob_all = 0.5 self.input_dim = input_dim - self.variance = 1. - self.alpha = Param('alpha', alpha, __fixed__) + self.variance = 1.0 + self.alpha = Param("alpha", alpha, __fixed__) self.link_parameter(self.alpha) - + def _update_inernal(self, varp_list): """Make an update of the internal status by gathering the variational posteriors for all the individual models.""" # The probability for the binary variable for the same latent dimension of any of the models is on. - self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group) - [np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]] + self._b_prob_all = 1.0 - param_to_array(varp_list[0].gamma_group) + [ + np.multiply(self._b_prob_all, 1.0 - vp.gamma_group, self._b_prob_all) + for vp in varp_list[1:] + ] def KL_divergence(self, variational_posterior): - mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values - - var_mean = np.square(mu)/self.variance - var_S = (S/self.variance - np.log(S)) - part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels) - - ad = self.alpha/self.input_dim - from scipy.special import betaln,digamma - part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \ - + (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \ - (((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum() - return part1+part2 + mu, S, gamma, tau = ( + variational_posterior.mean.values, + variational_posterior.variance.values, + variational_posterior.gamma_group.values, + variational_posterior.tau.values, + ) + + var_mean = np.square(mu) / self.variance + var_S = S / self.variance - np.log(S) + part1 = ( + (1.0 - self._b_prob_all) * (np.log(self.variance) - 1.0 + var_mean + var_S) + ).sum() / (2.0 * self.nModels) + + ad = self.alpha / self.input_dim + from scipy.special import betaln, digamma + + part2 = ( + (gamma * np.log(gamma)).sum() + + ((1.0 - gamma) * np.log(1.0 - gamma)).sum() + + (betaln(ad, 1.0) * self.input_dim - betaln(tau[:, 0], tau[:, 1]).sum()) + / self.nModels + + (((tau[:, 0] - ad) / self.nModels - gamma) * digamma(tau[:, 0])).sum() + + ( + ((tau[:, 1] - 1.0) / self.nModels + gamma - 1.0) * digamma(tau[:, 1]) + ).sum() + + ( + ((1.0 + ad - tau[:, 0] - tau[:, 1]) / self.nModels + 1.0) + * digamma(tau.sum(axis=1)) + ).sum() + ) + return part1 + part2 def update_gradients_KL(self, variational_posterior): - mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values + mu, S, gamma, tau = ( + variational_posterior.mean.values, + variational_posterior.variance.values, + variational_posterior.gamma_group.values, + variational_posterior.tau.values, + ) - variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels) - variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels) - from scipy.special import digamma,polygamma - tmp = self._b_prob_all/(1.-gamma) - dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data - variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. - ad = self.alpha/self.input_dim - common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1)) - variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common) - variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common) + variational_posterior.mean.gradient -= ( + (1.0 - self._b_prob_all) * mu / (self.variance * self.nModels) + ) + variational_posterior.variance.gradient -= ( + (1.0 / self.variance - 1.0 / S) + * (1.0 - self._b_prob_all) + / (2.0 * self.nModels) + ) + from scipy.special import digamma, polygamma + + tmp = self._b_prob_all / (1.0 - gamma) + dgamma = ( + np.log(gamma / (1.0 - gamma)) + digamma(tau[:, 1]) - digamma(tau[:, 0]) + ) / variational_posterior.num_data + variational_posterior.binary_prob.gradient -= ( + dgamma + + tmp + * ( + (np.square(mu) + S) / self.variance + - np.log(S) + + np.log(self.variance) + - 1.0 + ) + / 2.0 + ) + ad = self.alpha / self.input_dim + common = ((1.0 + ad - tau[:, 0] - tau[:, 1]) / self.nModels + 1.0) * polygamma( + 1, tau.sum(axis=1) + ) + variational_posterior.tau.gradient[:, 0] = -( + ((tau[:, 0] - ad) / self.nModels - gamma) * polygamma(1, tau[:, 0]) + common + ) + variational_posterior.tau.gradient[:, 1] = -( + ((tau[:, 1] - 1.0) / self.nModels + gamma - 1.0) * polygamma(1, tau[:, 1]) + + common + ) diff --git a/GPy/models/state_space_main.py b/GPy/models/state_space_main.py index 6ed2fbeb..fb6693ec 100644 --- a/GPy/models/state_space_main.py +++ b/GPy/models/state_space_main.py @@ -16,6 +16,7 @@ import warnings try: from . import state_space_setup + setup_available = True except ImportError as e: setup_available = False @@ -25,13 +26,14 @@ print_verbose = False try: import state_space_cython + cython_code_available = True if print_verbose: print("state_space: cython is available") except ImportError as e: cython_code_available = False -#cython_code_available = False +# cython_code_available = False # Use cython by default use_cython = False if setup_available: @@ -49,7 +51,6 @@ tmp_buffer = None class Dynamic_Callables_Python(object): - def f_a(self, k, m, A): """ p_a: function (k, x_{k-1}, A_{k}). Dynamic function. @@ -113,6 +114,7 @@ class Dynamic_Callables_Python(object): raise NotImplemented("reset is not implemented!") + if use_cython: Dynamic_Callables_Class = state_space_cython.Dynamic_Callables_Cython else: @@ -183,9 +185,9 @@ class Measurement_Callables_Python(object): raise NotImplemented("reset is not implemented!") + if use_cython: - Measurement_Callables_Class = state_space_cython.\ - Measurement_Callables_Cython + Measurement_Callables_Class = state_space_cython.Measurement_Callables_Cython else: Measurement_Callables_Class = Measurement_Callables_Python @@ -194,6 +196,7 @@ class R_handling_Python(Measurement_Callables_Class): """ The calss handles noise matrix R. """ + def __init__(self, R, index, R_time_var_index, unique_R_number, dR=None): """ Input: @@ -225,7 +228,7 @@ class R_handling_Python(Measurement_Callables_Class): self.R_time_var_index = int(R_time_var_index) self.dR = dR - if (len(np.unique(index)) > unique_R_number): + if len(np.unique(index)) > unique_R_number: self.svd_each_time = True else: self.svd_each_time = False @@ -248,32 +251,39 @@ class R_handling_Python(Measurement_Callables_Class): ind = int(self.index[self.R_time_var_index, k]) R = self.R[:, :, ind] - if (R.shape[0] == 1): # 1-D case handle simplier. No storage + if R.shape[0] == 1: # 1-D case handle simplier. No storage # of the result, just compute it each time. - inv_square_root = np.sqrt(1.0/R) + inv_square_root = np.sqrt(1.0 / R) else: if self.svd_each_time: + (U, S, Vh) = sp.linalg.svd( + R, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) - (U, S, Vh) = sp.linalg.svd(R, full_matrices=False, - compute_uv=True, overwrite_a=False, - check_finite=True) - - inv_square_root = U * 1.0/np.sqrt(S) + inv_square_root = U * 1.0 / np.sqrt(S) else: if ind in self.R_square_root: inv_square_root = self.R_square_root[ind] else: - (U, S, Vh) = sp.linalg.svd(R, full_matrices=False, - compute_uv=True, - overwrite_a=False, - check_finite=True) + (U, S, Vh) = sp.linalg.svd( + R, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) - inv_square_root = U * 1.0/np.sqrt(S) + inv_square_root = U * 1.0 / np.sqrt(S) self.R_square_root[ind] = inv_square_root return inv_square_root + if use_cython: R_handling_Class = state_space_cython.R_handling_Cython else: @@ -281,11 +291,20 @@ else: class Std_Measurement_Callables_Python(R_handling_Class): - - def __init__(self, H, H_time_var_index, R, index, R_time_var_index, - unique_R_number, dH=None, dR=None): - super(Std_Measurement_Callables_Python, - self).__init__(R, index, R_time_var_index, unique_R_number, dR) + def __init__( + self, + H, + H_time_var_index, + R, + index, + R_time_var_index, + unique_R_number, + dH=None, + dR=None, + ): + super(Std_Measurement_Callables_Python, self).__init__( + R, index, R_time_var_index, unique_R_number, dR + ) self.H = H self.H_time_var_index = int(H_time_var_index) @@ -319,15 +338,16 @@ class Std_Measurement_Callables_Python(R_handling_Class): return self.dH # the same dirivative on each iteration + if use_cython: - Std_Measurement_Callables_Class = state_space_cython.\ - Std_Measurement_Callables_Cython + Std_Measurement_Callables_Class = ( + state_space_cython.Std_Measurement_Callables_Cython + ) else: Std_Measurement_Callables_Class = Std_Measurement_Callables_Python class Q_handling_Python(Dynamic_Callables_Class): - def __init__(self, Q, index, Q_time_var_index, unique_Q_number, dQ=None): """ Input: @@ -360,7 +380,7 @@ class Q_handling_Python(Dynamic_Callables_Class): self.Q_time_var_index = Q_time_var_index self.dQ = dQ - if (len(np.unique(index)) > unique_Q_number): + if len(np.unique(index)) > unique_Q_number: self.svd_each_time = True else: self.svd_each_time = False @@ -391,27 +411,31 @@ class Q_handling_Python(Dynamic_Callables_Class): ind = self.index[self.Q_time_var_index, k] Q = self.Q[:, :, ind] - if (Q.shape[0] == 1): # 1-D case handle simplier. No storage + if Q.shape[0] == 1: # 1-D case handle simplier. No storage # of the result, just compute it each time. square_root = np.sqrt(Q) else: if self.svd_each_time: - - (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False, - compute_uv=True, - overwrite_a=False, - check_finite=True) + (U, S, Vh) = sp.linalg.svd( + Q, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) square_root = U * np.sqrt(S) else: - if ind in self.Q_square_root: square_root = self.Q_square_root[ind] else: - (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False, - compute_uv=True, - overwrite_a=False, - check_finite=True) + (U, S, Vh) = sp.linalg.svd( + Q, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) square_root = U * np.sqrt(S) @@ -419,6 +443,7 @@ class Q_handling_Python(Dynamic_Callables_Class): return square_root + if use_cython: Q_handling_Class = state_space_cython.Q_handling_Cython else: @@ -426,11 +451,20 @@ else: class Std_Dynamic_Callables_Python(Q_handling_Class): - - def __init__(self, A, A_time_var_index, Q, index, Q_time_var_index, - unique_Q_number, dA=None, dQ=None): - super(Std_Dynamic_Callables_Python, - self).__init__(Q, index, Q_time_var_index, unique_Q_number, dQ) + def __init__( + self, + A, + A_time_var_index, + Q, + index, + Q_time_var_index, + unique_Q_number, + dA=None, + dQ=None, + ): + super(Std_Dynamic_Callables_Python, self).__init__( + Q, index, Q_time_var_index, unique_Q_number, dQ + ) self.A = A self.A_time_var_index = np.asarray(A_time_var_index, np.int_) @@ -438,11 +472,11 @@ class Std_Dynamic_Callables_Python(Q_handling_Class): def f_a(self, k, m, A): """ - f_a: function (k, x_{k-1}, A_{k}). Dynamic function. - k (iteration number), starts at 0 - x_{k-1} State from the previous step - A_{k} Jacobian matrices of f_a. In the linear case it is exactly - A_{k}. + f_a: function (k, x_{k-1}, A_{k}). Dynamic function. + k (iteration number), starts at 0 + x_{k-1} State from the previous step + A_{k} Jacobian matrices of f_a. In the linear case it is exactly + A_{k}. """ return np.dot(A, m) @@ -471,16 +505,15 @@ class Std_Dynamic_Callables_Python(Q_handling_Class): return self + if use_cython: - Std_Dynamic_Callables_Class = state_space_cython.\ - Std_Dynamic_Callables_Cython + Std_Dynamic_Callables_Class = state_space_cython.Std_Dynamic_Callables_Cython else: Std_Dynamic_Callables_Class = Std_Dynamic_Callables_Python class AddMethodToClass(object): - - def __init__(self, func=None, tp='staticmethod'): + def __init__(self, func=None, tp="staticmethod"): """ Input: -------------- @@ -495,16 +528,18 @@ class AddMethodToClass(object): self.tp = tp def __get__(self, obj, klass=None, *args, **kwargs): - - if self.tp == 'staticmethod': + if self.tp == "staticmethod": return self.func - elif self.tp == 'normal': + elif self.tp == "normal": + def newfunc(obj, *args, **kwargs): return self.func - elif self.tp == 'classmethod': + elif self.tp == "classmethod": + def newfunc(klass, *args, **kwargs): return self.func + return newfunc @@ -519,23 +554,24 @@ class DescreteStateSpaceMeta(type): """ if use_cython: - if '_kalman_prediction_step_SVD' in attributes: - attributes['_kalman_prediction_step_SVD'] =\ - AddMethodToClass(state_space_cython. - _kalman_prediction_step_SVD_Cython) + if "_kalman_prediction_step_SVD" in attributes: + attributes["_kalman_prediction_step_SVD"] = AddMethodToClass( + state_space_cython._kalman_prediction_step_SVD_Cython + ) - if '_kalman_update_step_SVD' in attributes: - attributes['_kalman_update_step_SVD'] =\ - AddMethodToClass(state_space_cython. - _kalman_update_step_SVD_Cython) + if "_kalman_update_step_SVD" in attributes: + attributes["_kalman_update_step_SVD"] = AddMethodToClass( + state_space_cython._kalman_update_step_SVD_Cython + ) - if '_cont_discr_kalman_filter_raw' in attributes: - attributes['_cont_discr_kalman_filter_raw'] =\ - AddMethodToClass(state_space_cython. - _cont_discr_kalman_filter_raw_Cython) + if "_cont_discr_kalman_filter_raw" in attributes: + attributes["_cont_discr_kalman_filter_raw"] = AddMethodToClass( + state_space_cython._cont_discr_kalman_filter_raw_Cython + ) - return super(DescreteStateSpaceMeta, - typeclass).__new__(typeclass, name, bases, attributes) + return super(DescreteStateSpaceMeta, typeclass).__new__( + typeclass, name, bases, attributes + ) class DescreteStateSpace(object): @@ -560,6 +596,7 @@ class DescreteStateSpace(object): implementations are very similar. """ + __metaclass__ = DescreteStateSpaceMeta @staticmethod @@ -586,37 +623,56 @@ class DescreteStateSpace(object): None. """ - if (len(shape) > 3): - raise ValueError("""Input array is not supposed to be more - than 3 dimensional.""") + if len(shape) > 3: + raise ValueError( + """Input array is not supposed to be more + than 3 dimensional.""" + ) - if (len(shape) > desired_dim): + if len(shape) > desired_dim: raise ValueError("Input array shape is more than desired shape.") elif len(shape) == 1: - if (desired_dim == 3): + if desired_dim == 3: return ((shape[0], 1, 1), shape) # last dimension is the # time serime_series_no - elif (desired_dim == 2): + elif desired_dim == 2: return ((shape[0], 1), shape) elif len(shape) == 2: - if (desired_dim == 3): - return ((shape[1], 1, 1), shape) if (shape[0] == 1) else\ - ((shape[0], shape[1], 1), shape) # convert to column - # vector - elif (desired_dim == 2): - return ((shape[1], 1), shape) if (shape[0] == 1) else\ - ((shape[0], shape[1]), None) # convert to column vector + if desired_dim == 3: + return ( + ((shape[1], 1, 1), shape) + if (shape[0] == 1) + else ((shape[0], shape[1], 1), shape) + ) # convert to column + # vector + elif desired_dim == 2: + return ( + ((shape[1], 1), shape) + if (shape[0] == 1) + else ((shape[0], shape[1]), None) + ) # convert to column vector else: # len(shape) == 3 return (shape, None) # do nothing @classmethod - def kalman_filter(cls, p_A, p_Q, p_H, p_R, Y, index=None, m_init=None, - P_init=None, p_kalman_filter_type='regular', - calc_log_likelihood=False, - calc_grad_log_likelihood=False, grad_params_no=None, - grad_calc_params=None): + def kalman_filter( + cls, + p_A, + p_Q, + p_H, + p_R, + Y, + index=None, + m_init=None, + P_init=None, + p_kalman_filter_type="regular", + calc_log_likelihood=False, + calc_grad_log_likelihood=False, + grad_params_no=None, + grad_calc_params=None, + ): """ This function implements the basic Kalman Filter algorithm These notations for the State-Space model are assumed: @@ -743,7 +799,7 @@ class DescreteStateSpace(object): The dictionary contains the same fields. """ - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() # Parameters checking -> # index @@ -753,14 +809,16 @@ class DescreteStateSpace(object): p_R = np.atleast_1d(p_R) # Reshape and check measurements: - Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) + Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) measurement_dim = Y.shape[1] - time_series_no = Y.shape[2] # multiple time series mode + time_series_no = Y.shape[2] # multiple time series mode - if ((len(p_A.shape) == 3) and (len(p_A.shape[2]) != 1)) or\ - ((len(p_Q.shape) == 3) and (len(p_Q.shape[2]) != 1)) or\ - ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\ - ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)): + if ( + ((len(p_A.shape) == 3) and (len(p_A.shape[2]) != 1)) + or ((len(p_Q.shape) == 3) and (len(p_Q.shape[2]) != 1)) + or ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) + or ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)) + ): model_matrices_chage_with_time = True else: model_matrices_chage_with_time = False @@ -768,35 +826,55 @@ class DescreteStateSpace(object): # Check index old_index_shape = None if index is None: - if (len(p_A.shape) == 3) or (len(p_Q.shape) == 3) or\ - (len(p_H.shape) == 3) or (len(p_R.shape) == 3): - raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)") - else: # matrices do not change in time, so form dummy zero indices. - index = np.zeros((1,Y.shape[0])) + if ( + (len(p_A.shape) == 3) + or (len(p_Q.shape) == 3) + or (len(p_H.shape) == 3) + or (len(p_R.shape) == 3) + ): + raise ValueError( + "Parameter index can not be None for time varying matrices (third dimension is present)" + ) + else: # matrices do not change in time, so form dummy zero indices. + index = np.zeros((1, Y.shape[0])) else: if len(index.shape) == 1: - index.shape = (1,index.shape[0]) + index.shape = (1, index.shape[0]) old_index_shape = (index.shape[0],) - if (index.shape[1] != Y.shape[0]): - raise ValueError("Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}") + if index.shape[1] != Y.shape[0]: + raise ValueError( + "Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}" + ) - if (index.shape[0] == 1): - A_time_var_index = 0; Q_time_var_index = 0 - H_time_var_index = 0; R_time_var_index = 0 - elif (index.shape[0] == 4): - A_time_var_index = 0; Q_time_var_index = 1 - H_time_var_index = 2; R_time_var_index = 3 + if index.shape[0] == 1: + A_time_var_index = 0 + Q_time_var_index = 0 + H_time_var_index = 0 + R_time_var_index = 0 + elif index.shape[0] == 4: + A_time_var_index = 0 + Q_time_var_index = 1 + H_time_var_index = 2 + R_time_var_index = 3 else: raise ValueError("First Dimension of index must be either 1 or 4.") state_dim = p_A.shape[0] # Check and make right shape for model matrices. On exit they all are 3 dimensional. Last dimension # correspond to change in time. - (p_A, old_A_shape) = cls._check_SS_matrix(p_A, state_dim, measurement_dim, which='A') - (p_Q, old_Q_shape) = cls._check_SS_matrix(p_Q, state_dim, measurement_dim, which='Q') - (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H') - (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R') + (p_A, old_A_shape) = cls._check_SS_matrix( + p_A, state_dim, measurement_dim, which="A" + ) + (p_Q, old_Q_shape) = cls._check_SS_matrix( + p_Q, state_dim, measurement_dim, which="Q" + ) + (p_H, old_H_shape) = cls._check_SS_matrix( + p_H, state_dim, measurement_dim, which="H" + ) + (p_R, old_R_shape) = cls._check_SS_matrix( + p_R, state_dim, measurement_dim, which="R" + ) # m_init if m_init is None: @@ -807,10 +885,10 @@ class DescreteStateSpace(object): # P_init if P_init is None: P_init = np.eye(state_dim) - elif not isinstance(P_init, collections.Iterable): #scalar - P_init = P_init*np.eye(state_dim) + elif not isinstance(P_init, collections.Iterable): # scalar + P_init = P_init * np.eye(state_dim) - if p_kalman_filter_type not in ('regular', 'svd'): + if p_kalman_filter_type not in ("regular", "svd"): raise ValueError("Kalman filer type neither 'regular nor 'svd'.") # Functions to pass to the kalman_filter algorithm: @@ -818,27 +896,51 @@ class DescreteStateSpace(object): # k - number of Kalman filter iteration # m - vector for calculating matrices. Required for EKF. Not used here. - c_p_A = p_A.copy() # create a copy because this object is passed to the smoother - c_p_Q = p_Q.copy() # create a copy because this object is passed to the smoother - c_index = index.copy() # create a copy because this object is passed to the smoother + c_p_A = ( + p_A.copy() + ) # create a copy because this object is passed to the smoother + c_p_Q = ( + p_Q.copy() + ) # create a copy because this object is passed to the smoother + c_index = ( + index.copy() + ) # create a copy because this object is passed to the smoother if calc_grad_log_likelihood: if model_matrices_chage_with_time: - raise ValueError("When computing likelihood gradient A and Q can not change over time.") + raise ValueError( + "When computing likelihood gradient A and Q can not change over time." + ) - dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which = 'dA') - dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which = 'dQ') - dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH') - dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR') + dA = cls._check_grad_state_matrices( + grad_calc_params.get("dA"), state_dim, grad_params_no, which="dA" + ) + dQ = cls._check_grad_state_matrices( + grad_calc_params.get("dQ"), state_dim, grad_params_no, which="dQ" + ) + dH = cls._check_grad_measurement_matrices( + grad_calc_params.get("dH"), + state_dim, + grad_params_no, + measurement_dim, + which="dH", + ) + dR = cls._check_grad_measurement_matrices( + grad_calc_params.get("dR"), + state_dim, + grad_params_no, + measurement_dim, + which="dR", + ) - dm_init = grad_calc_params.get('dm_init') + dm_init = grad_calc_params.get("dm_init") if dm_init is None: - # multiple time series mode. Keep grad_params always as a last dimension + # multiple time series mode. Keep grad_params always as a last dimension dm_init = np.zeros((state_dim, time_series_no, grad_params_no)) - dP_init = grad_calc_params.get('dP_init') + dP_init = grad_calc_params.get("dP_init") if dP_init is None: - dP_init = np.zeros((state_dim,state_dim,grad_params_no)) + dP_init = np.zeros((state_dim, state_dim, grad_params_no)) else: dA = None dQ = None @@ -847,17 +949,33 @@ class DescreteStateSpace(object): dm_init = None dP_init = None - dynamic_callables = Std_Dynamic_Callables_Class(c_p_A, A_time_var_index, c_p_Q, c_index, Q_time_var_index, 20, dA, dQ) - measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR) + dynamic_callables = Std_Dynamic_Callables_Class( + c_p_A, A_time_var_index, c_p_Q, c_index, Q_time_var_index, 20, dA, dQ + ) + measurement_callables = Std_Measurement_Callables_Class( + p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR + ) - (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) = \ - cls._kalman_algorithm_raw(state_dim, dynamic_callables, - measurement_callables, Y, m_init, - P_init, p_kalman_filter_type = p_kalman_filter_type, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, - grad_params_no=grad_params_no, - dm_init=dm_init, dP_init=dP_init) + ( + M, + P, + log_likelihood, + grad_log_likelihood, + dynamic_callables, + ) = cls._kalman_algorithm_raw( + state_dim, + dynamic_callables, + measurement_callables, + Y, + m_init, + P_init, + p_kalman_filter_type=p_kalman_filter_type, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + grad_params_no=grad_params_no, + dm_init=dm_init, + dP_init=dP_init, + ) # restore shapes so that input parameters are unchenged if old_index_shape is not None: @@ -879,12 +997,23 @@ class DescreteStateSpace(object): p_R.shape = old_R_shape # Return values - return (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) + return (M, P, log_likelihood, grad_log_likelihood, dynamic_callables) @classmethod - def extended_kalman_filter(cls,p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None, - P_init=None,calc_log_likelihood=False): - + def extended_kalman_filter( + cls, + p_state_dim, + p_a, + p_f_A, + p_f_Q, + p_h, + p_f_H, + p_f_R, + Y, + m_init=None, + P_init=None, + calc_log_likelihood=False, + ): """ Extended Kalman Filter @@ -954,83 +1083,95 @@ class DescreteStateSpace(object): """ # Y - Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) + Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) - # m_init + # m_init if m_init is None: - m_init = np.zeros((p_state_dim,1)) + m_init = np.zeros((p_state_dim, 1)) else: m_init = np.atleast_2d(m_init).T # P_init if P_init is None: P_init = np.eye(p_state_dim) - elif not isinstance(P_init, collections.Iterable): #scalar - P_init = P_init*np.eye(p_state_dim) + elif not isinstance(P_init, collections.Iterable): # scalar + P_init = P_init * np.eye(p_state_dim) if p_a is None: - p_a = lambda k,m,A: np.dot(A, m) + p_a = lambda k, m, A: np.dot(A, m) old_A_shape = None - if not isinstance(p_f_A, types.FunctionType): # not a function but array + if not isinstance(p_f_A, types.FunctionType): # not a function but array p_f_A = np.atleast_1d(p_f_A) (p_A, old_A_shape) = cls._check_A_matrix(p_f_A) - p_f_A = lambda k, m, P: p_A[:,:, 0] # make function + p_f_A = lambda k, m, P: p_A[:, :, 0] # make function else: if p_f_A(1, m_init, P_init).shape[0] != m_init.shape[0]: raise ValueError("p_f_A function returns matrix of wrong size") old_Q_shape = None - if not isinstance(p_f_Q, types.FunctionType): # not a function but array + if not isinstance(p_f_Q, types.FunctionType): # not a function but array p_f_Q = np.atleast_1d(p_f_Q) (p_Q, old_Q_shape) = cls._check_Q_matrix(p_f_Q) - p_f_Q = lambda k: p_Q[:,:, 0] # make function + p_f_Q = lambda k: p_Q[:, :, 0] # make function else: if p_f_Q(1).shape[0] != m_init.shape[0]: raise ValueError("p_f_Q function returns matrix of wrong size") if p_h is None: - lambda k,m,H: np.dot(H, m) + lambda k, m, H: np.dot(H, m) old_H_shape = None - if not isinstance(p_f_H, types.FunctionType): # not a function but array + if not isinstance(p_f_H, types.FunctionType): # not a function but array p_f_H = np.atleast_1d(p_f_H) (p_H, old_H_shape) = cls._check_H_matrix(p_f_H) - p_f_H = lambda k, m, P: p_H # make function + p_f_H = lambda k, m, P: p_H # make function else: if p_f_H(1, m_init, P_init).shape[0] != Y.shape[1]: raise ValueError("p_f_H function returns matrix of wrong size") old_R_shape = None - if not isinstance(p_f_R, types.FunctionType): # not a function but array + if not isinstance(p_f_R, types.FunctionType): # not a function but array p_f_R = np.atleast_1d(p_f_R) (p_R, old_R_shape) = cls._check_H_matrix(p_f_R) - p_f_R = lambda k: p_R # make function + p_f_R = lambda k: p_R # make function else: if p_f_R(1).shape[0] != m_init.shape[0]: raise ValueError("p_f_R function returns matrix of wrong size") -# class dynamic_callables_class(Dynamic_Model_Callables): -# -# Ak = -# Qk = - + # class dynamic_callables_class(Dynamic_Model_Callables): + # + # Ak = + # Qk = class measurement_callables_class(R_handling_Class): - def __init__(self,R, index, R_time_var_index, unique_R_number): - super(measurement_callables_class,self).__init__(R, index, R_time_var_index, unique_R_number) + def __init__(self, R, index, R_time_var_index, unique_R_number): + super(measurement_callables_class, self).__init__( + R, index, R_time_var_index, unique_R_number + ) Hk = AddMethodToClass(f_H) f_h = AddMethodToClass(f_hl) - - (M, P,log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init, - P_init, calc_log_likelihood, - calc_grad_log_likelihood=False, grad_calc_params=None) + (M, P, log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw( + p_state_dim, + p_a, + p_f_A, + p_f_Q, + p_h, + p_f_H, + p_f_R, + Y, + m_init, + P_init, + calc_log_likelihood, + calc_grad_log_likelihood=False, + grad_calc_params=None, + ) if old_Y_shape is not None: Y.shape = old_Y_shape @@ -1050,11 +1191,21 @@ class DescreteStateSpace(object): return (M, P) @classmethod - def _kalman_algorithm_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, Y, m_init, - P_init, p_kalman_filter_type='regular', - calc_log_likelihood=False, - calc_grad_log_likelihood=False, grad_params_no=None, - dm_init=None, dP_init=None): + def _kalman_algorithm_raw( + cls, + state_dim, + p_dynamic_callables, + p_measurement_callables, + Y, + m_init, + P_init, + p_kalman_filter_type="regular", + calc_log_likelihood=False, + calc_grad_log_likelihood=False, + grad_params_no=None, + dm_init=None, + dP_init=None, + ): """ General nonlinear filtering algorithm for inference in the state-space model: @@ -1166,94 +1317,142 @@ class DescreteStateSpace(object): """ - steps_no = Y.shape[0] # number of steps in the Kalman Filter - time_series_no = Y.shape[2] # multiple time series mode + steps_no = Y.shape[0] # number of steps in the Kalman Filter + time_series_no = Y.shape[2] # multiple time series mode # Allocate space for results # Mean estimations. Initial values will be included - M = np.empty(((steps_no+1),state_dim,time_series_no)) - M[0,:,:] = m_init # Initialize mean values + M = np.empty(((steps_no + 1), state_dim, time_series_no)) + M[0, :, :] = m_init # Initialize mean values # Variance estimations. Initial values will be included - P = np.empty(((steps_no+1),state_dim,state_dim)) - P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful - P[0,:,:] = P_init # Initialize initial covariance matrix + P = np.empty(((steps_no + 1), state_dim, state_dim)) + P_init = 0.5 * ( + P_init + P_init.T + ) # symmetrize initial covariance. In some ustable cases this is uiseful + P[0, :, :] = P_init # Initialize initial covariance matrix - if p_kalman_filter_type == 'svd': - (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) - S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance - P_upd = (P_init, S,U) + if p_kalman_filter_type == "svd": + (U, S, Vh) = sp.linalg.svd( + P_init, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) + S[(S == 0)] = 1e-17 # allows to run algorithm for singular initial variance + P_upd = (P_init, S, U) log_likelihood = 0 if calc_log_likelihood else None grad_log_likelihood = 0 if calc_grad_log_likelihood else None - #setting initial values for derivatives update + # setting initial values for derivatives update dm_upd = dm_init dP_upd = dP_init # Main loop of the Kalman filter - for k in range(0,steps_no): + for k in range(0, steps_no): # In this loop index for new estimations is (k+1), old - (k) # This happened because initial values are stored at 0-th index. - prev_mean = M[k,:,:] # mean from the previous step + prev_mean = M[k, :, :] # mean from the previous step - if p_kalman_filter_type == 'svd': - m_pred, P_pred, dm_pred, dP_pred = \ - cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables, + if p_kalman_filter_type == "svd": + m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step_SVD( + k, + prev_mean, + P_upd, + p_dynamic_callables, calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_upd, p_dP = dP_upd) + p_dm=dm_upd, + p_dP=dP_upd, + ) else: - m_pred, P_pred, dm_pred, dP_pred = \ - cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, + m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step( + k, + prev_mean, + P[k, :, :], + p_dynamic_callables, calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_upd, p_dP = dP_upd ) + p_dm=dm_upd, + p_dP=dP_upd, + ) - k_measurment = Y[k,:,:] + k_measurment = Y[k, :, :] - if (np.any(np.isnan(k_measurment)) == False): - if p_kalman_filter_type == 'svd': - m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, - k_measurment, calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_pred, p_dP = dP_pred ) + if np.any(np.isnan(k_measurment)) == False: + if p_kalman_filter_type == "svd": + ( + m_upd, + P_upd, + log_likelihood_update, + dm_upd, + dP_upd, + d_log_likelihood_update, + ) = cls._kalman_update_step_SVD( + k, + m_pred, + P_pred, + p_measurement_callables, + k_measurment, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm=dm_pred, + p_dP=dP_pred, + ) - - # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, - # calc_log_likelihood=calc_log_likelihood, - # calc_grad_log_likelihood=calc_grad_log_likelihood, - # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) - # - # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, - # overwrite_a=False,check_finite=True) - # P_upd = (P_upd, S,U) + # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ + # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, + # calc_log_likelihood=calc_log_likelihood, + # calc_grad_log_likelihood=calc_grad_log_likelihood, + # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) + # + # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, + # overwrite_a=False,check_finite=True) + # P_upd = (P_upd, S,U) else: - m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_pred, p_dP = dP_pred ) + ( + m_upd, + P_upd, + log_likelihood_update, + dm_upd, + dP_upd, + d_log_likelihood_update, + ) = cls._kalman_update_step( + k, + m_pred, + P_pred, + p_measurement_callables, + k_measurment, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm=dm_pred, + p_dP=dP_pred, + ) else: -# if k_measurment.shape != (1,1): -# raise ValueError("Nan measurements are currently not supported for \ -# multidimensional output and multiple time series.") -# else: -# m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred -# log_likelihood_update = 0.0; -# d_log_likelihood_update = 0.0; + # if k_measurment.shape != (1,1): + # raise ValueError("Nan measurements are currently not supported for \ + # multidimensional output and multiple time series.") + # else: + # m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred + # log_likelihood_update = 0.0; + # d_log_likelihood_update = 0.0; if not np.all(np.isnan(k_measurment)): - raise ValueError("""Nan measurements are currently not supported if - they are intermixed with not NaN measurements""") + raise ValueError( + """Nan measurements are currently not supported if + they are intermixed with not NaN measurements""" + ) else: - m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred + m_upd = m_pred + P_upd = P_pred + dm_upd = dm_pred + dP_upd = dP_pred if calc_log_likelihood: log_likelihood_update = np.zeros((time_series_no,)) if calc_grad_log_likelihood: - d_log_likelihood_update = np.zeros((grad_params_no,time_series_no)) - + d_log_likelihood_update = np.zeros( + (grad_params_no, time_series_no) + ) if calc_log_likelihood: log_likelihood += log_likelihood_update @@ -1261,20 +1460,33 @@ class DescreteStateSpace(object): if calc_grad_log_likelihood: grad_log_likelihood += d_log_likelihood_update - M[k+1,:,:] = m_upd # separate mean value for each time series + M[k + 1, :, :] = m_upd # separate mean value for each time series - if p_kalman_filter_type == 'svd': - P[k+1,:,:] = P_upd[0] + if p_kalman_filter_type == "svd": + P[k + 1, :, :] = P_upd[0] else: - P[k+1,:,:] = P_upd + P[k + 1, :, :] = P_upd # !!!Print statistics! Print sizes of matrices # !!!Print statistics! Print iteration time base on another boolean variable - return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False)) + return ( + M, + P, + log_likelihood, + grad_log_likelihood, + p_dynamic_callables.reset(False), + ) @staticmethod - def _kalman_prediction_step(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, - p_dm = None, p_dP = None): + def _kalman_prediction_step( + k, + p_m, + p_P, + p_dyn_model_callable, + calc_grad_log_likelihood=False, + p_dm=None, + p_dP=None, + ): """ Desctrete prediction function @@ -1315,17 +1527,23 @@ class DescreteStateSpace(object): """ # index correspond to values from previous iteration. - A = p_dyn_model_callable.Ak(k,p_m,p_P) # state transition matrix (or Jacobian) - Q = p_dyn_model_callable.Qk(k) # state noise matrix + A = p_dyn_model_callable.Ak( + k, p_m, p_P + ) # state transition matrix (or Jacobian) + Q = p_dyn_model_callable.Qk(k) # state noise matrix # Prediction step -> - m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean - P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance + m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean + P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance # Prediction step <- if calc_grad_log_likelihood: - dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters - dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters + dA_all_params = p_dyn_model_callable.dAk( + k + ) # derivatives of A wrt parameters + dQ_all_params = p_dyn_model_callable.dQk( + k + ) # derivatives of Q wrt parameters param_number = p_dP.shape[2] @@ -1334,19 +1552,21 @@ class DescreteStateSpace(object): dP_pred = np.empty(p_dP.shape) for j in range(param_number): - dA = dA_all_params[:,:,j] - dQ = dQ_all_params[:,:,j] + dA = dA_all_params[:, :, j] + dQ = dQ_all_params[:, :, j] - dP = p_dP[:,:,j] - dm = p_dm[:,:,j] - dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, dm) + dP = p_dP[:, :, j] + dm = p_dm[:, :, j] + dm_pred[:, :, j] = np.dot(dA, p_m) + np.dot(A, dm) # prediction step derivatives for current parameter: - dP_pred[:,:,j] = np.dot( dA ,np.dot(p_P, A.T)) - dP_pred[:,:,j] += dP_pred[:,:,j].T - dP_pred[:,:,j] += np.dot( A ,np.dot(dP, A.T)) + dQ + dP_pred[:, :, j] = np.dot(dA, np.dot(p_P, A.T)) + dP_pred[:, :, j] += dP_pred[:, :, j].T + dP_pred[:, :, j] += np.dot(A, np.dot(dP, A.T)) + dQ - dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize + dP_pred[:, :, j] = 0.5 * ( + dP_pred[:, :, j] + dP_pred[:, :, j].T + ) # symmetrize else: dm_pred = None dP_pred = None @@ -1354,8 +1574,15 @@ class DescreteStateSpace(object): return m_pred, P_pred, dm_pred, dP_pred @staticmethod - def _kalman_prediction_step_SVD(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, - p_dm = None, p_dP = None): + def _kalman_prediction_step_SVD( + k, + p_m, + p_P, + p_dyn_model_callable, + calc_grad_log_likelihood=False, + p_dm=None, + p_dP=None, + ): """ Desctrete prediction function @@ -1398,33 +1625,46 @@ class DescreteStateSpace(object): # covariance from the previous step and its SVD decomposition # p_prev_cov = v * S * V.T Prev_cov, S_old, V_old = p_P - #p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step + # p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step # index correspond to values from previous iteration. - A = p_dyn_model_callable.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian) - Q = p_dyn_model_callable.Qk(k) # state noise matrx. This is necessary for the square root calculation (next step) + A = p_dyn_model_callable.Ak( + k, p_m, Prev_cov + ) # state transition matrix (or Jacobian) + Q = p_dyn_model_callable.Qk( + k + ) # state noise matrx. This is necessary for the square root calculation (next step) Q_sr = p_dyn_model_callable.Q_srk(k) # Prediction step -> - m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean + m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean # coavariance prediction have changed: - svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) ) - (U,S,Vh) = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) + svd_1_matr = np.vstack(((np.sqrt(S_old) * np.dot(A, V_old)).T, Q_sr.T)) + (U, S, Vh) = sp.linalg.svd( + svd_1_matr, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) # predicted variance computed by the regular method. For testing - #P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q + # P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q V_new = Vh.T S_new = S**2 - P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance + P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance P_pred = (P_pred, S_new, Vh.T) # Prediction step <- # derivatives if calc_grad_log_likelihood: - dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters - dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters + dA_all_params = p_dyn_model_callable.dAk( + k + ) # derivatives of A wrt parameters + dQ_all_params = p_dyn_model_callable.dQk( + k + ) # derivatives of Q wrt parameters param_number = p_dP.shape[2] @@ -1433,20 +1673,21 @@ class DescreteStateSpace(object): dP_pred = np.empty(p_dP.shape) for j in range(param_number): - dA = dA_all_params[:,:,j] - dQ = dQ_all_params[:,:,j] + dA = dA_all_params[:, :, j] + dQ = dQ_all_params[:, :, j] - #dP = p_dP[:,:,j] - #dm = p_dm[:,:,j] - dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j]) + # dP = p_dP[:,:,j] + # dm = p_dm[:,:,j] + dm_pred[:, :, j] = np.dot(dA, p_m) + np.dot(A, p_dm[:, :, j]) # prediction step derivatives for current parameter: + dP_pred[:, :, j] = np.dot(dA, np.dot(Prev_cov, A.T)) + dP_pred[:, :, j] += dP_pred[:, :, j].T + dP_pred[:, :, j] += np.dot(A, np.dot(p_dP[:, :, j], A.T)) + dQ - dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T)) - dP_pred[:,:,j] += dP_pred[:,:,j].T - dP_pred[:,:,j] += np.dot( A ,np.dot(p_dP[:,:,j], A.T)) + dQ - - dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize + dP_pred[:, :, j] = 0.5 * ( + dP_pred[:, :, j] + dP_pred[:, :, j].T + ) # symmetrize else: dm_pred = None dP_pred = None @@ -1454,8 +1695,17 @@ class DescreteStateSpace(object): return m_pred, P_pred, dm_pred, dP_pred @staticmethod - def _kalman_update_step(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, - calc_grad_log_likelihood=False, p_dm = None, p_dP = None): + def _kalman_update_step( + k, + p_m, + p_P, + p_meas_model_callable, + measurement, + calc_log_likelihood=False, + calc_grad_log_likelihood=False, + p_dm=None, + p_dP=None, + ): """ Input: @@ -1507,45 +1757,54 @@ class DescreteStateSpace(object): adds extra columns to the gradient. """ - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() - m_pred = p_m # from prediction step - P_pred = p_P # from prediction step + m_pred = p_m # from prediction step + P_pred = p_P # from prediction step H = p_meas_model_callable.Hk(k, m_pred, P_pred) R = p_meas_model_callable.Rk(k) - time_series_no = p_m.shape[1] # number of time serieses + time_series_no = p_m.shape[1] # number of time serieses - log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None + log_likelihood_update = None + dm_upd = None + dP_upd = None + d_log_likelihood_update = None # Update step (only if there is data) - #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. - v = measurement-p_meas_model_callable.f_h(k, m_pred, H) + # if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. + v = measurement - p_meas_model_callable.f_h(k, m_pred, H) S = H.dot(P_pred).dot(H.T) + R - if measurement.shape[0]==1: # measurements are one dimensional - if (S < 0): - raise ValueError("Kalman Filter Update: S is negative step %i" % k ) - #import pdb; pdb.set_trace() + if measurement.shape[0] == 1: # measurements are one dimensional + if S < 0: + raise ValueError("Kalman Filter Update: S is negative step %i" % k) + # import pdb; pdb.set_trace() K = P_pred.dot(H.T) / S if calc_log_likelihood: - log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) + - v*v / S) - #log_likelihood_update = log_likelihood_update[0,0] # to make int - if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None. + log_likelihood_update = -0.5 * ( + np.log(2 * np.pi) + np.log(S) + v * v / S + ) + # log_likelihood_update = log_likelihood_update[0,0] # to make int + if np.any( + np.isnan(log_likelihood_update) + ): # some member in P_pred is None. raise ValueError("Nan values in likelihood update!") - LL = None; islower = None + LL = None + islower = None else: - LL,islower = linalg.cho_factor(S) - K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T + LL, islower = linalg.cho_factor(S) + K = linalg.cho_solve((LL, islower), H.dot(P_pred.T)).T if calc_log_likelihood: - log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + - 2*np.sum( np.log(np.diag(LL)) ) +\ - np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v + log_likelihood_update = -0.5 * ( + v.shape[0] * np.log(2 * np.pi) + + 2 * np.sum(np.log(np.diag(LL))) + + np.sum((linalg.cho_solve((LL, islower), v)) * v, axis=0) + ) # diagonal of v.T*S^{-1}*v if calc_grad_log_likelihood: - dm_pred_all_params = p_dm # derivativas of the prediction phase + dm_pred_all_params = p_dm # derivativas of the prediction phase dP_pred_all_params = p_dP param_number = p_dP.shape[2] @@ -1556,75 +1815,95 @@ class DescreteStateSpace(object): dm_upd = np.empty(dm_pred_all_params.shape) dP_upd = np.empty(dP_pred_all_params.shape) - # firts dimension parameter_no, second - time series number - d_log_likelihood_update = np.empty((param_number,time_series_no)) + # firts dimension parameter_no, second - time series number + d_log_likelihood_update = np.empty((param_number, time_series_no)) for param in range(param_number): + dH = dH_all_params[:, :, param] + dR = dR_all_params[:, :, param] - dH = dH_all_params[:,:,param] - dR = dR_all_params[:,:,param] - - dm_pred = dm_pred_all_params[:,:,param] - dP_pred = dP_pred_all_params[:,:,param] + dm_pred = dm_pred_all_params[:, :, param] + dP_pred = dP_pred_all_params[:, :, param] # Terms in the likelihood derivatives - dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) - dS = np.dot(dH, np.dot( P_pred, H.T)) - dS += dS.T - dS += np.dot(H, np.dot( dP_pred, H.T)) + dR + dv = -np.dot(dH, m_pred) - np.dot(H, dm_pred) + dS = np.dot(dH, np.dot(P_pred, H.T)) + dS += dS.T + dS += np.dot(H, np.dot(dP_pred, H.T)) + dR - # TODO: maybe symmetrize dS + # TODO: maybe symmetrize dS - #dm and dP for the next stem - if LL is not None: # the state vector is not a scalar - tmp1 = linalg.cho_solve((LL,islower), H).T - tmp2 = linalg.cho_solve((LL,islower), dH).T - tmp3 = linalg.cho_solve((LL,islower), dS).T - else: # the state vector is a scalar - tmp1 = H.T / S - tmp2 = dH.T / S - tmp3 = dS.T / S + # dm and dP for the next stem + if LL is not None: # the state vector is not a scalar + tmp1 = linalg.cho_solve((LL, islower), H).T + tmp2 = linalg.cho_solve((LL, islower), dH).T + tmp3 = linalg.cho_solve((LL, islower), dS).T + else: # the state vector is a scalar + tmp1 = H.T / S + tmp2 = dH.T / S + tmp3 = dS.T / S - dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \ - np.dot( P_pred, np.dot( tmp1, tmp3 ) ) + dK = ( + np.dot(dP_pred, tmp1) + + np.dot(P_pred, tmp2) + - np.dot(P_pred, np.dot(tmp1, tmp3)) + ) # terms required for the next step, save this for each parameter - dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) + dm_upd[:, :, param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) - dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) - dP_upd[:,:,param] += dP_upd[:,:,param].T - dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T)) + dP_upd[:, :, param] = -np.dot(dK, np.dot(S, K.T)) + dP_upd[:, :, param] += dP_upd[:, :, param].T + dP_upd[:, :, param] += dP_pred - np.dot(K, np.dot(dS, K.T)) - dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize + dP_upd[:, :, param] = 0.5 * ( + dP_upd[:, :, param] + dP_upd[:, :, param].T + ) # symmetrize # computing the likelihood change for each parameter: - if LL is not None: # the state vector is not 1D - #tmp4 = linalg.cho_solve((LL,islower), dv) - tmp5 = linalg.cho_solve((LL,islower), v) - else: # the state vector is a scalar - #tmp4 = dv / S - tmp5 = v / S + if LL is not None: # the state vector is not 1D + # tmp4 = linalg.cho_solve((LL,islower), dv) + tmp5 = linalg.cho_solve((LL, islower), v) + else: # the state vector is a scalar + # tmp4 = dv / S + tmp5 = v / S - - d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \ - np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) + d_log_likelihood_update[param, :] = -( + 0.5 * np.sum(np.diag(tmp3)) + + np.sum(tmp5 * dv, axis=0) + - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) + ) # Before - #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ - #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) - - + # d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ + # np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) # Compute the actual updates for mean and variance of the states. - m_upd = m_pred + K.dot( v ) + m_upd = m_pred + K.dot(v) # Covariance update and ensure it is symmetric P_upd = K.dot(S).dot(K.T) - P_upd = 0.5*(P_upd + P_upd.T) - P_upd = P_pred - P_upd# this update matrix is symmetric + P_upd = 0.5 * (P_upd + P_upd.T) + P_upd = P_pred - P_upd # this update matrix is symmetric - return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update + return ( + m_upd, + P_upd, + log_likelihood_update, + dm_upd, + dP_upd, + d_log_likelihood_update, + ) @staticmethod - def _kalman_update_step_SVD(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, - calc_grad_log_likelihood=False, p_dm = None, p_dP = None): + def _kalman_update_step_SVD( + k, + p_m, + p_P, + p_meas_model_callable, + measurement, + calc_log_likelihood=False, + calc_grad_log_likelihood=False, + p_dm=None, + p_dP=None, + ): """ Input: @@ -1700,67 +1979,84 @@ class DescreteStateSpace(object): """ - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() - m_pred = p_m # from prediction step - P_pred,S_pred,V_pred = p_P # from prediction step + m_pred = p_m # from prediction step + P_pred, S_pred, V_pred = p_P # from prediction step H = p_meas_model_callable.Hk(k, m_pred, P_pred) R = p_meas_model_callable.Rk(k) - R_isr = p_meas_model_callable.R_isrk(k) # square root of the inverse of R matrix + R_isr = p_meas_model_callable.R_isrk( + k + ) # square root of the inverse of R matrix - time_series_no = p_m.shape[1] # number of time serieses + time_series_no = p_m.shape[1] # number of time serieses - log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None + log_likelihood_update = None + dm_upd = None + dP_upd = None + d_log_likelihood_update = None # Update step (only if there is data) - #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. - v = measurement-p_meas_model_callable.f_h(k, m_pred, H) + # if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. + v = measurement - p_meas_model_callable.f_h(k, m_pred, H) - svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) ) + svd_2_matr = np.vstack( + (np.dot(R_isr.T, np.dot(H, V_pred)), np.diag(1.0 / np.sqrt(S_pred))) + ) - (U,S,Vh) = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) + (U, S, Vh) = sp.linalg.svd( + svd_2_matr, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) - # P_upd = U_upd S_upd**2 U_upd.T + # P_upd = U_upd S_upd**2 U_upd.T U_upd = np.dot(V_pred, Vh.T) - S_upd = (1.0/S)**2 + S_upd = (1.0 / S) ** 2 - P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance - P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step + P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance + P_upd = (P_upd, S_upd, U_upd) # tuple to pass to the next step - # stil need to compute S and K for derivative computation + # stil need to compute S and K for derivative computation S = H.dot(P_pred).dot(H.T) + R - if measurement.shape[0]==1: # measurements are one dimensional - if (S < 0): - raise ValueError("Kalman Filter Update: S is negative step %i" % k ) - #import pdb; pdb.set_trace() + if measurement.shape[0] == 1: # measurements are one dimensional + if S < 0: + raise ValueError("Kalman Filter Update: S is negative step %i" % k) + # import pdb; pdb.set_trace() K = P_pred.dot(H.T) / S if calc_log_likelihood: - log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) + - v*v / S) - #log_likelihood_update = log_likelihood_update[0,0] # to make int - if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None. + log_likelihood_update = -0.5 * ( + np.log(2 * np.pi) + np.log(S) + v * v / S + ) + # log_likelihood_update = log_likelihood_update[0,0] # to make int + if np.any( + np.isnan(log_likelihood_update) + ): # some member in P_pred is None. raise ValueError("Nan values in likelihood update!") - LL = None; islower = None + LL = None + islower = None else: - LL,islower = linalg.cho_factor(S) - K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T + LL, islower = linalg.cho_factor(S) + K = linalg.cho_solve((LL, islower), H.dot(P_pred.T)).T if calc_log_likelihood: - log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + - 2*np.sum( np.log(np.diag(LL)) ) +\ - np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v - + log_likelihood_update = -0.5 * ( + v.shape[0] * np.log(2 * np.pi) + + 2 * np.sum(np.log(np.diag(LL))) + + np.sum((linalg.cho_solve((LL, islower), v)) * v, axis=0) + ) # diagonal of v.T*S^{-1}*v # Old method of computing updated covariance (for testing) -> - #P_upd_tst = K.dot(S).dot(K.T) - #P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T) - #P_upd_tst = P_pred - P_upd_tst# this update matrix is symmetric + # P_upd_tst = K.dot(S).dot(K.T) + # P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T) + # P_upd_tst = P_pred - P_upd_tst# this update matrix is symmetric # Old method of computing updated covariance (for testing) <- if calc_grad_log_likelihood: - dm_pred_all_params = p_dm # derivativas of the prediction phase + dm_pred_all_params = p_dm # derivativas of the prediction phase dP_pred_all_params = p_dP param_number = p_dP.shape[2] @@ -1771,67 +2067,88 @@ class DescreteStateSpace(object): dm_upd = np.empty(dm_pred_all_params.shape) dP_upd = np.empty(dP_pred_all_params.shape) - # firts dimension parameter_no, second - time series number - d_log_likelihood_update = np.empty((param_number,time_series_no)) + # firts dimension parameter_no, second - time series number + d_log_likelihood_update = np.empty((param_number, time_series_no)) for param in range(param_number): + dH = dH_all_params[:, :, param] + dR = dR_all_params[:, :, param] - dH = dH_all_params[:,:,param] - dR = dR_all_params[:,:,param] - - dm_pred = dm_pred_all_params[:,:,param] - dP_pred = dP_pred_all_params[:,:,param] + dm_pred = dm_pred_all_params[:, :, param] + dP_pred = dP_pred_all_params[:, :, param] # Terms in the likelihood derivatives - dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) - dS = np.dot(dH, np.dot( P_pred, H.T)) - dS += dS.T - dS += np.dot(H, np.dot( dP_pred, H.T)) + dR + dv = -np.dot(dH, m_pred) - np.dot(H, dm_pred) + dS = np.dot(dH, np.dot(P_pred, H.T)) + dS += dS.T + dS += np.dot(H, np.dot(dP_pred, H.T)) + dR # TODO: maybe symmetrize dS - #dm and dP for the next stem - if LL is not None: # the state vector is not a scalar - tmp1 = linalg.cho_solve((LL,islower), H).T - tmp2 = linalg.cho_solve((LL,islower), dH).T - tmp3 = linalg.cho_solve((LL,islower), dS).T - else: # the state vector is a scalar - tmp1 = H.T / S - tmp2 = dH.T / S - tmp3 = dS.T / S + # dm and dP for the next stem + if LL is not None: # the state vector is not a scalar + tmp1 = linalg.cho_solve((LL, islower), H).T + tmp2 = linalg.cho_solve((LL, islower), dH).T + tmp3 = linalg.cho_solve((LL, islower), dS).T + else: # the state vector is a scalar + tmp1 = H.T / S + tmp2 = dH.T / S + tmp3 = dS.T / S - dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \ - np.dot( P_pred, np.dot( tmp1, tmp3 ) ) + dK = ( + np.dot(dP_pred, tmp1) + + np.dot(P_pred, tmp2) + - np.dot(P_pred, np.dot(tmp1, tmp3)) + ) - # terms required for the next step, save this for each parameter - dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) + # terms required for the next step, save this for each parameter + dm_upd[:, :, param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) - dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) - dP_upd[:,:,param] += dP_upd[:,:,param].T - dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T)) + dP_upd[:, :, param] = -np.dot(dK, np.dot(S, K.T)) + dP_upd[:, :, param] += dP_upd[:, :, param].T + dP_upd[:, :, param] += dP_pred - np.dot(K, np.dot(dS, K.T)) - dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize - # computing the likelihood change for each parameter: - if LL is not None: # the state vector is not 1D - tmp5 = linalg.cho_solve((LL,islower), v) - else: # the state vector is a scalar - tmp5 = v / S + dP_upd[:, :, param] = 0.5 * ( + dP_upd[:, :, param] + dP_upd[:, :, param].T + ) # symmetrize + # computing the likelihood change for each parameter: + if LL is not None: # the state vector is not 1D + tmp5 = linalg.cho_solve((LL, islower), v) + else: # the state vector is a scalar + tmp5 = v / S - - d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \ - np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) + d_log_likelihood_update[param, :] = -( + 0.5 * np.sum(np.diag(tmp3)) + + np.sum(tmp5 * dv, axis=0) + - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) + ) # Before - #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ - #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) + # d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ + # np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) # Compute the actual updates for mean of the states. Variance update # is computed earlier. - m_upd = m_pred + K.dot( v ) + m_upd = m_pred + K.dot(v) - return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update + return ( + m_upd, + P_upd, + log_likelihood_update, + dm_upd, + dP_upd, + d_log_likelihood_update, + ) @staticmethod - def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step, - p_P_prev_step, p_dynamic_callables): + def _rts_smoother_update_step( + k, + p_m, + p_P, + p_m_pred, + p_P_pred, + p_m_prev_step, + p_P_prev_step, + p_dynamic_callables, + ): """ Rauch–Tung–Striebel(RTS) update step @@ -1867,31 +2184,30 @@ class DescreteStateSpace(object): """ - A = p_dynamic_callables.Ak(k,p_m,p_P) # state transition matrix (or Jacobian) + A = p_dynamic_callables.Ak(k, p_m, p_P) # state transition matrix (or Jacobian) - tmp = np.dot( A, p_P.T) - if A.shape[0] == 1: # 1D states - G = tmp.T / p_P_pred # P[:,:,k] is symmetric + tmp = np.dot(A, p_P.T) + if A.shape[0] == 1: # 1D states + G = tmp.T / p_P_pred # P[:,:,k] is symmetric else: try: - LL,islower = linalg.cho_factor(p_P_pred) - G = linalg.cho_solve((LL,islower),tmp).T + LL, islower = linalg.cho_factor(p_P_pred) + G = linalg.cho_solve((LL, islower), tmp).T except: # It happende that p_P_pred has several near zero eigenvalues # hence the Cholesky method does not work. res = sp.linalg.lstsq(p_P_pred, tmp) G = res[0].T - m_upd = p_m + G.dot( p_m_prev_step-p_m_pred ) - P_upd = p_P + G.dot( p_P_prev_step-p_P_pred).dot(G.T) + m_upd = p_m + G.dot(p_m_prev_step - p_m_pred) + P_upd = p_P + G.dot(p_P_prev_step - p_P_pred).dot(G.T) - P_upd = 0.5*(P_upd + P_upd.T) + P_upd = 0.5 * (P_upd + P_upd.T) return m_upd, P_upd, G @classmethod - def rts_smoother(cls,state_dim, p_dynamic_callables, filter_means, - filter_covars): + def rts_smoother(cls, state_dim, p_dynamic_callables, filter_means, filter_covars): """ This function implements Rauch–Tung–Striebel(RTS) smoother algorithm based on the results of kalman_filter_raw. @@ -1934,41 +2250,69 @@ class DescreteStateSpace(object): Smoothed estimates of the state covariances """ - no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance) + no_steps = ( + filter_covars.shape[0] - 1 + ) # number of steps (minus initial covariance) - M = np.empty(filter_means.shape) # smoothed means - P = np.empty(filter_covars.shape) # smoothed covars - #G = np.empty( (no_steps,state_dim,state_dim) ) # G from the update step of the smoother + M = np.empty(filter_means.shape) # smoothed means + P = np.empty(filter_covars.shape) # smoothed covars + # G = np.empty( (no_steps,state_dim,state_dim) ) # G from the update step of the smoother - M[-1,:] = filter_means[-1,:] - P[-1,:,:] = filter_covars[-1,:,:] - for k in range(no_steps-1,-1,-1): + M[-1, :] = filter_means[-1, :] + P[-1, :, :] = filter_covars[-1, :, :] + for k in range(no_steps - 1, -1, -1): + m_pred, P_pred, tmp1, tmp2 = cls._kalman_prediction_step( + k, + filter_means[k, :], + filter_covars[k, :, :], + p_dynamic_callables, + calc_grad_log_likelihood=False, + ) + p_m = filter_means[k, :] + if len(p_m.shape) < 2: + p_m.shape = (p_m.shape[0], 1) - m_pred, P_pred, tmp1, tmp2 = \ - cls._kalman_prediction_step(k, filter_means[k,:], - filter_covars[k,:,:], p_dynamic_callables, - calc_grad_log_likelihood=False) - p_m = filter_means[k,:] - if len(p_m.shape)<2: - p_m.shape = (p_m.shape[0],1) + p_m_prev_step = M[k + 1, :] + if len(p_m_prev_step.shape) < 2: + p_m_prev_step.shape = (p_m_prev_step.shape[0], 1) - p_m_prev_step = M[k+1,:] - if len(p_m_prev_step.shape)<2: - p_m_prev_step.shape = (p_m_prev_step.shape[0],1) + m_upd, P_upd, G_tmp = cls._rts_smoother_update_step( + k, + p_m, + filter_covars[k, :, :], + m_pred, + P_pred, + p_m_prev_step, + P[k + 1, :, :], + p_dynamic_callables, + ) - m_upd, P_upd, G_tmp = cls._rts_smoother_update_step(k, - p_m ,filter_covars[k,:,:], - m_pred, P_pred, p_m_prev_step ,P[k+1,:,:], p_dynamic_callables) - - M[k,:] = m_upd#np.squeeze(m_upd) - P[k,:,:] = P_upd - #G[k,:,:] = G_upd.T # store transposed G. + M[k, :] = m_upd # np.squeeze(m_upd) + P[k, :, :] = P_upd + # G[k,:,:] = G_upd.T # store transposed G. # Return values - return (M, P) #, G) + return (M, P) # , G) @staticmethod - def _EM_gradient(A,Q,H,R,m_init,P_init,measurements, M, P, G, dA, dQ, dH, dR, dm_init, dP_init): + def _EM_gradient( + A, + Q, + H, + R, + m_init, + P_init, + measurements, + M, + P, + G, + dA, + dQ, + dH, + dR, + dm_init, + dP_init, + ): """ Gradient computation with the EM algorithm. @@ -1979,35 +2323,37 @@ class DescreteStateSpace(object): P: Variances from the smoother G: Gains? from the smoother """ - import pdb; pdb.set_trace(); + import pdb + + pdb.set_trace() param_number = dA.shape[-1] - d_log_likelihood_update = np.empty((param_number,1)) + d_log_likelihood_update = np.empty((param_number, 1)) sample_no = measurements.shape[0] - P_1 = P[1:,:,:] # remove 0-th step - P_2 = P[0:-1,:,:] # remove 0-th step + P_1 = P[1:, :, :] # remove 0-th step + P_2 = P[0:-1, :, :] # remove 0-th step - M_1 = M[1:,:] # remove 0-th step - M_2 = M[0:-1,:] # remove the last step + M_1 = M[1:, :] # remove 0-th step + M_2 = M[0:-1, :] # remove the last step - Sigma = np.mean(P_1,axis=0) + np.dot(M_1.T, M_1) / sample_no # - Phi = np.mean(P_2,axis=0) + np.dot(M_2.T, M_2) / sample_no # + Sigma = np.mean(P_1, axis=0) + np.dot(M_1.T, M_1) / sample_no # + Phi = np.mean(P_2, axis=0) + np.dot(M_2.T, M_2) / sample_no # - B = np.dot( measurements.T, M_1 )/ sample_no - C = (sp.einsum( 'ijk,ikl', P_1, G) + np.dot(M_1.T, M_2)) / sample_no # + B = np.dot(measurements.T, M_1) / sample_no + C = (sp.einsum("ijk,ikl", P_1, G) + np.dot(M_1.T, M_2)) / sample_no # -# C1 = np.zeros( (P_1.shape[1],P_1.shape[1]) ) -# for k in range(P_1.shape[0]): -# C1 += np.dot(P_1[k,:,:],G[k,:,:]) + sp.outer( M_1[k,:], M_2[k,:] ) -# C1 = C1 / sample_no + # C1 = np.zeros( (P_1.shape[1],P_1.shape[1]) ) + # for k in range(P_1.shape[0]): + # C1 += np.dot(P_1[k,:,:],G[k,:,:]) + sp.outer( M_1[k,:], M_2[k,:] ) + # C1 = C1 / sample_no - D = np.dot( measurements.T, measurements ) / sample_no + D = np.dot(measurements.T, measurements) / sample_no try: P_init_inv = sp.linalg.inv(P_init) - if np.max( np.abs(P_init_inv)) > 10e13: + if np.max(np.abs(P_init_inv)) > 10e13: compute_P_init_terms = False else: compute_P_init_terms = True @@ -2017,7 +2363,7 @@ class DescreteStateSpace(object): try: Q_inv = sp.linalg.inv(Q) - if np.max( np.abs(Q_inv)) > 10e13: + if np.max(np.abs(Q_inv)) > 10e13: compute_Q_terms = False else: compute_Q_terms = True @@ -2027,54 +2373,84 @@ class DescreteStateSpace(object): try: R_inv = sp.linalg.inv(R) - if np.max( np.abs(R_inv)) > 10e13: + if np.max(np.abs(R_inv)) > 10e13: compute_R_terms = False else: compute_R_terms = True except np.linalg.LinAlgError: compute_R_terms = False - - d_log_likelihood_update = np.zeros((param_number,1)) + d_log_likelihood_update = np.zeros((param_number, 1)) for j in range(param_number): if compute_P_init_terms: - d_log_likelihood_update[j,:] -= 0.5 * np.sum(P_init_inv* dP_init[:,:,j].T ) #p #m + d_log_likelihood_update[j, :] -= 0.5 * np.sum( + P_init_inv * dP_init[:, :, j].T + ) # p #m - M0_smoothed = M[0]; M0_smoothed.shape = (M0_smoothed.shape[0],1) - tmp1 = np.dot( dP_init[:,:,j], np.dot( P_init_inv, (P[0,:,:] + sp.outer( (M0_smoothed - m_init), (M0_smoothed - m_init) )) ) ) #p #m - d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp1.T ) + M0_smoothed = M[0] + M0_smoothed.shape = (M0_smoothed.shape[0], 1) + tmp1 = np.dot( + dP_init[:, :, j], + np.dot( + P_init_inv, + ( + P[0, :, :] + + sp.outer((M0_smoothed - m_init), (M0_smoothed - m_init)) + ), + ), + ) # p #m + d_log_likelihood_update[j, :] += 0.5 * np.sum(P_init_inv * tmp1.T) - tmp2 = sp.outer( dm_init[:,j], M0_smoothed ) + tmp2 = sp.outer(dm_init[:, j], M0_smoothed) tmp2 += tmp2.T - d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp2.T ) + d_log_likelihood_update[j, :] += 0.5 * np.sum(P_init_inv * tmp2.T) if compute_Q_terms: + d_log_likelihood_update[j, :] -= ( + sample_no / 2.0 * np.sum(Q_inv * dQ[:, :, j].T) + ) # m - d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv* dQ[:,:,j].T ) #m + tmp1 = np.dot(C, A.T) + tmp1 += tmp1.T + tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi, A.T)) # m + tmp1 = np.dot(dQ[:, :, j], np.dot(Q_inv, tmp1)) + d_log_likelihood_update[j, :] += ( + sample_no / 2.0 * np.sum(Q_inv * tmp1.T) + ) - tmp1 = np.dot(C,A.T); tmp1 += tmp1.T; tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi,A.T)) #m - tmp1 = np.dot( dQ[:,:,j], np.dot( Q_inv, tmp1) ) - d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(Q_inv * tmp1.T) - - tmp2 = np.dot( dA[:,:,j], C.T); tmp2 += tmp2.T; - tmp3 = np.dot(dA[:,:,j], np.dot(Phi,A.T)); tmp3 += tmp3.T - d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv.T * (tmp3 - tmp2) ) + tmp2 = np.dot(dA[:, :, j], C.T) + tmp2 += tmp2.T + tmp3 = np.dot(dA[:, :, j], np.dot(Phi, A.T)) + tmp3 += tmp3.T + d_log_likelihood_update[j, :] -= ( + sample_no / 2.0 * np.sum(Q_inv.T * (tmp3 - tmp2)) + ) if compute_R_terms: - d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv* dR[:,:,j].T ) + d_log_likelihood_update[j, :] -= ( + sample_no / 2.0 * np.sum(R_inv * dR[:, :, j].T) + ) - tmp1 = np.dot(B,H.T); tmp1 += tmp1.T; tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma,H.T)) - tmp1 = np.dot( dR[:,:,j], np.dot( R_inv, tmp1) ) - d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(R_inv * tmp1.T) + tmp1 = np.dot(B, H.T) + tmp1 += tmp1.T + tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma, H.T)) + tmp1 = np.dot(dR[:, :, j], np.dot(R_inv, tmp1)) + d_log_likelihood_update[j, :] += ( + sample_no / 2.0 * np.sum(R_inv * tmp1.T) + ) - tmp2 = np.dot( dH[:,:,j], B.T); tmp2 += tmp2.T; - tmp3 = np.dot(dH[:,:,j], np.dot(Sigma,H.T)); tmp3 += tmp3.T - d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv.T * (tmp3 - tmp2) ) + tmp2 = np.dot(dH[:, :, j], B.T) + tmp2 += tmp2.T + tmp3 = np.dot(dH[:, :, j], np.dot(Sigma, H.T)) + tmp3 += tmp3.T + d_log_likelihood_update[j, :] -= ( + sample_no / 2.0 * np.sum(R_inv.T * (tmp3 - tmp2)) + ) return d_log_likelihood_update @staticmethod - def _check_SS_matrix(p_M, state_dim, measurement_dim, which='A'): + def _check_SS_matrix(p_M, state_dim, measurement_dim, which="A"): """ Veryfy that on exit the matrix has appropriate shape for the KF algorithm. @@ -2096,30 +2472,42 @@ class DescreteStateSpace(object): """ old_M_shape = None - if len(p_M.shape) < 3: # new shape is 3 dimensional - old_M_shape = p_M.shape # save shape to restore it on exit - if len(p_M.shape) == 2: # matrix - p_M.shape = (p_M.shape[0],p_M.shape[1],1) - elif len(p_M.shape) == 1: # scalar but in array already - if (p_M.shape[0] != 1): - raise ValueError("Matrix %s is an 1D array, while it must be a matrix or scalar", which) + if len(p_M.shape) < 3: # new shape is 3 dimensional + old_M_shape = p_M.shape # save shape to restore it on exit + if len(p_M.shape) == 2: # matrix + p_M.shape = (p_M.shape[0], p_M.shape[1], 1) + elif len(p_M.shape) == 1: # scalar but in array already + if p_M.shape[0] != 1: + raise ValueError( + "Matrix %s is an 1D array, while it must be a matrix or scalar", + which, + ) else: - p_M.shape = (1,1,1) + p_M.shape = (1, 1, 1) - if (which == 'A') or (which == 'Q'): + if (which == "A") or (which == "Q"): if (p_M.shape[0] != state_dim) or (p_M.shape[1] != state_dim): - raise ValueError("%s must be a square matrix of size (%i,%i)" % (which, state_dim, state_dim)) - if (which == 'H'): + raise ValueError( + "%s must be a square matrix of size (%i,%i)" + % (which, state_dim, state_dim) + ) + if which == "H": if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != state_dim): - raise ValueError("H must be of shape (measurement_dim, state_dim) (%i,%i)" % (measurement_dim, state_dim)) - if (which == 'R'): + raise ValueError( + "H must be of shape (measurement_dim, state_dim) (%i,%i)" + % (measurement_dim, state_dim) + ) + if which == "R": if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != measurement_dim): - raise ValueError("R must be of shape (measurement_dim, measurement_dim) (%i,%i)" % (measurement_dim, measurement_dim)) + raise ValueError( + "R must be of shape (measurement_dim, measurement_dim) (%i,%i)" + % (measurement_dim, measurement_dim) + ) - return (p_M,old_M_shape) + return (p_M, old_M_shape) @staticmethod - def _check_grad_state_matrices(dM, state_dim, grad_params_no, which = 'dA'): + def _check_grad_state_matrices(dM, state_dim, grad_params_no, which="dA"): """ Function checks (mostly check dimensions) matrices for marginal likelihood gradient parameters calculation. It check dA, dQ matrices. @@ -2147,32 +2535,34 @@ class DescreteStateSpace(object): """ - if dM is None: - dM=np.zeros((state_dim,state_dim,grad_params_no)) + dM = np.zeros((state_dim, state_dim, grad_params_no)) elif isinstance(dM, np.ndarray): if state_dim == 1: if len(dM.shape) < 3: - dM.shape = (1,1,1) + dM.shape = (1, 1, 1) else: if len(dM.shape) < 3: - dM.shape = (state_dim,state_dim,1) - elif isinstance(dM, np.int): + dM.shape = (state_dim, state_dim, 1) + elif isinstance(dM, int): if state_dim > 1: - raise ValueError("When computing likelihood gradient wrong %s dimension." % which) + raise ValueError( + "When computing likelihood gradient wrong %s dimension." % which + ) else: - dM = np.ones((1,1,1)) * dM + dM = np.ones((1, 1, 1)) * dM -# if not isinstance(dM, types.FunctionType): -# f_dM = lambda k: dM -# else: -# f_dM = dM + # if not isinstance(dM, types.FunctionType): + # f_dM = lambda k: dM + # else: + # f_dM = dM return dM - @staticmethod - def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which = 'dH'): + def _check_grad_measurement_matrices( + dM, state_dim, grad_params_no, measurement_dim, which="dH" + ): """ Function checks (mostly check dimensions) matrices for marginal likelihood gradient parameters calculation. It check dH, dR matrices. @@ -2206,38 +2596,40 @@ class DescreteStateSpace(object): """ if dM is None: - if which == 'dH': - dM=np.zeros((measurement_dim ,state_dim,grad_params_no)) - elif which == 'dR': - dM=np.zeros((measurement_dim,measurement_dim,grad_params_no)) + if which == "dH": + dM = np.zeros((measurement_dim, state_dim, grad_params_no)) + elif which == "dR": + dM = np.zeros((measurement_dim, measurement_dim, grad_params_no)) elif isinstance(dM, np.ndarray): if state_dim == 1: if len(dM.shape) < 3: - dM.shape = (1,1,1) + dM.shape = (1, 1, 1) else: if len(dM.shape) < 3: - if which == 'dH': - dM.shape = (measurement_dim,state_dim,1) - elif which == 'dR': - dM.shape = (measurement_dim,measurement_dim,1) - elif isinstance(dM, np.int): + if which == "dH": + dM.shape = (measurement_dim, state_dim, 1) + elif which == "dR": + dM.shape = (measurement_dim, measurement_dim, 1) + elif isinstance(dM, int): if state_dim > 1: - raise ValueError("When computing likelihood gradient wrong dH dimension.") + raise ValueError( + "When computing likelihood gradient wrong dH dimension." + ) else: - dM = np.ones((1,1,1)) * dM + dM = np.ones((1, 1, 1)) * dM -# if not isinstance(dM, types.FunctionType): -# f_dM = lambda k: dM -# else: -# f_dM = dM + # if not isinstance(dM, types.FunctionType): + # f_dM = lambda k: dM + # else: + # f_dM = dM return dM - class Struct(object): pass + class ContDescrStateSpace(DescreteStateSpace): """ Class for continuous-discrete Kalman filter. State equation is @@ -2261,7 +2653,19 @@ class ContDescrStateSpace(DescreteStateSpace): would take too much memory. """ - def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): + def __init__( + self, + F, + L, + Qc, + dt, + compute_derivatives=False, + grad_params_no=None, + P_inf=None, + dP_inf=None, + dF=None, + dQc=None, + ): """ Constructor. All necessary parameters are passed here and stored in the opject. @@ -2288,7 +2692,7 @@ class ContDescrStateSpace(DescreteStateSpace): self.L = L.copy() self.Qc = Qc.copy() - self.dt = dt # copy is not taken because dt is internal parameter + self.dt = dt # copy is not taken because dt is internal parameter # Parameters are used to calculate derivatives but derivatives # are not used in the smoother. Therefore copies are not taken. @@ -2298,8 +2702,7 @@ class ContDescrStateSpace(DescreteStateSpace): self.dQc = dQc self.compute_derivatives = compute_derivatives - self.grad_params_no = grad_params_no - + self.grad_params_no = grad_params_no self.last_k = 0 self.last_k_computed = False @@ -2313,14 +2716,14 @@ class ContDescrStateSpace(DescreteStateSpace): self.Q_svd_computed = False # !!!Print statistics! Which object is created - def f_a(self, k,m,A): + def f_a(self, k, m, A): """ Dynamic model """ - return np.dot(A, m) # default dynamic model + return np.dot(A, m) # default dynamic model - def _recompute_for_new_k(self,k): + def _recompute_for_new_k(self, k): """ Computes the necessary matrices for an index k and store the results. @@ -2335,9 +2738,18 @@ class ContDescrStateSpace(DescreteStateSpace): A, Q, dA dQ on step k """ if (self.last_k != k) or (self.last_k_computed == False): - v_Ak,v_Qk, tmp, v_dAk, v_dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F, - self.L,self.Qc,self.dt[k],self.compute_derivatives, - grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc) + v_Ak, v_Qk, tmp, v_dAk, v_dQk = ContDescrStateSpace.lti_sde_to_descrete( + self.F, + self.L, + self.Qc, + self.dt[k], + self.compute_derivatives, + grad_params_no=self.grad_params_no, + P_inf=self.P_inf, + dP_inf=self.dP_inf, + dF=self.dF, + dQc=self.dQc, + ) self.last_k = k self.last_k_computed = True @@ -2345,7 +2757,7 @@ class ContDescrStateSpace(DescreteStateSpace): self.v_Qk = v_Qk self.v_dAk = v_dAk self.v_dQk = v_dQk - + self.Q_square_root_computed = False self.Q_inverse_computed = False self.Q_svd_computed = False @@ -2357,7 +2769,7 @@ class ContDescrStateSpace(DescreteStateSpace): # !!!Print statistics! Print sizes of matrices - return v_Ak,v_Qk, v_dAk, v_dQk + return v_Ak, v_Qk, v_dAk, v_dQk def reset(self, compute_derivatives): """ @@ -2370,44 +2782,50 @@ class ContDescrStateSpace(DescreteStateSpace): self.last_k = 0 self.last_k_computed = False self.compute_derivatives = compute_derivatives - + self.Q_square_root_computed = False self.Q_inverse_computed = False self.Q_svd_computed = False self.Q_eigen_computed = False return self - def Ak(self,k,m,P): - v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) + def Ak(self, k, m, P): + v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_Ak - def Qk(self,k): - v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) + def Qk(self, k): + v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_Qk def dAk(self, k): - v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) + v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_dAk def dQk(self, k): - v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) + v_Ak, v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) return v_dQk - def Q_srk(self,k): + def Q_srk(self, k): """ Check square root, maybe rewriting for Spectral decomposition is needed. Square root of the noise matrix Q """ - if ((self.last_k == k) and (self.last_k_computed == True)): + if (self.last_k == k) and (self.last_k_computed == True): if not self.Q_square_root_computed: if not self.Q_svd_computed: - (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) + (U, S, Vh) = sp.linalg.svd( + self.v_Qk, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=False, + ) self.Q_svd = (U, S, Vh) self.Q_svd_computed = True else: (U, S, Vh) = self.Q_svd - + square_root = U * np.sqrt(S) self.square_root_computed = True self.Q_square_root = square_root @@ -2417,56 +2835,70 @@ class ContDescrStateSpace(DescreteStateSpace): raise ValueError("Square root of Q can not be computed") return square_root - - def Q_inverse(self, k, p_largest_cond_num, p_regularization_type): + + def Q_inverse(self, k, p_largest_cond_num, p_regularization_type): """ Function inverts Q matrix and regularizes the inverse. Regularization is useful when original matrix is badly conditioned. Function is currently used only in SparseGP code. - + Inputs: ------------------------------ k: int Iteration number. - + p_largest_cond_num: float Largest condition value for the inverted matrix. If cond. number is smaller than that no regularization happen. - + regularization_type: 1 or 2 Regularization type. - + regularization_type: int (1 or 2) - + type 1: 1/(S[k] + regularizer) regularizer is computed type 2: S[k]/(S^2[k] + regularizer) regularizer is computed """ - - #import pdb; pdb.set_trace() - - if ((self.last_k == k) and (self.last_k_computed == True)): + + # import pdb; pdb.set_trace() + + if (self.last_k == k) and (self.last_k_computed == True): if not self.Q_inverse_computed: if not self.Q_svd_computed: - (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) + (U, S, Vh) = sp.linalg.svd( + self.v_Qk, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=False, + ) self.Q_svd = (U, S, Vh) self.Q_svd_computed = True else: (U, S, Vh) = self.Q_svd - Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type) - + Q_inverse_r = psd_matrix_inverse( + k, + 0.5 * (self.v_Qk + self.v_Qk.T), + U, + S, + p_largest_cond_num, + p_regularization_type, + ) + self.Q_inverse_computed = True self.Q_inverse_r = Q_inverse_r - + else: Q_inverse_r = self.Q_inverse_r else: - raise ValueError("""Inverse of Q can not be computed, because Q has not been computed. - This requires some programming""") + raise ValueError( + """Inverse of Q can not be computed, because Q has not been computed. + This requires some programming""" + ) return Q_inverse_r - - + def return_last(self): """ Function returns last computed matrices. @@ -2497,7 +2929,20 @@ class ContDescrStateSpace(DescreteStateSpace): Since all the matrices are computed all together, this object can be used in smoother without repeating the computations. """ - def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): + + def __init__( + self, + F, + L, + Qc, + dt, + compute_derivatives=False, + grad_params_no=None, + P_inf=None, + dP_inf=None, + dF=None, + dQc=None, + ): """ Constructor. All necessary parameters are passed here and stored in the opject. @@ -2518,33 +2963,55 @@ class ContDescrStateSpace(DescreteStateSpace): ------------------- Nothing """ - As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F, - L,Qc,dt,compute_derivatives, - grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) + ( + As, + Qs, + reconstruct_indices, + dAs, + dQs, + ) = ContDescrStateSpace.lti_sde_to_descrete( + F, + L, + Qc, + dt, + compute_derivatives, + grad_params_no=grad_params_no, + P_inf=P_inf, + dP_inf=dP_inf, + dF=dF, + dQc=dQc, + ) self.As = As self.Qs = Qs self.dAs = dAs self.dQs = dQs self.reconstruct_indices = reconstruct_indices - self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\ - (self.dAs.nbytes if (self.dAs is not None) else 0) +\ - (self.dQs.nbytes if (self.dQs is not None) else 0) +\ - (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0) + self.total_size_of_data = ( + self.As.nbytes + + self.Qs.nbytes + + (self.dAs.nbytes if (self.dAs is not None) else 0) + + (self.dQs.nbytes if (self.dQs is not None) else 0) + + ( + self.reconstruct_indices.nbytes + if (self.reconstruct_indices is not None) + else 0 + ) + ) self.Q_svd_dict = {} self.Q_square_root_dict = {} self.Q_inverse_dict = {} - + self.last_k = None - # !!!Print statistics! Which object is created + # !!!Print statistics! Which object is created # !!!Print statistics! Print sizes of matrices - def f_a(self, k,m,A): + def f_a(self, k, m, A): """ Dynamic model """ - return np.dot(A, m) # default dynamic model + return np.dot(A, m) # default dynamic model def reset(self, compute_derivatives=False): """ @@ -2554,24 +3021,23 @@ class ContDescrStateSpace(DescreteStateSpace): """ return self - def Ak(self,k,m,P): + def Ak(self, k, m, P): self.last_k = k - return self.As[:,:, self.reconstruct_indices[k]] + return self.As[:, :, self.reconstruct_indices[k]] - def Qk(self,k): + def Qk(self, k): self.last_k = k - return self.Qs[:,:, self.reconstruct_indices[k]] + return self.Qs[:, :, self.reconstruct_indices[k]] - def dAk(self,k): + def dAk(self, k): self.last_k = k - return self.dAs[:,:, :, self.reconstruct_indices[k]] + return self.dAs[:, :, :, self.reconstruct_indices[k]] - def dQk(self,k): + def dQk(self, k): self.last_k = k - return self.dQs[:,:, :, self.reconstruct_indices[k]] + return self.dQs[:, :, :, self.reconstruct_indices[k]] - - def Q_srk(self,k): + def Q_srk(self, k): """ Square root of the noise matrix Q """ @@ -2582,83 +3048,109 @@ class ContDescrStateSpace(DescreteStateSpace): if matrix_index in self.Q_svd_dict: (U, S, Vh) = self.Q_svd_dict[matrix_index] else: - (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], - full_matrices=False, compute_uv=True, - overwrite_a=False, check_finite=False) - self.Q_svd_dict[matrix_index] = (U,S,Vh) - + (U, S, Vh) = sp.linalg.svd( + self.Qs[:, :, matrix_index], + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=False, + ) + self.Q_svd_dict[matrix_index] = (U, S, Vh) + square_root = U * np.sqrt(S) self.Q_square_root_dict[matrix_index] = square_root return square_root - + def Q_inverse(self, k, p_largest_cond_num, p_regularization_type): """ Function inverts Q matrix and regularizes the inverse. Regularization is useful when original matrix is badly conditioned. Function is currently used only in SparseGP code. - + Inputs: ------------------------------ k: int Iteration number. - + p_largest_cond_num: float Largest condition value for the inverted matrix. If cond. number is smaller than that no regularization happen. - + regularization_type: 1 or 2 Regularization type. - + regularization_type: int (1 or 2) - + type 1: 1/(S[k] + regularizer) regularizer is computed type 2: S[k]/(S^2[k] + regularizer) regularizer is computed """ - #import pdb; pdb.set_trace() - + # import pdb; pdb.set_trace() + matrix_index = self.reconstruct_indices[k] if matrix_index in self.Q_inverse_dict: Q_inverse_r = self.Q_inverse_dict[matrix_index] else: - if matrix_index in self.Q_svd_dict: (U, S, Vh) = self.Q_svd_dict[matrix_index] else: - (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], - full_matrices=False, compute_uv=True, - overwrite_a=False, check_finite=False) - self.Q_svd_dict[matrix_index] = (U,S,Vh) - - Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type) + (U, S, Vh) = sp.linalg.svd( + self.Qs[:, :, matrix_index], + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=False, + ) + self.Q_svd_dict[matrix_index] = (U, S, Vh) + + Q_inverse_r = psd_matrix_inverse( + k, + 0.5 * (self.Qs[:, :, matrix_index] + self.Qs[:, :, matrix_index].T), + U, + S, + p_largest_cond_num, + p_regularization_type, + ) self.Q_inverse_dict[matrix_index] = Q_inverse_r return Q_inverse_r - - + def return_last(self): """ Function returns last available matrices. """ - if (self.last_k is None): + if self.last_k is None: raise ValueError("Matrices are not computed.") else: ind = self.reconstruct_indices[self.last_k] - A = self.As[:,:, ind] - Q = self.Qs[:,:, ind] - dA = self.dAs[:,:, :, ind] - dQ = self.dQs[:,:, :, ind] + A = self.As[:, :, ind] + Q = self.Qs[:, :, ind] + dA = self.dAs[:, :, :, ind] + dQ = self.dQs[:, :, :, ind] return self.last_k, A, Q, dA, dQ @classmethod - def cont_discr_kalman_filter(cls, F, L, Qc, p_H, p_R, P_inf, X, Y, index = None, - m_init=None, P_init=None, - p_kalman_filter_type='regular', - calc_log_likelihood=False, - calc_grad_log_likelihood=False, - grad_params_no=0, grad_calc_params=None): + def cont_discr_kalman_filter( + cls, + F, + L, + Qc, + p_H, + p_R, + P_inf, + X, + Y, + index=None, + m_init=None, + P_init=None, + p_kalman_filter_type="regular", + calc_log_likelihood=False, + calc_grad_log_likelihood=False, + grad_params_no=0, + grad_calc_params=None, + ): """ This function implements the continuous-discrete Kalman Filter algorithm These notations for the State-Space model are assumed: @@ -2800,18 +3292,21 @@ class ContDescrStateSpace(DescreteStateSpace): p_H = np.atleast_1d(p_H) p_R = np.atleast_1d(p_R) - X.shape, old_X_shape = cls._reshape_input_data(X.shape, 2) # represent as column - if (X.shape[1] != 1): + X.shape, old_X_shape = cls._reshape_input_data( + X.shape, 2 + ) # represent as column + if X.shape[1] != 1: raise ValueError("Only one dimensional X data is supported.") - Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) # represent as column + Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) # represent as column state_dim = F.shape[0] measurement_dim = Y.shape[1] - time_series_no = Y.shape[2] # multiple time series mode + time_series_no = Y.shape[2] # multiple time series mode - if ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\ - ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)): + if ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or ( + (len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1) + ): model_matrices_chage_with_time = True else: model_matrices_chage_with_time = False @@ -2820,26 +3315,36 @@ class ContDescrStateSpace(DescreteStateSpace): old_index_shape = None if index is None: if (len(p_H.shape) == 3) or (len(p_R.shape) == 3): - raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)") - else: # matrices do not change in time, so form dummy zero indices. - index = np.zeros((1,Y.shape[0])) + raise ValueError( + "Parameter index can not be None for time varying matrices (third dimension is present)" + ) + else: # matrices do not change in time, so form dummy zero indices. + index = np.zeros((1, Y.shape[0])) else: if len(index.shape) == 1: - index.shape = (1,index.shape[0]) + index.shape = (1, index.shape[0]) old_index_shape = (index.shape[0],) - if (index.shape[1] != Y.shape[0]): - raise ValueError("Number of measurements must be equal the number of H_{k}, R_{k}") + if index.shape[1] != Y.shape[0]: + raise ValueError( + "Number of measurements must be equal the number of H_{k}, R_{k}" + ) - if (index.shape[0] == 1): - H_time_var_index = 0; R_time_var_index = 0 - elif (index.shape[0] == 4): - H_time_var_index = 0; R_time_var_index = 1 + if index.shape[0] == 1: + H_time_var_index = 0 + R_time_var_index = 0 + elif index.shape[0] == 4: + H_time_var_index = 0 + R_time_var_index = 1 else: raise ValueError("First Dimension of index must be either 1 or 2.") - (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H') - (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R') + (p_H, old_H_shape) = cls._check_SS_matrix( + p_H, state_dim, measurement_dim, which="H" + ) + (p_R, old_R_shape) = cls._check_SS_matrix( + p_R, state_dim, measurement_dim, which="R" + ) if m_init is None: m_init = np.zeros((state_dim, time_series_no)) @@ -2849,7 +3354,7 @@ class ContDescrStateSpace(DescreteStateSpace): if P_init is None: P_init = P_inf.copy() - if p_kalman_filter_type not in ('regular', 'svd'): + if p_kalman_filter_type not in ("regular", "svd"): raise ValueError("Kalman filer type neither 'regular nor 'svd'.") # Functions to pass to the kalman_filter algorithm: @@ -2858,26 +3363,49 @@ class ContDescrStateSpace(DescreteStateSpace): # m - vector for calculating matrices. Required for EKF. Not used here. # f_hl = lambda k,m,H: np.dot(H, m) # f_H = lambda k,m,P: p_H[:,:, index[H_time_var_index, k]] - #f_R = lambda k: p_R[:,:, index[R_time_var_index, k]] - #o_R = R_handling( p_R, index, R_time_var_index, 20) + # f_R = lambda k: p_R[:,:, index[R_time_var_index, k]] + # o_R = R_handling( p_R, index, R_time_var_index, 20) if calc_grad_log_likelihood: + dF = cls._check_grad_state_matrices( + grad_calc_params.get("dF"), state_dim, grad_params_no, which="dA" + ) + dQc = cls._check_grad_state_matrices( + grad_calc_params.get("dQc"), state_dim, grad_params_no, which="dQ" + ) + dP_inf = cls._check_grad_state_matrices( + grad_calc_params.get("dP_inf"), state_dim, grad_params_no, which="dA" + ) - dF = cls._check_grad_state_matrices(grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA') - dQc = cls._check_grad_state_matrices(grad_calc_params.get('dQc'), state_dim, grad_params_no, which = 'dQ') - dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dA') + dH = cls._check_grad_measurement_matrices( + grad_calc_params.get("dH"), + state_dim, + grad_params_no, + measurement_dim, + which="dH", + ) + dR = cls._check_grad_measurement_matrices( + grad_calc_params.get("dR"), + state_dim, + grad_params_no, + measurement_dim, + which="dR", + ) - dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH') - dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR') - - dm_init = grad_calc_params.get('dm_init') # Initial values for the Kalman Filter + dm_init = grad_calc_params.get( + "dm_init" + ) # Initial values for the Kalman Filter if dm_init is None: # multiple time series mode. Keep grad_params always as a last dimension - dm_init = np.zeros( (state_dim, time_series_no, grad_params_no) ) + dm_init = np.zeros((state_dim, time_series_no, grad_params_no)) - dP_init = grad_calc_params.get('dP_init') # Initial values for the Kalman Filter + dP_init = grad_calc_params.get( + "dP_init" + ) # Initial values for the Kalman Filter if dP_init is None: - dP_init = dP_inf(0).copy() # get the dP_init matrix, because now it is a function + dP_init = dP_inf( + 0 + ).copy() # get the dP_init matrix, because now it is a function else: dP_inf = None @@ -2888,23 +3416,48 @@ class ContDescrStateSpace(DescreteStateSpace): dm_init = None dP_init = None - measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR) - #import pdb; pdb.set_trace() + measurement_callables = Std_Measurement_Callables_Class( + p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR + ) + # import pdb; pdb.set_trace() - dynamic_callables = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=calc_grad_log_likelihood, - grad_params_no=grad_params_no, - P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc) + dynamic_callables = cls._cont_to_discrete_object( + X, + F, + L, + Qc, + compute_derivatives=calc_grad_log_likelihood, + grad_params_no=grad_params_no, + P_inf=P_inf, + dP_inf=dP_inf, + dF=dF, + dQc=dQc, + ) if print_verbose: print("General: run Continuos-Discrete Kalman Filter") # Also for dH, dR and probably for all derivatives - (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim, - dynamic_callables, measurement_callables, - X, Y, m_init=m_init, P_init=P_init, - p_kalman_filter_type=p_kalman_filter_type, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, - dm_init=dm_init, dP_init=dP_init) + ( + M, + P, + log_likelihood, + grad_log_likelihood, + AQcomp, + ) = cls._cont_discr_kalman_filter_raw( + state_dim, + dynamic_callables, + measurement_callables, + X, + Y, + m_init=m_init, + P_init=P_init, + p_kalman_filter_type=p_kalman_filter_type, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + grad_params_no=grad_params_no, + dm_init=dm_init, + dP_init=dP_init, + ) if old_index_shape is not None: index.shape = old_index_shape @@ -2924,12 +3477,22 @@ class ContDescrStateSpace(DescreteStateSpace): return (M, P, log_likelihood, grad_log_likelihood, AQcomp) @classmethod - def _cont_discr_kalman_filter_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, X, Y, - m_init, P_init, - p_kalman_filter_type='regular', - calc_log_likelihood=False, - calc_grad_log_likelihood=False, grad_params_no=None, - dm_init=None, dP_init=None): + def _cont_discr_kalman_filter_raw( + cls, + state_dim, + p_dynamic_callables, + p_measurement_callables, + X, + Y, + m_init, + P_init, + p_kalman_filter_type="regular", + calc_log_likelihood=False, + calc_grad_log_likelihood=False, + grad_params_no=None, + dm_init=None, + dP_init=None, + ): """ General filtering algorithm for inference in the continuos-discrete state-space model: @@ -3015,89 +3578,134 @@ class ContDescrStateSpace(DescreteStateSpace): """ - #import pdb; pdb.set_trace() - steps_no = Y.shape[0] # number of steps in the Kalman Filter - time_series_no = Y.shape[2] # multiple time series mode + # import pdb; pdb.set_trace() + steps_no = Y.shape[0] # number of steps in the Kalman Filter + time_series_no = Y.shape[2] # multiple time series mode # Allocate space for results # Mean estimations. Initial values will be included - M = np.empty(((steps_no+1),state_dim,time_series_no)) - M[0,:,:] = m_init # Initialize mean values + M = np.empty(((steps_no + 1), state_dim, time_series_no)) + M[0, :, :] = m_init # Initialize mean values # Variance estimations. Initial values will be included - P = np.empty(((steps_no+1),state_dim,state_dim)) - P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful - P[0,:,:] = P_init # Initialize initial covariance matrix + P = np.empty(((steps_no + 1), state_dim, state_dim)) + P_init = 0.5 * ( + P_init + P_init.T + ) # symmetrize initial covariance. In some ustable cases this is uiseful + P[0, :, :] = P_init # Initialize initial covariance matrix - #import pdb;pdb.set_trace() - if p_kalman_filter_type == 'svd': - (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, - overwrite_a=False,check_finite=True) - S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance - P_upd = (P_init, S,U) - #log_likelihood = 0 - #grad_log_likelihood = np.zeros((grad_params_no,1)) + # import pdb;pdb.set_trace() + if p_kalman_filter_type == "svd": + (U, S, Vh) = sp.linalg.svd( + P_init, + full_matrices=False, + compute_uv=True, + overwrite_a=False, + check_finite=True, + ) + S[(S == 0)] = 1e-17 # allows to run algorithm for singular initial variance + P_upd = (P_init, S, U) + # log_likelihood = 0 + # grad_log_likelihood = np.zeros((grad_params_no,1)) log_likelihood = 0 if calc_log_likelihood else None grad_log_likelihood = 0 if calc_grad_log_likelihood else None - #setting initial values for derivatives update + # setting initial values for derivatives update dm_upd = dm_init dP_upd = dP_init # Main loop of the Kalman filter - for k in range(0,steps_no): + for k in range(0, steps_no): # In this loop index for new estimations is (k+1), old - (k) # This happened because initial values are stored at 0-th index. - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() - prev_mean = M[k,:,:] # mean from the previous step + prev_mean = M[k, :, :] # mean from the previous step - if p_kalman_filter_type == 'svd': - m_pred, P_pred, dm_pred, dP_pred = \ - cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables, + if p_kalman_filter_type == "svd": + m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step_SVD( + k, + prev_mean, + P_upd, + p_dynamic_callables, calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_upd, p_dP = dP_upd) + p_dm=dm_upd, + p_dP=dP_upd, + ) else: - m_pred, P_pred, dm_pred, dP_pred = \ - cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, + m_pred, P_pred, dm_pred, dP_pred = cls._kalman_prediction_step( + k, + prev_mean, + P[k, :, :], + p_dynamic_callables, calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_upd, p_dP = dP_upd ) + p_dm=dm_upd, + p_dP=dP_upd, + ) - #import pdb; pdb.set_trace() - k_measurment = Y[k,:,:] + # import pdb; pdb.set_trace() + k_measurment = Y[k, :, :] - if (np.any(np.isnan(k_measurment)) == False): + if np.any(np.isnan(k_measurment)) == False: + if p_kalman_filter_type == "svd": + ( + m_upd, + P_upd, + log_likelihood_update, + dm_upd, + dP_upd, + d_log_likelihood_update, + ) = cls._kalman_update_step_SVD( + k, + m_pred, + P_pred, + p_measurement_callables, + k_measurment, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm=dm_pred, + p_dP=dP_pred, + ) - if p_kalman_filter_type == 'svd': - m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, - k_measurment, calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_pred, p_dP = dP_pred ) - - - # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, - # calc_log_likelihood=calc_log_likelihood, - # calc_grad_log_likelihood=calc_grad_log_likelihood, - # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) - # - # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, - # overwrite_a=False,check_finite=True) - # P_upd = (P_upd, S,U) + # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ + # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, + # calc_log_likelihood=calc_log_likelihood, + # calc_grad_log_likelihood=calc_grad_log_likelihood, + # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) + # + # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, + # overwrite_a=False,check_finite=True) + # P_upd = (P_upd, S,U) else: - m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ - cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, - calc_log_likelihood=calc_log_likelihood, - calc_grad_log_likelihood=calc_grad_log_likelihood, - p_dm = dm_pred, p_dP = dP_pred ) + ( + m_upd, + P_upd, + log_likelihood_update, + dm_upd, + dP_upd, + d_log_likelihood_update, + ) = cls._kalman_update_step( + k, + m_pred, + P_pred, + p_measurement_callables, + k_measurment, + calc_log_likelihood=calc_log_likelihood, + calc_grad_log_likelihood=calc_grad_log_likelihood, + p_dm=dm_pred, + p_dP=dP_pred, + ) else: - if k_measurment.shape != (1,1): - raise ValueError("Nan measurements are currently not supported for \ - multidimensional output and multiple tiem series.") + if k_measurment.shape != (1, 1): + raise ValueError( + "Nan measurements are currently not supported for \ + multidimensional output and multiple tiem series." + ) else: - m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred - log_likelihood_update = 0.0; - d_log_likelihood_update = 0.0; - + m_upd = m_pred + P_upd = P_pred + dm_upd = dm_pred + dP_upd = dP_pred + log_likelihood_update = 0.0 + d_log_likelihood_update = 0.0 if calc_log_likelihood: log_likelihood += log_likelihood_update @@ -3105,20 +3713,35 @@ class ContDescrStateSpace(DescreteStateSpace): if calc_grad_log_likelihood: grad_log_likelihood += d_log_likelihood_update - M[k+1,:,:] = m_upd # separate mean value for each time series + M[k + 1, :, :] = m_upd # separate mean value for each time series - if p_kalman_filter_type == 'svd': - P[k+1,:,:] = P_upd[0] + if p_kalman_filter_type == "svd": + P[k + 1, :, :] = P_upd[0] else: - P[k+1,:,:] = P_upd - #print("kf it: %i" % k) + P[k + 1, :, :] = P_upd + # print("kf it: %i" % k) # !!!Print statistics! Print sizes of matrices # !!!Print statistics! Print iteration time base on another boolean variable - return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False)) + return ( + M, + P, + log_likelihood, + grad_log_likelihood, + p_dynamic_callables.reset(False), + ) @classmethod - def cont_discr_rts_smoother(cls,state_dim, filter_means, filter_covars, - p_dynamic_callables=None, X=None, F=None,L=None,Qc=None): + def cont_discr_rts_smoother( + cls, + state_dim, + filter_means, + filter_covars, + p_dynamic_callables=None, + X=None, + F=None, + L=None, + Qc=None, + ): """ Continuos-discrete Rauch–Tung–Striebel(RTS) smoother. @@ -3158,45 +3781,78 @@ class ContDescrStateSpace(DescreteStateSpace): Smoothed estimates of the state covariances """ - f_a = lambda k,m,A: np.dot(A, m) # state dynamic model - if p_dynamic_callables is None: # make this object from scratch - p_dynamic_callables = cls._cont_to_discrete_object(cls, X, F,L,Qc,f_a,compute_derivatives=False, - grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None) + f_a = lambda k, m, A: np.dot(A, m) # state dynamic model + if p_dynamic_callables is None: # make this object from scratch + p_dynamic_callables = cls._cont_to_discrete_object( + cls, + X, + F, + L, + Qc, + f_a, + compute_derivatives=False, + grad_params_no=None, + P_inf=None, + dP_inf=None, + dF=None, + dQc=None, + ) - no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance) + no_steps = ( + filter_covars.shape[0] - 1 + ) # number of steps (minus initial covariance) - M = np.empty(filter_means.shape) # smoothed means - P = np.empty(filter_covars.shape) # smoothed covars + M = np.empty(filter_means.shape) # smoothed means + P = np.empty(filter_covars.shape) # smoothed covars if print_verbose: print("General: run Continuos-Discrete Kalman Smoother") - M[-1,:,:] = filter_means[-1,:,:] - P[-1,:,:] = filter_covars[-1,:,:] - for k in range(no_steps-1,-1,-1): + M[-1, :, :] = filter_means[-1, :, :] + P[-1, :, :] = filter_covars[-1, :, :] + for k in range(no_steps - 1, -1, -1): + prev_mean = filter_means[k, :] # mean from the previous step + m_pred, P_pred, tmp1, tmp2 = cls._kalman_prediction_step( + k, + prev_mean, + filter_covars[k, :, :], + p_dynamic_callables, + calc_grad_log_likelihood=False, + ) + p_m = filter_means[k, :] + p_m_prev_step = M[(k + 1), :] - prev_mean = filter_means[k,:] # mean from the previous step - m_pred, P_pred, tmp1, tmp2 = \ - cls._kalman_prediction_step(k, prev_mean, - filter_covars[k,:,:], p_dynamic_callables, - calc_grad_log_likelihood=False) - p_m = filter_means[k,:] - p_m_prev_step = M[(k+1),:] + m_upd, P_upd, tmp_G = cls._rts_smoother_update_step( + k, + p_m, + filter_covars[k, :, :], + m_pred, + P_pred, + p_m_prev_step, + P[(k + 1), :, :], + p_dynamic_callables, + ) - m_upd, P_upd, tmp_G = cls._rts_smoother_update_step(k, - p_m ,filter_covars[k,:,:], - m_pred, P_pred, p_m_prev_step ,P[(k+1),:,:], p_dynamic_callables) - - M[k,:,:] = m_upd - P[k,:,:] = P_upd + M[k, :, :] = m_upd + P[k, :, :] = P_upd # Return values return (M, P) @classmethod - def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False, - grad_params_no=None, - P_inf=None, dP_inf=None, dF = None, dQc=None, - dt0=None): + def _cont_to_discrete_object( + cls, + X, + F, + L, + Qc, + compute_derivatives=False, + grad_params_no=None, + P_inf=None, + dP_inf=None, + dF=None, + dQc=None, + dt0=None, + ): """ Function return the object which is used in Kalman filter and/or smoother to obtain matrices A, Q and their derivatives for discrete model @@ -3230,53 +3886,121 @@ class ContDescrStateSpace(DescreteStateSpace): """ unique_round_decimals = 10 - threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time + threshold_number_of_unique_time_steps = ( + 20 # above which matrices are separately each time + ) dt = np.empty((X.shape[0],)) - dt[1:] = np.diff(X[:,0],axis=0) + dt[1:] = np.diff(X[:, 0], axis=0) if dt0 is None: - dt[0] = 0#dt[1] + dt[0] = 0 # dt[1] else: - if isinstance(dt0,str): + if isinstance(dt0, str): dt = dt[1:] else: dt[0] = dt0 - + unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals)) number_unique_indices = len(unique_indices) - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() if use_cython: - class AQcompute_batch(state_space_cython.AQcompute_batch_Cython): - def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): - As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F, - L,Qc,dt,compute_derivatives, - grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) - super(AQcompute_batch,self).__init__(As, Qs, reconstruct_indices, dAs,dQs) + class AQcompute_batch(state_space_cython.AQcompute_batch_Cython): + def __init__( + self, + F, + L, + Qc, + dt, + compute_derivatives=False, + grad_params_no=None, + P_inf=None, + dP_inf=None, + dF=None, + dQc=None, + ): + ( + As, + Qs, + reconstruct_indices, + dAs, + dQs, + ) = ContDescrStateSpace.lti_sde_to_descrete( + F, + L, + Qc, + dt, + compute_derivatives, + grad_params_no=grad_params_no, + P_inf=P_inf, + dP_inf=dP_inf, + dF=dF, + dQc=dQc, + ) + + super(AQcompute_batch, self).__init__( + As, Qs, reconstruct_indices, dAs, dQs + ) + else: AQcompute_batch = cls.AQcompute_batch_Python if number_unique_indices > threshold_number_of_unique_time_steps: - AQcomp = cls.AQcompute_once(F,L,Qc, dt,compute_derivatives=compute_derivatives, - grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) + AQcomp = cls.AQcompute_once( + F, + L, + Qc, + dt, + compute_derivatives=compute_derivatives, + grad_params_no=grad_params_no, + P_inf=P_inf, + dP_inf=dP_inf, + dF=dF, + dQc=dQc, + ) if print_verbose: print("CDO: Continue-to-discrete INSTANTANEOUS object is created.") - print("CDO: Number of different time steps: %i" % (number_unique_indices,) ) + print( + "CDO: Number of different time steps: %i" + % (number_unique_indices,) + ) else: - AQcomp = AQcompute_batch(F,L,Qc,dt,compute_derivatives=compute_derivatives, - grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) + AQcomp = AQcompute_batch( + F, + L, + Qc, + dt, + compute_derivatives=compute_derivatives, + grad_params_no=grad_params_no, + P_inf=P_inf, + dP_inf=dP_inf, + dF=dF, + dQc=dQc, + ) if print_verbose: print("CDO: Continue-to-discrete BATCH object is created.") - print("CDO: Number of different time steps: %i" % (number_unique_indices,) ) - print("CDO: Total size if its data: %i" % (AQcomp.total_size_of_data,) ) + print( + "CDO: Number of different time steps: %i" + % (number_unique_indices,) + ) + print("CDO: Total size if its data: %i" % (AQcomp.total_size_of_data,)) return AQcomp @staticmethod - def lti_sde_to_descrete(F,L,Qc,dt,compute_derivatives=False, - grad_params_no=None, P_inf=None, - dP_inf=None, dF = None, dQc=None): + def lti_sde_to_descrete( + F, + L, + Qc, + dt, + compute_derivatives=False, + grad_params_no=None, + P_inf=None, + dP_inf=None, + dF=None, + dQc=None, + ): """ Linear Time-Invariant Stochastic Differential Equation (LTI SDE): @@ -3294,7 +4018,7 @@ class ContDescrStateSpace(DescreteStateSpace): TODO: this function can be redone to "preprocess dataset", when close time points are handeled properly (with rounding parameter) and values are averaged accordingly. - + Input: -------------- F,L: LTI SDE matrices of corresponding dimensions @@ -3354,106 +4078,123 @@ class ContDescrStateSpace(DescreteStateSpace): # Dimensionality n = F.shape[0] - if not isinstance(dt, collections.Iterable): # not iterable, scalar - #import pdb; pdb.set_trace() + if not isinstance(dt, collections.Iterable): # not iterable, scalar + # import pdb; pdb.set_trace() # The dynamical model - A = matrix_exponent(F*dt) + A = matrix_exponent(F * dt) # The covariance matrix Q by matrix fraction decomposition -> - Phi = np.zeros((2*n,2*n)) - Phi[:n,:n] = F - Phi[:n,n:] = L.dot(Qc).dot(L.T) - Phi[n:,n:] = -F.T - AB = matrix_exponent(Phi*dt) - AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n)))) + Phi = np.zeros((2 * n, 2 * n)) + Phi[:n, :n] = F + Phi[:n, n:] = L.dot(Qc).dot(L.T) + Phi[n:, n:] = -F.T + AB = matrix_exponent(Phi * dt) + AB = np.dot(AB, np.vstack((np.zeros((n, n)), np.eye(n)))) - Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T) - Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T) + Q_noise_1 = linalg.solve(AB[n:, :].T, AB[:n, :].T) + Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T) # The covariance matrix Q by matrix fraction decomposition <- if compute_derivatives: dA = np.zeros([n, n, grad_params_no]) dQ = np.zeros([n, n, grad_params_no]) - #AA = np.zeros([2*n, 2*n, nparam]) - FF = np.zeros([2*n, 2*n]) - AA = np.zeros([2*n, 2*n, grad_params_no]) + # AA = np.zeros([2*n, 2*n, nparam]) + FF = np.zeros([2 * n, 2 * n]) + AA = np.zeros([2 * n, 2 * n, grad_params_no]) for p in range(0, grad_params_no): - - FF[:n,:n] = F - FF[n:,:n] = dF[:,:,p] - FF[n:,n:] = F + FF[:n, :n] = F + FF[n:, :n] = dF[:, :, p] + FF[n:, n:] = F # Solve the matrix exponential - AA[:,:,p] = matrix_exponent(FF*dt) + AA[:, :, p] = matrix_exponent(FF * dt) # Solve the differential equation - #foo = AA[:,:,p].dot(np.vstack([m, dm[:,p]])) - #mm = foo[:n,:] - #dm[:,p] = foo[n:,:] + # foo = AA[:,:,p].dot(np.vstack([m, dm[:,p]])) + # mm = foo[:n,:] + # dm[:,p] = foo[n:,:] # The discrete-time dynamical model* - if p==0: - A = AA[:n,:n,p] - Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T) + if p == 0: + A = AA[:n, :n, p] + Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T) Q_noise = Q_noise_3 - #PP = A.dot(P).dot(A.T) + Q_noise_2 + # PP = A.dot(P).dot(A.T) + Q_noise_2 # The derivatives of A and Q - dA[:,:,p] = AA[n:,:n,p] - tmp = dA[:,:,p].dot(P_inf).dot(A.T) - dQ[:,:,p] = dP_inf[:,:,p] - tmp \ - - A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T - - dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize + dA[:, :, p] = AA[n:, :n, p] + tmp = dA[:, :, p].dot(P_inf).dot(A.T) + dQ[:, :, p] = ( + dP_inf[:, :, p] - tmp - A.dot(dP_inf[:, :, p]).dot(A.T) - tmp.T + ) + + dQ[:, :, p] = 0.5 * (dQ[:, :, p] + dQ[:, :, p].T) # Symmetrize else: - dA = None - dQ = None - Q_noise = Q_noise_2 - # Innacuracies have been observed when Q_noise_1 was used. - - #Q_noise = Q_noise_1 + dA = None + dQ = None + Q_noise = Q_noise_2 + # Innacuracies have been observed when Q_noise_1 was used. - Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize - return A, Q_noise,None, dA, dQ + # Q_noise = Q_noise_1 - else: # iterable, array + Q_noise = 0.5 * (Q_noise + Q_noise.T) # Symmetrize + return A, Q_noise, None, dA, dQ + else: # iterable, array # Time discretizations (round to 14 decimals to avoid problems) - dt_unique, tmp, reconstruct_index = np.unique(np.round(dt,8), - return_index=True,return_inverse=True) + dt_unique, tmp, reconstruct_index = np.unique( + np.round(dt, 8), return_index=True, return_inverse=True + ) del tmp # Allocate space for A and Q - A = np.empty((n,n,dt_unique.shape[0])) - Q_noise = np.empty((n,n,dt_unique.shape[0])) + A = np.empty((n, n, dt_unique.shape[0])) + Q_noise = np.empty((n, n, dt_unique.shape[0])) if compute_derivatives: - dA = np.empty((n,n,grad_params_no,dt_unique.shape[0])) - dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0])) + dA = np.empty((n, n, grad_params_no, dt_unique.shape[0])) + dQ = np.empty((n, n, grad_params_no, dt_unique.shape[0])) else: dA = None dQ = None # Call this function for each unique dt - for j in range(0,dt_unique.shape[0]): - A[:,:,j], Q_noise[:,:,j], tmp1, dA_t, dQ_t = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j], - compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc) + for j in range(0, dt_unique.shape[0]): + ( + A[:, :, j], + Q_noise[:, :, j], + tmp1, + dA_t, + dQ_t, + ) = ContDescrStateSpace.lti_sde_to_descrete( + F, + L, + Qc, + dt_unique[j], + compute_derivatives=compute_derivatives, + grad_params_no=grad_params_no, + P_inf=P_inf, + dP_inf=dP_inf, + dF=dF, + dQc=dQc, + ) if compute_derivatives: - dA[:,:,:,j] = dA_t - dQ[:,:,:,j] = dQ_t + dA[:, :, :, j] = dA_t + dQ[:, :, :, j] = dQ_t # Return return A, Q_noise, reconstruct_index, dA, dQ + def matrix_exponent(M): """ The function computes matrix exponent and handles some special cases """ - if (M.shape[0] == 1): # 1*1 matrix - Mexp = np.array( ((np.exp(M[0,0]) ,),) ) + if M.shape[0] == 1: # 1*1 matrix + Mexp = np.array(((np.exp(M[0, 0]),),)) - else: # matrix is larger + else: # matrix is larger method = None try: Mexp = linalg.expm(M) @@ -3473,6 +4214,7 @@ def matrix_exponent(M): return Mexp + def balance_matrix(A): """ Balance matrix, i.e. finds such similarity transformation of the original @@ -3503,16 +4245,19 @@ def balance_matrix(A): """ if len(A.shape) != 2 or (A.shape[0] != A.shape[1]): - raise ValueError('balance_matrix: Expecting square matrix') + raise ValueError("balance_matrix: Expecting square matrix") - N = A.shape[0] # matrix size + N = A.shape[0] # matrix size - gebal = sp.linalg.lapack.get_lapack_funcs('gebal',(A,)) - bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True,overwrite_a=False) + gebal = sp.linalg.lapack.get_lapack_funcs("gebal", (A,)) + bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True, overwrite_a=False) if info < 0: - raise ValueError('balance_matrix: Illegal value in %d-th argument of internal gebal ' % -info) + raise ValueError( + "balance_matrix: Illegal value in %d-th argument of internal gebal " % -info + ) + # calculating the similarity transforamtion: - def perm_matr(D, c1,c2): + def perm_matr(D, c1, c2): """ Function creates the permutation matrix which swaps columns c1 and c2. @@ -3525,33 +4270,39 @@ def balance_matrix(A): c2: int Column 2. Numeration starts from 1...D """ - i1 = c1-1; i2 = c2-1 # indices - P = np.eye(D); - P[i1,i1] = 0.0; P[i2,i2] = 0.0; # nullify diagonal elements - P[i1,i2] = 1.0; P[i2,i1] = 1.0 + i1 = c1 - 1 + i2 = c2 - 1 # indices + P = np.eye(D) + P[i1, i1] = 0.0 + P[i2, i2] = 0.0 + # nullify diagonal elements + P[i1, i2] = 1.0 + P[i2, i1] = 1.0 return P - P = np.eye(N) # permutation matrix - if (hi != N-1): # there are row permutations - for k in range(N-1,hi,-1): - new_perm = perm_matr(N, k+1, pivscale[k]) - P = np.dot(P,new_perm) - if (lo != 0): - for k in range(0,lo,1): - new_perm = perm_matr(N, k+1, pivscale[k]) - P = np.dot(P,new_perm) + P = np.eye(N) # permutation matrix + if hi != N - 1: # there are row permutations + for k in range(N - 1, hi, -1): + new_perm = perm_matr(N, k + 1, pivscale[k]) + P = np.dot(P, new_perm) + if lo != 0: + for k in range(0, lo, 1): + new_perm = perm_matr(N, k + 1, pivscale[k]) + P = np.dot(P, new_perm) D = pivscale.copy() - D[0:lo] = 1.0; D[hi+1:N] = 1.0 # thesee scaling factors must be set to one. - #D = np.diag(D) # make a diagonal matrix + D[0:lo] = 1.0 + D[hi + 1 : N] = 1.0 # thesee scaling factors must be set to one. + # D = np.diag(D) # make a diagonal matrix - T = np.dot(P,np.diag(D)) # similarity transformation in question - T_inv = np.dot(np.diag(D**(-1)),P.T) + T = np.dot(P, np.diag(D)) # similarity transformation in question + T_inv = np.dot(np.diag(D ** (-1)), P.T) - #print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) ) + # print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) ) return bA.copy(), T, T_inv -def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): + +def balance_ss_model(F, L, Qc, H, Pinf, P0, dF=None, dQc=None, dPinf=None, dP0=None): """ Balances State-Space model for more numerical stability @@ -3566,28 +4317,28 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): y = H T z """ - bF,T,T_inv = balance_matrix(F) + bF, T, T_inv = balance_matrix(F) - bL = np.dot( T_inv, L) - bQc = Qc # not affected + bL = np.dot(T_inv, L) + bQc = Qc # not affected bH = np.dot(H, T) bPinf = np.dot(T_inv, np.dot(Pinf, T_inv.T)) - #import pdb; pdb.set_trace() -# LL,islower = linalg.cho_factor(Pinf) -# inds = np.triu_indices(Pinf.shape[0],k=1) -# LL[inds] = 0.0 -# bLL = np.dot(T_inv, LL) -# bPinf = np.dot( bLL, bLL.T) + # import pdb; pdb.set_trace() + # LL,islower = linalg.cho_factor(Pinf) + # inds = np.triu_indices(Pinf.shape[0],k=1) + # LL[inds] = 0.0 + # bLL = np.dot(T_inv, LL) + # bPinf = np.dot( bLL, bLL.T) bP0 = np.dot(T_inv, np.dot(P0, T_inv.T)) if dF is not None: bdF = np.zeros(dF.shape) for i in range(dF.shape[2]): - bdF[:,:,i] = np.dot( T_inv, np.dot( dF[:,:,i], T)) + bdF[:, :, i] = np.dot(T_inv, np.dot(dF[:, :, i], T)) else: bdF = None @@ -3595,14 +4346,13 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): if dPinf is not None: bdPinf = np.zeros(dPinf.shape) for i in range(dPinf.shape[2]): - bdPinf[:,:,i] = np.dot( T_inv, np.dot( dPinf[:,:,i], T_inv.T)) - -# LL,islower = linalg.cho_factor(dPinf[:,:,i]) -# inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1) -# LL[inds] = 0.0 -# bLL = np.dot(T_inv, LL) -# bdPinf[:,:,i] = np.dot( bLL, bLL.T) + bdPinf[:, :, i] = np.dot(T_inv, np.dot(dPinf[:, :, i], T_inv.T)) + # LL,islower = linalg.cho_factor(dPinf[:,:,i]) + # inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1) + # LL[inds] = 0.0 + # bLL = np.dot(T_inv, LL) + # bdPinf[:,:,i] = np.dot( bLL, bLL.T) else: bdPinf = None @@ -3610,12 +4360,11 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): if dP0 is not None: bdP0 = np.zeros(dP0.shape) for i in range(dP0.shape[2]): - bdP0[:,:,i] = np.dot( T_inv, np.dot( dP0[:,:,i], T_inv.T)) + bdP0[:, :, i] = np.dot(T_inv, np.dot(dP0[:, :, i], T_inv.T)) else: bdP0 = None - - bdQc = dQc # not affected + bdQc = dQc # not affected # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0) diff --git a/GPy/plotting/matplot_dep/base_plots.py b/GPy/plotting/matplot_dep/base_plots.py index e43f8efa..1eaf7d6c 100644 --- a/GPy/plotting/matplot_dep/base_plots.py +++ b/GPy/plotting/matplot_dep/base_plots.py @@ -5,6 +5,7 @@ import numpy as np from .util import align_subplot_array, align_subplots + def ax_default(fignum, ax): if ax is None: fig = plt.figure(fignum) @@ -13,11 +14,23 @@ def ax_default(fignum, ax): fig = ax.figure return fig, ax -def meanplot(x, mu, color='#3300FF', ax=None, fignum=None, linewidth=2,**kw): - _, axes = ax_default(fignum, ax) - return axes.plot(x,mu,color=color,linewidth=linewidth,**kw) -def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, fignum=None, **kwargs): +def meanplot(x, mu, color="#3300FF", ax=None, fignum=None, linewidth=2, **kw): + _, axes = ax_default(fignum, ax) + return axes.plot(x, mu, color=color, linewidth=linewidth, **kw) + + +def gpplot( + x, + mu, + lower, + upper, + edgecol="#3300FF", + fillcol="#33CCFF", + ax=None, + fignum=None, + **kwargs +): _, axes = ax_default(fignum, ax) mu = mu.flatten() @@ -27,51 +40,62 @@ def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, f plots = [] - #here's the mean + # here's the mean plots.append(meanplot(x, mu, edgecol, axes)) - #here's the box - kwargs['linewidth']=0.5 - if not 'alpha' in kwargs.keys(): - kwargs['alpha'] = 0.3 - plots.append(axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs)) + # here's the box + kwargs["linewidth"] = 0.5 + if not "alpha" in kwargs.keys(): + kwargs["alpha"] = 0.3 + plots.append( + axes.fill( + np.hstack((x, x[::-1])), + np.hstack((upper, lower[::-1])), + color=fillcol, + **kwargs + ) + ) - #this is the edge: - plots.append(meanplot(x, upper,color=edgecol, linewidth=0.2, ax=axes)) - plots.append(meanplot(x, lower,color=edgecol, linewidth=0.2, ax=axes)) + # this is the edge: + plots.append(meanplot(x, upper, color=edgecol, linewidth=0.2, ax=axes)) + plots.append(meanplot(x, lower, color=edgecol, linewidth=0.2, ax=axes)) return plots + def gradient_fill(x, percentiles, ax=None, fignum=None, **kwargs): _, ax = ax_default(fignum, ax) plots = [] - #here's the box - if 'linewidth' not in kwargs: - kwargs['linewidth'] = 0.5 - if not 'alpha' in kwargs.keys(): - kwargs['alpha'] = 1./(len(percentiles)) + # here's the box + if "linewidth" not in kwargs: + kwargs["linewidth"] = 0.5 + if not "alpha" in kwargs.keys(): + kwargs["alpha"] = 1.0 / (len(percentiles)) # pop where from kwargs - where = kwargs.pop('where') if 'where' in kwargs else None + where = kwargs.pop("where") if "where" in kwargs else None # pop interpolate, which we actually do not do here! - if 'interpolate' in kwargs: kwargs.pop('interpolate') + if "interpolate" in kwargs: + kwargs.pop("interpolate") def pairwise(inlist): l = len(inlist) - for i in range(int(np.ceil(l/2.))): - yield inlist[:][i], inlist[:][(l-1)-i] + for i in range(int(np.ceil(l / 2.0))): + yield inlist[:][i], inlist[:][(l - 1) - i] polycol = [] for y1, y2 in pairwise(percentiles): import matplotlib.mlab as mlab + # Handle united data, such as dates ax._process_unit_info(xdata=x, ydata=y1) ax._process_unit_info(ydata=y2) # Convert the arrays so we can work with them from numpy import ma + x = ma.masked_invalid(ax.convert_xunits(x)) y1 = ma.masked_invalid(ax.convert_yunits(y1)) y2 = ma.masked_invalid(ax.convert_yunits(y2)) @@ -103,7 +127,7 @@ def gradient_fill(x, percentiles, ax=None, fignum=None, **kwargs): continue N = len(xslice) - X = np.zeros((2 * N + 2, 2), np.float) + X = np.zeros((2 * N + 2, 2), float) # the purpose of the next two lines is for when y2 is a # scalar like 0 and we want the fill to go all the way @@ -114,19 +138,21 @@ def gradient_fill(x, percentiles, ax=None, fignum=None, **kwargs): X[0] = start X[N + 1] = end - X[1:N + 1, 0] = xslice - X[1:N + 1, 1] = y1slice - X[N + 2:, 0] = xslice[::-1] - X[N + 2:, 1] = y2slice[::-1] + X[1 : N + 1, 0] = xslice + X[1 : N + 1, 1] = y1slice + X[N + 2 :, 0] = xslice[::-1] + X[N + 2 :, 1] = y2slice[::-1] polys.append(X) polycol.extend(polys) from matplotlib.collections import PolyCollection + plots.append(PolyCollection(polycol, **kwargs)) ax.add_collection(plots[-1], autolim=True) ax.autoscale_view() return plots + def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs): _, axes = ax_default(fignum, ax) @@ -138,17 +164,19 @@ def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs): plots = [] if edgecol is None: - edgecol='#3300FF' + edgecol = "#3300FF" - if not 'alpha' in kwargs.keys(): - kwargs['alpha'] = 1. + if not "alpha" in kwargs.keys(): + kwargs["alpha"] = 1.0 + if not "lw" in kwargs.keys(): + kwargs["lw"] = 1.0 - if not 'lw' in kwargs.keys(): - kwargs['lw'] = 1. - - - plots.append(axes.errorbar(x,mu,yerr=np.vstack([mu-lower,upper-mu]),color=edgecol,**kwargs)) + plots.append( + axes.errorbar( + x, mu, yerr=np.vstack([mu - lower, upper - mu]), color=edgecol, **kwargs + ) + ) plots[-1][0].remove() return plots @@ -156,53 +184,60 @@ def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs): def removeRightTicks(ax=None): ax = ax or plt.gca() for i, line in enumerate(ax.get_yticklines()): - if i%2 == 1: # odd indices + if i % 2 == 1: # odd indices line.set_visible(False) + def removeUpperTicks(ax=None): ax = ax or plt.gca() for i, line in enumerate(ax.get_xticklines()): - if i%2 == 1: # odd indices + if i % 2 == 1: # odd indices line.set_visible(False) -def fewerXticks(ax=None,divideby=2): + +def fewerXticks(ax=None, divideby=2): ax = ax or plt.gca() ax.set_xticks(ax.get_xticks()[::divideby]) -def x_frame1D(X,plot_limits=None,resolution=None): + +def x_frame1D(X, plot_limits=None, resolution=None): """ Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits """ - assert X.shape[1] ==1, "x_frame1D is defined for one-dimensional inputs" + assert X.shape[1] == 1, "x_frame1D is defined for one-dimensional inputs" if plot_limits is None: from ...core.parameterization.variational import VariationalPosterior + if isinstance(X, VariationalPosterior): - xmin,xmax = X.mean.min(0),X.mean.max(0) + xmin, xmax = X.mean.min(0), X.mean.max(0) else: - xmin,xmax = X.min(0),X.max(0) - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - elif len(plot_limits)==2: + xmin, xmax = X.min(0), X.max(0) + xmin, xmax = xmin - 0.2 * (xmax - xmin), xmax + 0.2 * (xmax - xmin) + elif len(plot_limits) == 2: xmin, xmax = plot_limits else: raise ValueError("Bad limits for plotting") - Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None] + Xnew = np.linspace(xmin, xmax, resolution or 200)[:, None] return Xnew, xmin, xmax -def x_frame2D(X,plot_limits=None,resolution=None): + +def x_frame2D(X, plot_limits=None, resolution=None): """ Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits """ - assert X.shape[1] ==2, "x_frame2D is defined for two-dimensional inputs" + assert X.shape[1] == 2, "x_frame2D is defined for two-dimensional inputs" if plot_limits is None: - xmin,xmax = X.min(0),X.max(0) - xmin, xmax = xmin-0.2*(xmax-xmin), xmax+0.2*(xmax-xmin) - elif len(plot_limits)==2: + xmin, xmax = X.min(0), X.max(0) + xmin, xmax = xmin - 0.2 * (xmax - xmin), xmax + 0.2 * (xmax - xmin) + elif len(plot_limits) == 2: xmin, xmax = plot_limits else: raise ValueError("Bad limits for plotting") resolution = resolution or 50 - xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] - Xnew = np.vstack((xx.flatten(),yy.flatten())).T + xx, yy = np.mgrid[ + xmin[0] : xmax[0] : 1j * resolution, xmin[1] : xmax[1] : 1j * resolution + ] + Xnew = np.vstack((xx.flatten(), yy.flatten())).T return Xnew, xx, yy, xmin, xmax diff --git a/GPy/plotting/matplot_dep/plot_definitions.py b/GPy/plotting/matplot_dep/plot_definitions.py index 7fadbf67..e462dea2 100644 --- a/GPy/plotting/matplot_dep/plot_definitions.py +++ b/GPy/plotting/matplot_dep/plot_definitions.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright (c) 2015, Max Zwiessele # All rights reserved. # @@ -26,7 +26,7 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#=============================================================================== +# =============================================================================== import numpy as np from matplotlib import pyplot as plt from ..abstract_plotting_library import AbstractPlottingLibrary @@ -37,6 +37,7 @@ from .controllers import ImshowController, ImAnnotateController import itertools from .util import legend_ontop + class MatplotlibPlots(AbstractPlottingLibrary): def __init__(self): super(MatplotlibPlots, self).__init__() @@ -49,54 +50,86 @@ class MatplotlibPlots(AbstractPlottingLibrary): fig.gridspec = plt.GridSpec(rows, cols, **gridspec_kwargs) return fig - def new_canvas(self, figure=None, row=1, col=1, projection='2d', xlabel=None, ylabel=None, zlabel=None, title=None, xlim=None, ylim=None, zlim=None, **kwargs): - if projection == '3d': + def new_canvas( + self, + figure=None, + row=1, + col=1, + projection="2d", + xlabel=None, + ylabel=None, + zlabel=None, + title=None, + xlim=None, + ylim=None, + zlim=None, + **kwargs + ): + if projection == "3d": from mpl_toolkits.mplot3d import Axes3D - elif projection == '2d': + elif projection == "2d": projection = None - if 'ax' in kwargs: - ax = kwargs.pop('ax') + if "ax" in kwargs: + ax = kwargs.pop("ax") else: if figure is not None: fig = figure - elif 'num' in kwargs and 'figsize' in kwargs: - fig = self.figure(num=kwargs.pop('num'), figsize=kwargs.pop('figsize')) - elif 'num' in kwargs: - fig = self.figure(num=kwargs.pop('num')) - elif 'figsize' in kwargs: - fig = self.figure(figsize=kwargs.pop('figsize')) + elif "num" in kwargs and "figsize" in kwargs: + fig = self.figure(num=kwargs.pop("num"), figsize=kwargs.pop("figsize")) + elif "num" in kwargs: + fig = self.figure(num=kwargs.pop("num")) + elif "figsize" in kwargs: + fig = self.figure(figsize=kwargs.pop("figsize")) else: fig = self.figure() - #if hasattr(fig, 'rows') and hasattr(fig, 'cols'): - ax = fig.add_subplot(fig.gridspec[row-1, col-1], projection=projection) + # if hasattr(fig, 'rows') and hasattr(fig, 'cols'): + ax = fig.add_subplot(fig.gridspec[row - 1, col - 1], projection=projection) - if xlim is not None: ax.set_xlim(xlim) - if ylim is not None: ax.set_ylim(ylim) - if xlabel is not None: ax.set_xlabel(xlabel) - if ylabel is not None: ax.set_ylabel(ylabel) - if title is not None: ax.set_title(title) - if projection == '3d': - if zlim is not None: ax.set_zlim(zlim) - if zlabel is not None: ax.set_zlabel(zlabel) + if xlim is not None: + ax.set_xlim(xlim) + if ylim is not None: + ax.set_ylim(ylim) + if xlabel is not None: + ax.set_xlabel(xlabel) + if ylabel is not None: + ax.set_ylabel(ylabel) + if title is not None: + ax.set_title(title) + if projection == "3d": + if zlim is not None: + ax.set_zlim(zlim) + if zlabel is not None: + ax.set_zlabel(zlabel) return ax, kwargs def add_to_canvas(self, ax, plots, legend=False, title=None, **kwargs): - #ax.autoscale_view() - fontdict=dict(family='sans-serif', weight='light', size=9) + # ax.autoscale_view() + fontdict = dict(family="sans-serif", weight="light", size=9) if legend is True: ax.legend(*ax.get_legend_handles_labels()) elif legend >= 1: - #ax.legend(prop=fontdict) + # ax.legend(prop=fontdict) legend_ontop(ax, ncol=legend, fontdict=fontdict) - if title is not None: ax.figure.suptitle(title) + if title is not None: + ax.figure.suptitle(title) return plots def show_canvas(self, ax, **kwargs): ax.figure.canvas.draw() return ax.figure - def scatter(self, ax, X, Y, Z=None, color=Tango.colorsHex['mediumBlue'], label=None, marker='o', **kwargs): + def scatter( + self, + ax, + X, + Y, + Z=None, + color=Tango.colorsHex["mediumBlue"], + label=None, + marker="o", + **kwargs + ): if Z is not None: return ax.scatter(X, Y, c=color, zs=Z, label=label, marker=marker, **kwargs) return ax.scatter(X, Y, c=color, label=label, marker=marker, **kwargs) @@ -106,129 +139,258 @@ class MatplotlibPlots(AbstractPlottingLibrary): return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs) return ax.plot(X, Y, color=color, label=label, **kwargs) - def plot_axis_lines(self, ax, X, color=Tango.colorsHex['darkRed'], label=None, **kwargs): + def plot_axis_lines( + self, ax, X, color=Tango.colorsHex["darkRed"], label=None, **kwargs + ): from matplotlib import transforms from matplotlib.path import Path - if 'marker' not in kwargs: - kwargs['marker'] = Path([[-.2,0.], [-.2,.5], [0.,1.], [.2,.5], [.2,0.], [-.2,0.]], - [Path.MOVETO, Path.LINETO, Path.LINETO, Path.LINETO, Path.LINETO, Path.CLOSEPOLY]) - if 'transform' not in kwargs: + + if "marker" not in kwargs: + kwargs["marker"] = Path( + [ + [-0.2, 0.0], + [-0.2, 0.5], + [0.0, 1.0], + [0.2, 0.5], + [0.2, 0.0], + [-0.2, 0.0], + ], + [ + Path.MOVETO, + Path.LINETO, + Path.LINETO, + Path.LINETO, + Path.LINETO, + Path.CLOSEPOLY, + ], + ) + if "transform" not in kwargs: if X.shape[1] == 1: - kwargs['transform'] = transforms.blended_transform_factory(ax.transData, ax.transAxes) + kwargs["transform"] = transforms.blended_transform_factory( + ax.transData, ax.transAxes + ) if X.shape[1] == 2: - return ax.scatter(X[:,0], X[:,1], ax.get_zlim()[0], c=color, label=label, **kwargs) + return ax.scatter( + X[:, 0], X[:, 1], ax.get_zlim()[0], c=color, label=label, **kwargs + ) return ax.scatter(X, np.zeros_like(X), c=color, label=label, **kwargs) - def barplot(self, ax, x, height, width=0.8, bottom=0, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): - if 'align' not in kwargs: - kwargs['align'] = 'center' - return ax.bar(x=x, height=height, width=width, - bottom=bottom, label=label, color=color, - **kwargs) + def barplot( + self, + ax, + x, + height, + width=0.8, + bottom=0, + color=Tango.colorsHex["mediumBlue"], + label=None, + **kwargs + ): + if "align" not in kwargs: + kwargs["align"] = "center" + return ax.bar( + x=x, + height=height, + width=width, + bottom=bottom, + label=label, + color=color, + **kwargs + ) - def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs): - if not('linestyle' in kwargs or 'ls' in kwargs): - kwargs['ls'] = 'none' - #if Z is not None: + def xerrorbar( + self, ax, X, Y, error, color=Tango.colorsHex["darkRed"], label=None, **kwargs + ): + if not ("linestyle" in kwargs or "ls" in kwargs): + kwargs["ls"] = "none" + # if Z is not None: # return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs) return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs) - def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs): - if not('linestyle' in kwargs or 'ls' in kwargs): - kwargs['ls'] = 'none' - #if Z is not None: + def yerrorbar( + self, ax, X, Y, error, color=Tango.colorsHex["darkRed"], label=None, **kwargs + ): + if not ("linestyle" in kwargs or "ls" in kwargs): + kwargs["ls"] = "none" + # if Z is not None: # return ax.errorbar(X, Y, Z, yerr=error, ecolor=color, label=label, **kwargs) return ax.errorbar(X, Y, yerr=error, ecolor=color, label=label, **kwargs) - def imshow(self, ax, X, extent=None, label=None, vmin=None, vmax=None, **imshow_kwargs): - if 'origin' not in imshow_kwargs: - imshow_kwargs['origin'] = 'lower' - #xmin, xmax, ymin, ymax = extent - #xoffset, yoffset = (xmax - xmin) / (2. * X.shape[0]), (ymax - ymin) / (2. * X.shape[1]) - #xmin, xmax, ymin, ymax = extent = xmin-xoffset, xmax+xoffset, ymin-yoffset, ymax+yoffset - return ax.imshow(X, label=label, extent=extent, vmin=vmin, vmax=vmax, **imshow_kwargs) + def imshow( + self, ax, X, extent=None, label=None, vmin=None, vmax=None, **imshow_kwargs + ): + if "origin" not in imshow_kwargs: + imshow_kwargs["origin"] = "lower" + # xmin, xmax, ymin, ymax = extent + # xoffset, yoffset = (xmax - xmin) / (2. * X.shape[0]), (ymax - ymin) / (2. * X.shape[1]) + # xmin, xmax, ymin, ymax = extent = xmin-xoffset, xmax+xoffset, ymin-yoffset, ymax+yoffset + return ax.imshow( + X, label=label, extent=extent, vmin=vmin, vmax=vmax, **imshow_kwargs + ) - def imshow_interact(self, ax, plot_function, extent, label=None, resolution=None, vmin=None, vmax=None, **imshow_kwargs): - if imshow_kwargs is None: imshow_kwargs = {} - if 'origin' not in imshow_kwargs: - imshow_kwargs['origin'] = 'lower' - return ImshowController(ax, plot_function, extent, resolution=resolution, vmin=vmin, vmax=vmax, **imshow_kwargs) + def imshow_interact( + self, + ax, + plot_function, + extent, + label=None, + resolution=None, + vmin=None, + vmax=None, + **imshow_kwargs + ): + if imshow_kwargs is None: + imshow_kwargs = {} + if "origin" not in imshow_kwargs: + imshow_kwargs["origin"] = "lower" + return ImshowController( + ax, + plot_function, + extent, + resolution=resolution, + vmin=vmin, + vmax=vmax, + **imshow_kwargs + ) - def annotation_heatmap(self, ax, X, annotation, extent=None, label=None, imshow_kwargs=None, **annotation_kwargs): - if imshow_kwargs is None: imshow_kwargs = {} - if 'origin' not in imshow_kwargs: - imshow_kwargs['origin'] = 'lower' - if ('ha' not in annotation_kwargs) and ('horizontalalignment' not in annotation_kwargs): - annotation_kwargs['ha'] = 'center' - if ('va' not in annotation_kwargs) and ('verticalalignment' not in annotation_kwargs): - annotation_kwargs['va'] = 'center' + def annotation_heatmap( + self, + ax, + X, + annotation, + extent=None, + label=None, + imshow_kwargs=None, + **annotation_kwargs + ): + if imshow_kwargs is None: + imshow_kwargs = {} + if "origin" not in imshow_kwargs: + imshow_kwargs["origin"] = "lower" + if ("ha" not in annotation_kwargs) and ( + "horizontalalignment" not in annotation_kwargs + ): + annotation_kwargs["ha"] = "center" + if ("va" not in annotation_kwargs) and ( + "verticalalignment" not in annotation_kwargs + ): + annotation_kwargs["va"] = "center" imshow = self.imshow(ax, X, extent, label, **imshow_kwargs) if extent is None: extent = (0, X.shape[0], 0, X.shape[1]) xmin, xmax, ymin, ymax = extent - xoffset, yoffset = (xmax - xmin) / (2. * X.shape[0]), (ymax - ymin) / (2. * X.shape[1]) + xoffset, yoffset = (xmax - xmin) / (2.0 * X.shape[0]), (ymax - ymin) / ( + 2.0 * X.shape[1] + ) xlin = np.linspace(xmin, xmax, X.shape[0], endpoint=False) ylin = np.linspace(ymin, ymax, X.shape[1], endpoint=False) annotations = [] for [i, x], [j, y] in itertools.product(enumerate(xlin), enumerate(ylin)): - annotations.append(ax.text(x+xoffset, y+yoffset, "{}".format(annotation[j, i]), **annotation_kwargs)) + annotations.append( + ax.text( + x + xoffset, + y + yoffset, + "{}".format(annotation[j, i]), + **annotation_kwargs + ) + ) return imshow, annotations - def annotation_heatmap_interact(self, ax, plot_function, extent, label=None, resolution=15, imshow_kwargs=None, **annotation_kwargs): - if imshow_kwargs is None: imshow_kwargs = {} - if 'origin' not in imshow_kwargs: - imshow_kwargs['origin'] = 'lower' - return ImAnnotateController(ax, plot_function, extent, resolution=resolution, imshow_kwargs=imshow_kwargs or {}, **annotation_kwargs) + def annotation_heatmap_interact( + self, + ax, + plot_function, + extent, + label=None, + resolution=15, + imshow_kwargs=None, + **annotation_kwargs + ): + if imshow_kwargs is None: + imshow_kwargs = {} + if "origin" not in imshow_kwargs: + imshow_kwargs["origin"] = "lower" + return ImAnnotateController( + ax, + plot_function, + extent, + resolution=resolution, + imshow_kwargs=imshow_kwargs or {}, + **annotation_kwargs + ) def contour(self, ax, X, Y, C, levels=20, label=None, **kwargs): - return ax.contour(X, Y, C, levels=np.linspace(C.min(), C.max(), levels), label=label, **kwargs) + return ax.contour( + X, Y, C, levels=np.linspace(C.min(), C.max(), levels), label=label, **kwargs + ) def surface(self, ax, X, Y, Z, color=None, label=None, **kwargs): return ax.plot_surface(X, Y, Z, label=label, **kwargs) - def fill_between(self, ax, X, lower, upper, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): + def fill_between( + self, + ax, + X, + lower, + upper, + color=Tango.colorsHex["mediumBlue"], + label=None, + **kwargs + ): return ax.fill_between(X, lower, upper, facecolor=color, label=label, **kwargs) - def fill_gradient(self, canvas, X, percentiles, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): + def fill_gradient( + self, + canvas, + X, + percentiles, + color=Tango.colorsHex["mediumBlue"], + label=None, + **kwargs + ): ax = canvas plots = [] - if 'edgecolors' not in kwargs: - kwargs['edgecolors'] = 'none' + if "edgecolors" not in kwargs: + kwargs["edgecolors"] = "none" - if 'facecolors' in kwargs: - color = kwargs.pop('facecolors') + if "facecolors" in kwargs: + color = kwargs.pop("facecolors") - if 'array' in kwargs: - array = kwargs.pop('array') + if "array" in kwargs: + array = kwargs.pop("array") else: - array = 1.-np.abs(np.linspace(-.97, .97, len(percentiles)-1)) + array = 1.0 - np.abs(np.linspace(-0.97, 0.97, len(percentiles) - 1)) - if 'alpha' in kwargs: - alpha = kwargs.pop('alpha') + if "alpha" in kwargs: + alpha = kwargs.pop("alpha") else: - alpha = .8 + alpha = 0.8 - if 'cmap' in kwargs: - cmap = kwargs.pop('cmap') + if "cmap" in kwargs: + cmap = kwargs.pop("cmap") else: - cmap = LinearSegmentedColormap.from_list('WhToColor', (color, color), N=array.size) + cmap = LinearSegmentedColormap.from_list( + "WhToColor", (color, color), N=array.size + ) cmap._init() - cmap._lut[:-3, -1] = alpha*array + cmap._lut[:-3, -1] = alpha * array - kwargs['facecolors'] = [cmap(i) for i in np.linspace(0,1,cmap.N)] + kwargs["facecolors"] = [cmap(i) for i in np.linspace(0, 1, cmap.N)] # pop where from kwargs - where = kwargs.pop('where') if 'where' in kwargs else None + where = kwargs.pop("where") if "where" in kwargs else None # pop interpolate, which we actually do not do here! - if 'interpolate' in kwargs: kwargs.pop('interpolate') + if "interpolate" in kwargs: + kwargs.pop("interpolate") def pairwise(iterable): "s -> (s0,s1), (s1,s2), (s2, s3), ..." from itertools import tee - #try: + + # try: # from itertools import izip as zip - #except ImportError: + # except ImportError: # pass a, b = tee(iterable) next(b, None) @@ -245,6 +407,7 @@ class MatplotlibPlots(AbstractPlottingLibrary): ax._process_unit_info(ydata=y2) # Convert the arrays so we can work with them from numpy import ma + x = ma.masked_invalid(ax.convert_xunits(X)) y1 = ma.masked_invalid(ax.convert_yunits(y1)) y2 = ma.masked_invalid(ax.convert_yunits(y2)) @@ -263,6 +426,7 @@ class MatplotlibPlots(AbstractPlottingLibrary): raise ValueError("Argument dimensions are incompatible") from functools import reduce + mask = reduce(ma.mask_or, [ma.getmask(a) for a in (x, y1, y2)]) if mask is not ma.nomask: where &= ~mask @@ -277,7 +441,7 @@ class MatplotlibPlots(AbstractPlottingLibrary): continue N = len(xslice) - p = np.zeros((2 * N + 2, 2), np.float) + p = np.zeros((2 * N + 2, 2), float) # the purpose of the next two lines is for when y2 is a # scalar like 0 and we want the fill to go all the way @@ -288,16 +452,17 @@ class MatplotlibPlots(AbstractPlottingLibrary): p[0] = start p[N + 1] = end - p[1:N + 1, 0] = xslice - p[1:N + 1, 1] = y1slice - p[N + 2:, 0] = xslice[::-1] - p[N + 2:, 1] = y2slice[::-1] + p[1 : N + 1, 0] = xslice + p[1 : N + 1, 1] = y1slice + p[N + 2 :, 0] = xslice[::-1] + p[N + 2 :, 1] = y2slice[::-1] polys.append(p) polycol.extend(polys) from matplotlib.collections import PolyCollection - if 'zorder' not in kwargs: - kwargs['zorder'] = 0 + + if "zorder" not in kwargs: + kwargs["zorder"] = 0 plots.append(PolyCollection(polycol, label=label, **kwargs)) ax.add_collection(plots[-1], autolim=True) ax.autoscale_view() diff --git a/GPy/testing/test_ep_likelihood.py b/GPy/testing/test_ep_likelihood.py index 2ab42617..67bea0a4 100644 --- a/GPy/testing/test_ep_likelihood.py +++ b/GPy/testing/test_ep_likelihood.py @@ -24,7 +24,7 @@ class TestObservationModels: self.Y = (np.sin(self.X[:, 0] * 2 * np.pi) + noise)[:, None] self.num_points = self.X.shape[0] self.f = np.random.rand(self.N, 1) - self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None] + self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=int)[:, None] # self.binary_Y[self.binary_Y == 0.0] = -1.0 self.positive_Y = np.exp(self.Y.copy()) diff --git a/GPy/testing/test_likelihood.py b/GPy/testing/test_likelihood.py index ce82b9c0..f35bd0f3 100644 --- a/GPy/testing/test_likelihood.py +++ b/GPy/testing/test_likelihood.py @@ -136,7 +136,7 @@ class TestNoiseModels: noise = np.random.randn(*self.X[:, 0].shape) * self.real_std self.Y = (np.sin(self.X[:, 0] * 2 * np.pi) + noise)[:, None] self.f = np.random.rand(self.N, 1) - self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None] + self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=int)[:, None] self.binary_Y[self.binary_Y == 0.0] = -1.0 self.positive_Y = np.exp(self.Y.copy()) tmp = ( diff --git a/GPy/testing/test_model.py b/GPy/testing/test_model.py index 44b2c0a6..f78885af 100644 --- a/GPy/testing/test_model.py +++ b/GPy/testing/test_model.py @@ -1432,8 +1432,8 @@ class TestGradient: y = np.zeros((D * N_train,)) x_test = np.zeros((D * (N - N_train),)) y_test = np.zeros((D * (N - N_train),)) - indexD = np.zeros((D * N_train), dtype=np.int) - indexD_test = np.zeros((D * (N - N_train)), dtype=np.int) + indexD = np.zeros((D * N_train), dtype=int) + indexD_test = np.zeros((D * (N - N_train)), dtype=int) offset_all = 0 offset_train = 0 diff --git a/GPy/testing/test_pickle.py b/GPy/testing/test_pickle.py index dea50889..6783336f 100644 --- a/GPy/testing/test_pickle.py +++ b/GPy/testing/test_pickle.py @@ -53,7 +53,7 @@ class TestPickleSupport(ListDictTestCase): assert par.param_array.tolist() == pcopy.param_array.tolist() np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) assert str(par) == str(pcopy) - assert par.param_array != pcopy.param_array + assert np.all(par.param_array != pcopy.param_array) assert par.gradient_full != pcopy.gradient_full assert pcopy.checkgrad() assert np.any(pcopy.gradient != 0.0) @@ -72,7 +72,7 @@ class TestPickleSupport(ListDictTestCase): np.testing.assert_allclose(par.param_array, pcopy.param_array) np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full) assert str(par) == str(pcopy) - assert par.param_array != pcopy.param_array + assert np.all(par.param_array != pcopy.param_array) assert par.gradient_full != pcopy.gradient_full assert pcopy.checkgrad() assert np.any(pcopy.gradient != 0.0) @@ -97,7 +97,7 @@ class TestPickleSupport(ListDictTestCase): assert par.param_array.tolist() == pcopy.param_array.tolist() assert par.gradient_full.tolist() == pcopy.gradient_full.tolist() assert str(par) == str(pcopy) - assert par.param_array != pcopy.param_array + assert np.all(par.param_array != pcopy.param_array) assert par.gradient_full != pcopy.gradient_full with tempfile.TemporaryFile("w+b") as f: par.pickle(f) @@ -116,7 +116,7 @@ class TestPickleSupport(ListDictTestCase): assert par.param_array.tolist() == pcopy.param_array.tolist() assert par.gradient_full.tolist() == pcopy.gradient_full.tolist() assert str(par) == str(pcopy) - assert par.param_array != pcopy.param_array + assert np.all(par.param_array != pcopy.param_array) assert par.gradient_full != pcopy.gradient_full assert par.checkgrad() assert pcopy.checkgrad() diff --git a/GPy/util/classification.py b/GPy/util/classification.py index 69609091..bb321729 100644 --- a/GPy/util/classification.py +++ b/GPy/util/classification.py @@ -2,7 +2,8 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True): + +def conf_matrix(p, labels, names=["1", "0"], threshold=0.5, show=True): """ Returns error rate and true/false positives in a binary classification problem - Actual classes are displayed by column. @@ -16,18 +17,18 @@ def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True): :type show: False|True """ assert p.size == labels.size, "Arrays p and labels have different dimensions." - decision = np.ones((labels.size,1)) - decision[p>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]] """ - if len(index)==0: - return[] + if len(index) == 0: + return [] - #contruct the return structure - ind = np.asarray(index,dtype=np.int) - ret = [[] for i in range(ind.max()+1)] + # contruct the return structure + ind = np.asarray(index, dtype=int) + ret = [[] for i in range(ind.max() + 1)] - #find the switchpoints - ind_ = np.hstack((ind,ind[0]+ind[-1]+1)) - switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0] + # find the switchpoints + ind_ = np.hstack((ind, ind[0] + ind[-1] + 1)) + switchpoints = np.nonzero(ind_ - np.roll(ind_, +1))[0] - [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] + [ + ret[ind_i].append(slice(*indexes_i)) + for ind_i, indexes_i in zip( + ind[switchpoints[:-1]], zip(switchpoints, switchpoints[1:]) + ) + ] return ret + def get_slices(input_list): num_outputs = len(input_list) - _s = [0] + [ _x.shape[0] for _x in input_list ] + _s = [0] + [_x.shape[0] for _x in input_list] _s = np.cumsum(_s) - slices = [slice(a,b) for a,b in zip(_s[:-1],_s[1:])] + slices = [slice(a, b) for a, b in zip(_s[:-1], _s[1:])] return slices -def build_XY(input_list,output_list=None,index=None): + +def build_XY(input_list, output_list=None, index=None): num_outputs = len(input_list) if output_list is not None: assert num_outputs == len(output_list) @@ -47,27 +55,35 @@ def build_XY(input_list,output_list=None,index=None): if index is not None: assert len(index) == num_outputs - I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,index)] ) + I = np.hstack([np.repeat(j, _x.shape[0]) for _x, j in zip(input_list, index)]) else: - I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,range(num_outputs))] ) + I = np.hstack( + [np.repeat(j, _x.shape[0]) for _x, j in zip(input_list, range(num_outputs))] + ) X = np.vstack(input_list) - X = np.hstack([X,I[:,None]]) + X = np.hstack([X, I[:, None]]) - return X,Y,I[:,None]#slices + return X, Y, I[:, None] # slices -def build_likelihood(Y_list,noise_index,likelihoods_list=None): + +def build_likelihood(Y_list, noise_index, likelihoods_list=None): Ny = len(Y_list) if likelihoods_list is None: - likelihoods_list = [GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for y,j in zip(Y_list,range(Ny))] + likelihoods_list = [ + GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" % j) + for y, j in zip(Y_list, range(Ny)) + ] else: assert len(likelihoods_list) == Ny - #likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list, noise_index=noise_index) - likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list) + # likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list, noise_index=noise_index) + likelihood = GPy.likelihoods.mixed_noise.MixedNoise( + likelihoods_list=likelihoods_list + ) return likelihood -def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='ICM'): +def ICM(input_dim, num_outputs, kernel, W_rank=1, W=None, kappa=None, name="ICM"): """ Builds a kernel for an Intrinsic Coregionalization Model @@ -80,13 +96,26 @@ def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='ICM'): """ if kernel.input_dim != input_dim: kernel.input_dim = input_dim - warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") + warnings.warn( + "kernel's input dimension overwritten to fit input_dim parameter." + ) - K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name) + K = kernel.prod( + GPy.kern.Coregionalize( + 1, + num_outputs, + active_dims=[input_dim], + rank=W_rank, + W=W, + kappa=kappa, + name="B", + ), + name=name, + ) return K -def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='ICM'): +def LCM(input_dim, num_outputs, kernels_list, W_rank=1, name="ICM"): """ Builds a kernel for an Linear Coregionalization Model @@ -98,15 +127,15 @@ def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='ICM'): :type W_rank: integer """ Nk = len(kernels_list) - K = ICM(input_dim,num_outputs,kernels_list[0],W_rank,name='%s%s' %(name,0)) + K = ICM(input_dim, num_outputs, kernels_list[0], W_rank, name="%s%s" % (name, 0)) j = 1 for kernel in kernels_list[1:]: - K += ICM(input_dim,num_outputs,kernel,W_rank,name='%s%s' %(name,j)) + K += ICM(input_dim, num_outputs, kernel, W_rank, name="%s%s" % (name, j)) j += 1 return K -def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'): +def Private(input_dim, num_outputs, kernel, output, kappa=None, name="X"): """ Builds a kernel for an Intrinsic Coregionalization Model @@ -117,7 +146,7 @@ def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'): :param W_rank: number tuples of the corregionalization parameters 'W' :type W_rank: integer """ - K = ICM(input_dim,num_outputs,kernel,W_rank=1,kappa=kappa,name=name) + K = ICM(input_dim, num_outputs, kernel, W_rank=1, kappa=kappa, name=name) K.B.W.fix(0) _range = range(num_outputs) _range.pop(output)