From 726d76c427a640c7f28271ee8e92d5c8505fb7fa Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 3 May 2016 14:37:55 +0100 Subject: [PATCH 1/7] [examples] dim reduction plotting changes --- GPy/examples/dimensionality_reduction.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index ce1c89e8..f1df3cf9 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -340,7 +340,7 @@ def bgplvm_simulation(optimize=True, verbose=1, gtol=.05) if plot: m.X.plot("BGPLVM Latent Space 1D") - m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') + m.kern.plot_ARD() return m def gplvm_simulation(optimize=True, verbose=1, @@ -364,7 +364,7 @@ def gplvm_simulation(optimize=True, verbose=1, gtol=.05) if plot: m.X.plot("BGPLVM Latent Space 1D") - m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') + m.kern.plot_ARD() return m def ssgplvm_simulation(optimize=True, verbose=1, plot=True, plot_sim=False, @@ -388,7 +388,7 @@ def ssgplvm_simulation(optimize=True, verbose=1, gtol=.05) if plot: m.X.plot("SSGPLVM Latent Space 1D") - m.kern.plot_ARD('SSGPLVM Simulation ARD Parameters') + m.kern.plot_ARD() return m def bgplvm_simulation_missing_data(optimize=True, verbose=1, @@ -418,7 +418,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, gtol=.05) if plot: m.X.plot("BGPLVM Latent Space 1D") - m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') + m.kern.plot_ARD() return m def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1, @@ -448,7 +448,7 @@ def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1, gtol=.05) if plot: m.X.plot("BGPLVM Latent Space 1D") - m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') + m.kern.plot_ARD() return m @@ -469,7 +469,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): m.optimize(messages=verbose, max_iters=8e3) if plot: m.X.plot("MRD Latent Space 1D") - m.plot_scales("MRD Scales") + m.plot_scales() return m def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): @@ -496,7 +496,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1) if plot: m.X.plot("MRD Latent Space 1D") - m.plot_scales("MRD Scales") + m.plot_scales() return m def brendan_faces(optimize=True, verbose=True, plot=True): From 22d7e2042909a8aa5ab5c0a52dc3fd05ff319493 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 11 May 2016 14:42:25 +0100 Subject: [PATCH 2/7] =?UTF-8?q?Bump=20version:=201.0.7=20=E2=86=92=201.0.8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPy/__version__.py | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/__version__.py b/GPy/__version__.py index 9e604c04..e13bd590 100644 --- a/GPy/__version__.py +++ b/GPy/__version__.py @@ -1 +1 @@ -__version__ = "1.0.7" +__version__ = "1.0.8" diff --git a/setup.cfg b/setup.cfg index 652cc7a2..510a146c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.7 +current_version = 1.0.8 tag = False commit = True From 375f3e0d6c7ee2656f964ba1ad70e03e1cb4aba8 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 11 May 2016 15:02:44 +0100 Subject: [PATCH 3/7] [setxy] was bugged --- GPy/core/gp.py | 8 +++++--- GPy/testing/gp_tests.py | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 1434573a..decca6b8 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -148,14 +148,16 @@ class GP(Model): # LVM models if isinstance(self.X, VariationalPosterior): assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!" + index = self.X._parent_index_ self.unlink_parameter(self.X) self.X = X - self.link_parameter(self.X) + self.link_parameter(self.X, index=index) else: + index = self.X._parent_index_ self.unlink_parameter(self.X) from ..core import Param - self.X = Param('latent mean',X) - self.link_parameter(self.X) + self.X = Param('latent mean', X) + self.link_parameter(self.X, index=index) else: self.X = ObsAr(X) self.update_model(True) diff --git a/GPy/testing/gp_tests.py b/GPy/testing/gp_tests.py index 3ce3ffc4..97e3718d 100644 --- a/GPy/testing/gp_tests.py +++ b/GPy/testing/gp_tests.py @@ -24,9 +24,9 @@ class Test(unittest.TestCase): k = GPy.kern.RBF(1) m = GPy.models.BayesianGPLVM(self.Y, 1, kernel=k) mu, var = m.predict(m.X) - X = m.X.copy() + X = m.X Xnew = NormalPosterior(m.X.mean[:10].copy(), m.X.variance[:10].copy()) - m.set_XY(Xnew, m.Y[:10]) + m.set_XY(Xnew, m.Y[:10].copy()) assert(m.checkgrad()) m.set_XY(X, self.Y) mu2, var2 = m.predict(m.X) @@ -40,7 +40,7 @@ class Test(unittest.TestCase): mu, var = m.predict(m.X) X = m.X.copy() Xnew = X[:10].copy() - m.set_XY(Xnew, m.Y[:10]) + m.set_XY(Xnew, m.Y[:10].copy()) assert(m.checkgrad()) m.set_XY(X, self.Y) mu2, var2 = m.predict(m.X) From a20f93850dbd009447de1979dc14153d028ae0b7 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 11 May 2016 15:02:53 +0100 Subject: [PATCH 4/7] =?UTF-8?q?Bump=20version:=201.0.8=20=E2=86=92=201.0.9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPy/__version__.py | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/__version__.py b/GPy/__version__.py index e13bd590..39e0411d 100644 --- a/GPy/__version__.py +++ b/GPy/__version__.py @@ -1 +1 @@ -__version__ = "1.0.8" +__version__ = "1.0.9" diff --git a/setup.cfg b/setup.cfg index 510a146c..0f00211e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.8 +current_version = 1.0.9 tag = False commit = True From f8441433539c4ae6f17f3186ec3375a5167112c6 Mon Sep 17 00:00:00 2001 From: cdguarnizo Date: Fri, 20 May 2016 02:03:55 -0500 Subject: [PATCH 5/7] Add eq_ode1 kern and ibp_lfm model --- GPy/kern/__init__.py | 1 + GPy/kern/src/eq_ode1.py | 612 ++++++++++++++++++++++++++++++++++++++++ GPy/models/__init__.py | 2 + GPy/models/ibp_lfm.py | 535 +++++++++++++++++++++++++++++++++++ 4 files changed, 1150 insertions(+) create mode 100644 GPy/kern/src/eq_ode1.py create mode 100644 GPy/models/ibp_lfm.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 7f44b6a9..4a2201b1 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -24,6 +24,7 @@ from .src.ODE_st import ODE_st from .src.ODE_t import ODE_t from .src.poly import Poly from .src.eq_ode2 import EQ_ODE2 +from .src.eq_ode1 import EQ_ODE1 from .src.trunclinear import TruncLinear,TruncLinear_inf from .src.splitKern import SplitKern,DEtime from .src.splitKern import DEtime as DiffGenomeKern diff --git a/GPy/kern/src/eq_ode1.py b/GPy/kern/src/eq_ode1.py new file mode 100644 index 00000000..7b218068 --- /dev/null +++ b/GPy/kern/src/eq_ode1.py @@ -0,0 +1,612 @@ +# Copyright (c) 2014, Cristian Guarnizo. +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from scipy.special import erf, erfcx +from .kern import Kern +from ...core.parameterization import Param +from paramz.transformations import Logexp +from paramz.caching import Cache_this + +class EQ_ODE1(Kern): + """ + Covariance function for first order differential equation driven by an exponentiated quadratic covariance. + + This outputs of this kernel have the form + .. math:: + \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t) + + where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance. + + :param output_dim: number of outputs driven by latent function. + :type output_dim: int + :param W: sensitivities of each output to the latent driving function. + :type W: ndarray (output_dim x rank). + :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance. + :type rank: int + :param decay: decay rates for the first order system. + :type decay: array of length output_dim. + :param delay: delay between latent force and output response. + :type delay: array of length output_dim. + :param kappa: diagonal term that allows each latent output to have an independent component to the response. + :type kappa: array of length output_dim. + + .. Note: see first order differential equation examples in GPy.examples.regression for some usage. + """ + def __init__(self, input_dim=2, output_dim=1, rank=1, W = None, lengthscale=None, decay=None, active_dims=None, name='eq_ode1'): + assert input_dim == 2, "only defined for 1 input dims" + super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name) + + self.rank = rank + self.output_dim = output_dim + + if lengthscale is None: + lengthscale = .5 + np.random.rand(self.rank) + else: + lengthscale = np.asarray(lengthscale) + assert lengthscale.size in [1, self.rank], "Bad number of lengthscales" + if lengthscale.size != self.rank: + lengthscale = np.ones(self.rank)*lengthscale + + if W is None: + W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) + else: + assert W.shape == (self.output_dim, self.rank) + + if decay is None: + decay = np.ones(self.output_dim) + else: + decay = np.asarray(decay) + assert decay.size in [1, self.output_dim], "Bad number of decay" + if decay.size != self.output_dim: + decay = np.ones(self.output_dim)*decay + +# if kappa is None: +# self.kappa = np.ones(self.output_dim) +# else: +# kappa = np.asarray(kappa) +# assert kappa.size in [1, self.output_dim], "Bad number of kappa" +# if decay.size != self.output_dim: +# decay = np.ones(self.output_dim)*kappa + + #self.kappa = Param('kappa', kappa, Logexp()) + #self.delay = Param('delay', delay, Logexp()) + #self.is_normalized = True + #self.is_stationary = False + #self.gaussian_initial = False + + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) + self.decay = Param('decay', decay, Logexp()) + self.W = Param('W', W) + self.link_parameters(self.lengthscale, self.decay, self.W) + + @Cache_this(limit=3) + def K(self, X, X2=None): + #This way is not working, indexes are lost after using k._slice_X + #index = np.asarray(X, dtype=np.int) + #index = index.reshape(index.size,) + if hasattr(X, 'values'): + X = X.values + index = np.int_(np.round(X[:, 1])) + index = index.reshape(index.size,) + X_flag = index[0] >= self.output_dim + if X2 is None: + if X_flag: + #Calculate covariance function for the latent functions + index -= self.output_dim + return self._Kuu(X, index) + else: + raise NotImplementedError + else: + #This way is not working, indexes are lost after using k._slice_X + #index2 = np.asarray(X2, dtype=np.int) + #index2 = index2.reshape(index2.size,) + if hasattr(X2, 'values'): + X2 = X2.values + index2 = np.int_(np.round(X2[:, 1])) + index2 = index2.reshape(index2.size,) + X2_flag = index2[0] >= self.output_dim + #Calculate cross-covariance function + if not X_flag and X2_flag: + index2 -= self.output_dim + return self._Kfu(X, index, X2, index2) #Kfu + else: + index -= self.output_dim + return self._Kfu(X2, index2, X, index).T #Kuf + + #Calculate the covariance function for diag(Kff(X,X)) + def Kdiag(self, X): + #This way is not working, indexes are lost after using k._slice_X + #index = np.asarray(X, dtype=np.int) + #index = index.reshape(index.size,) + if hasattr(X, 'values'): + X = X.values + index = np.int_(X[:, 1]) + index = index.reshape(index.size,) + + #terms that move along t + t = X[:, 0].reshape(X.shape[0], 1) + d = np.unique(index) #Output Indexes + B = self.decay.values[d] + S = self.W.values[d, :] + #Index transformation + indd = np.arange(self.output_dim) + indd[d] = np.arange(d.size) + index = indd[index] + + B = B.reshape(B.size, 1) + #Terms that move along q + lq = self.lengthscale.values.reshape(1, self.rank) + S2 = S*S + kdiag = np.empty((t.size, )) + + #Dx1 terms + c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq) + + #DxQ terms + nu = lq*(B*.5) + nu2 = nu*nu + #Nx1 terms + gamt = -2.*B + gamt = gamt[index]*t + + #NxQ terms + t_lq = t/lq + + # Upsilon Calculations + # Using wofz + #erfnu = erf(nu) + + upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :] ,t_lq+nu[index,:] )) + upm[t[:, 0] == 0, :] = 0. + + + upv = np.exp(nu2[index, :] + gamt + lnDifErf( -t_lq+nu[index,:], nu[index, :] ) ) + upv[t[:, 0] == 0, :] = 0. + + #Covariance calculation + #kdiag = np.sum(c0[index, :]*(upm-upv), axis=1) + kdiag = c0[index, :]*(upm-upv) + return kdiag + + def update_gradients_full(self, dL_dK, X, X2 = None): + #index = np.asarray(X, dtype=np.int) + #index = index.reshape(index.size,) + if hasattr(X, 'values'): + X = X.values + self.decay.gradient = np.zeros(self.decay.shape) + self.W.gradient = np.zeros(self.W.shape) + self.lengthscale.gradient = np.zeros(self.lengthscale.shape) + index = np.int_(np.round(X[:, 1])) + index = index.reshape(index.size,) + X_flag = index[0] >= self.output_dim + if X2 is None: + if X_flag: #Kuu or Kmm + index -= self.output_dim + tmp = dL_dK*self._gkuu_lq(X, index) + for q in np.unique(index): + ind = np.where(index == q) + self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum() + else: + raise NotImplementedError + else: #Kfu or Knm + #index2 = np.asarray(X2, dtype=np.int) + #index2 = index2.reshape(index2.size,) + if hasattr(X2, 'values'): + X2 = X2.values + index2 = np.int_(np.round(X2[:, 1])) + index2 = index2.reshape(index2.size,) + X2_flag = index2[0] >= self.output_dim + if not X_flag and X2_flag: #Kfu + index2 -= self.output_dim + else: #Kuf + dL_dK = dL_dK.T #so we obtaing dL_Kfu + indtemp = index - self.output_dim + Xtemp = X + X = X2 + X2 = Xtemp + index = index2 + index2 = indtemp + glq, gSdq, gB = self._gkfu(X, index, X2, index2) + tmp = dL_dK*glq + for q in np.unique(index2): + ind = np.where(index2 == q) + self.lengthscale.gradient[q] = tmp[:, ind].sum() + tmpB = dL_dK*gB + tmp = dL_dK*gSdq + for d in np.unique(index): + ind = np.where(index == d) + self.decay.gradient[d] = tmpB[ind, :].sum() + for q in np.unique(index2): + ind2 = np.where(index2 == q) + self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum() + + def update_gradients_diag(self, dL_dKdiag, X): + #index = np.asarray(X, dtype=np.int) + #index = index.reshape(index.size,) + if hasattr(X, 'values'): + X = X.values + self.decay.gradient = np.zeros(self.decay.shape) + self.W.gradient = np.zeros(self.W.shape) + self.lengthscale.gradient = np.zeros(self.lengthscale.shape) + index = np.int_(X[:, 1]) + index = index.reshape(index.size,) + + glq, gS, gB = self._gkdiag(X, index) + if dL_dKdiag.size == X.shape[0]: + dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1)) + tmp = dL_dKdiag*glq + self.lengthscale.gradient = tmp.sum(0) + tmpB = dL_dKdiag*gB + tmp = dL_dKdiag*gS + for d in np.unique(index): + ind = np.where(index == d) + self.decay.gradient[d] = tmpB[ind, :].sum() + self.W.gradient[d, :] = tmp[ind].sum(0) + + def gradients_X(self, dL_dK, X, X2=None): + #index = np.asarray(X, dtype=np.int) + #index = index.reshape(index.size,) + if hasattr(X, 'values'): + X = X.values + index = np.int_(np.round(X[:, 1])) + index = index.reshape(index.size,) + X_flag = index[0] >= self.output_dim + #If input_dim == 1, use this + #gX = np.zeros((X.shape[0], 1)) + #Cheat to allow gradient for input_dim==2 + gX = np.zeros(X.shape) + if X2 is None: #Kuu or Kmm + if X_flag: + index -= self.output_dim + gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0) + return gX + else: + raise NotImplementedError + else: #Kuf or Kmn + #index2 = np.asarray(X2, dtype=np.int) + #index2 = index2.reshape(index2.size,) + if hasattr(X2, 'values'): + X2 = X2.values + index2 = np.int_(np.round(X2[:, 1])) + index2 = index2.reshape(index2.size,) + X2_flag = index2[0] >= self.output_dim + if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z + index -= self.output_dim + gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1) + return gX + else: + raise NotImplementedError + + #---------------------------------------# + # Helper functions # + #---------------------------------------# + + #Evaluation of squared exponential for LFM + def _Kuu(self, X, index): + index = index.reshape(index.size,) + t = X[:, 0].reshape(X.shape[0],) + lq = self.lengthscale.values.reshape(self.rank,) + lq2 = lq*lq + #Covariance matrix initialization + kuu = np.zeros((t.size, t.size)) + #Assign 1. to diagonal terms + kuu[np.diag_indices(t.size)] = 1. + #Upper triangular indices + indtri1, indtri2 = np.triu_indices(t.size, 1) + #Block Diagonal indices among Upper Triangular indices + ind = np.where(index[indtri1] == index[indtri2]) + indr = indtri1[ind] + indc = indtri2[ind] + r = t[indr] - t[indc] + r2 = r*r + #Calculation of covariance function + kuu[indr, indc] = np.exp(-r2/lq2[index[indr]]) + #Completion of lower triangular part + kuu[indc, indr] = kuu[indr, indc] + return kuu + + #Evaluation of cross-covariance function + def _Kfu(self, X, index, X2, index2): + #terms that move along t + t = X[:, 0].reshape(X.shape[0], 1) + d = np.unique(index) #Output Indexes + B = self.decay.values[d] + S = self.W.values[d, :] + #Index transformation + indd = np.arange(self.output_dim) + indd[d] = np.arange(d.size) + index = indd[index] + #Output related variables must be column-wise + B = B.reshape(B.size, 1) + #Input related variables must be row-wise + z = X2[:, 0].reshape(1, X2.shape[0]) + lq = self.lengthscale.values.reshape((1, self.rank)) + + kfu = np.empty((t.size, z.size)) + + #DxQ terms + c0 = S*((.5*np.sqrt(np.pi))*lq) + nu = B*(.5*lq) + nu2 = nu**2 + #1xM terms + z_lq = z/lq[0, index2] + #NxM terms + tz = t-z + tz_lq = tz/lq[0, index2] + + # Upsilon Calculations + fullind = np.ix_(index, index2) + + upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind])) + upsi[t[:, 0] == 0, :] = 0. + #Covariance calculation + kfu = c0[fullind]*upsi + + return kfu + + #Gradient of Kuu wrt lengthscale + def _gkuu_lq(self, X, index): + t = X[:, 0].reshape(X.shape[0],) + index = index.reshape(X.shape[0],) + lq = self.lengthscale.values.reshape(self.rank,) + lq2 = lq*lq + #Covariance matrix initialization + glq = np.zeros((t.size, t.size)) + #Upper triangular indices + indtri1, indtri2 = np.triu_indices(t.size, 1) + #Block Diagonal indices among Upper Triangular indices + ind = np.where(index[indtri1] == index[indtri2]) + indr = indtri1[ind] + indc = indtri2[ind] + r = t[indr] - t[indc] + r2 = r*r + r2_lq2 = r2/lq2[index[indr]] + #Calculation of covariance function + er2_lq2 = np.exp(-r2_lq2) + #Gradient wrt lq + c = 2.*r2_lq2/lq[index[indr]] + glq[indr, indc] = er2_lq2*c + #Complete the lower triangular + glq[indc, indr] = glq[indr, indc] + return glq + + #Be careful this derivative should be transpose it + def _gkuu_X(self, X, index): #Diagonal terms are always zero + t = X[:, 0].reshape(X.shape[0],) + index = index.reshape(index.size,) + lq = self.lengthscale.values.reshape(self.rank,) + lq2 = lq*lq + #Covariance matrix initialization + gt = np.zeros((t.size, t.size)) + #Upper triangular indices + indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal + #Block Diagonal indices among Upper Triangular indices + ind = np.where(index[indtri1] == index[indtri2]) + indr = indtri1[ind] + indc = indtri2[ind] + r = t[indr] - t[indc] + r2 = r*r + r2_lq2 = r2/(-lq2[index[indr]]) + #Calculation of covariance function + er2_lq2 = np.exp(r2_lq2) + #Gradient wrt t + c = 2.*r/lq2[index[indr]] + gt[indr, indc] = er2_lq2*c + #Complete the lower triangular + gt[indc, indr] = -gt[indr, indc] + return gt + + #Gradients for Diagonal Kff + def _gkdiag(self, X, index): + index = index.reshape(index.size,) + #terms that move along t + d = np.unique(index) + B = self.decay[d].values + S = self.W[d, :].values + #Index transformation + indd = np.arange(self.output_dim) + indd[d] = np.arange(d.size) + index = indd[index] + #Output related variables must be column-wise + t = X[:, 0].reshape(X.shape[0], 1) + B = B.reshape(B.size, 1) + S2 = S*S + + #Input related variables must be row-wise + lq = self.lengthscale.values.reshape(1, self.rank) + + gB = np.empty((t.size,)) + glq = np.empty((t.size, lq.size)) + gS = np.empty((t.size, lq.size)) + + #Dx1 terms + c0 = S2*lq*np.sqrt(np.pi) + + #DxQ terms + nu = (.5*lq)*B + nu2 = nu*nu + + #Nx1 terms + gamt = -B[index]*t + egamt = np.exp(gamt) + e2gamt = egamt*egamt + + #NxQ terms + t_lq = t/lq + t2_lq2 = -t_lq*t_lq + + etlq2gamt = np.exp(t2_lq2 + gamt) #NXQ + + ##Upsilon calculations + #erfnu = erf(nu) #TODO: This can be improved + + upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :], t_lq + nu[index, :]) ) + upm[t[:, 0] == 0, :] = 0. + + upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) ) #egamt*upv + upv[t[:, 0] == 0, :] = 0. + + #Gradient wrt S + c0_S = (S/B)*(lq*np.sqrt(np.pi)) + + gS = c0_S[index]*(upm - upv) + + #For B + CB1 = (.5*lq)**2 - .5/B**2 #DXQ + lq2_2B = (.5*lq**2)*(S2/B) #DXQ + CB2 = 2.*etlq2gamt - e2gamt - 1. #NxQ + + # gradient wrt B NxZ + gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \ + lq2_2B[index, :]*CB2 + + #Gradient wrt lengthscale + #DxQ terms + c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1.+.5*(lq*B)**2) + Clq1 = S2*(lq*.5) + glq = c0[index]*(upm - upv) + Clq1[index]*CB2 + + return glq, gS, gB + + def _gkfu(self, X, index, Z, index2): + index = index.reshape(index.size,) + #TODO: reduce memory usage + #terms that move along t + d = np.unique(index) + B = self.decay[d].values + S = self.W[d, :].values + + #Index transformation + indd = np.arange(self.output_dim) + indd[d] = np.arange(d.size) + index = indd[index] + #t column + t = X[:, 0].reshape(X.shape[0], 1) + B = B.reshape(B.size, 1) + #z row + z = Z[:, 0].reshape(1, Z.shape[0]) + index2 = index2.reshape(index2.size,) + lq = self.lengthscale.values.reshape((1, self.rank)) + + #kfu = np.empty((t.size, z.size)) + glq = np.empty((t.size, z.size)) + gSdq = np.empty((t.size, z.size)) + gB = np.empty((t.size, z.size)) + + #Dx1 terms + B_2 = B*.5 + S_pi = S*(.5*np.sqrt(np.pi)) + #DxQ terms + c0 = S_pi*lq #lq*Sdq*sqrt(pi) + nu = B*lq*.5 + nu2 = nu*nu + + #1xM terms + z_lq = z/lq[0, index2] + + #NxM terms + tz = t-z + tz_lq = tz/lq[0, index2] + etz_lq2 = -np.exp(-tz_lq*tz_lq) + ez_lq_Bt = np.exp(-z_lq*z_lq -B[index]*t) + + # Upsilon calculations + fullind = np.ix_(index, index2) + upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind] ) ) + upsi[t[:, 0] == 0., :] = 0. + + #Gradient wrt S + #DxQ term + Sa1 = lq*(.5*np.sqrt(np.pi)) + + gSdq = Sa1[0,index2]*upsi + + #Gradient wrt lq + la1 = S_pi*(1. + 2.*nu2) + Slq = S*lq + uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index]) + uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index]) + + glq = la1[fullind]*upsi + glq += Slq[fullind]*uplq + + #Gradient wrt B + Slq = Slq*lq + nulq = nu*lq + upBd = etz_lq2 + ez_lq_Bt + gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd + + return glq, gSdq, gB + + #TODO: reduce memory usage + def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z) + index = index.reshape(index.size,) + #terms that move along t + d = np.unique(index) + B = self.decay[d].values + S = self.W[d, :].values + #Index transformation + indd = np.arange(self.output_dim) + indd[d] = np.arange(d.size) + index = indd[index] + + #t column + t = X[:, 0].reshape(X.shape[0], 1) + B = B.reshape(B.size, 1) + #z row + z = Z[:, 0].reshape(1, Z.shape[0]) + index2 = index2.reshape(index2.size,) + lq = self.lengthscale.values.reshape((1, self.rank)) + + #kfu = np.empty((t.size, z.size)) + gz = np.empty((t.size, z.size)) + + #Dx1 terms + S_pi =S*(.5*np.sqrt(np.pi)) + #DxQ terms + #Slq = S*lq + c0 = S_pi*lq #lq*Sdq*sqrt(pi) + nu = (.5*lq)*B + nu2 = nu*nu + + #1xM terms + z_lq = z/lq[0, index2] + z_lq2 = -z_lq*z_lq + #NxQ terms + t_lq = t/lq + #NxM terms + zt_lq = z_lq - t_lq[:, index2] + zt_lq2 = -zt_lq*zt_lq + + # Upsilon calculations + fullind = np.ix_(index, index2) + z2 = z_lq + nu[fullind] + z1 = z2 - t_lq[:, index2] + upsi = np.exp(nu2[fullind] - B[index]*(t-z) + lnDifErf(z1,z2) ) + upsi[t[:, 0] == 0., :] = 0. + + #Gradient wrt z + za1 = c0*B + #za2 = S_w + gz = za1[fullind]*upsi + S[fullind]*( np.exp(z_lq2 - B[index]*t) -np.exp(zt_lq2) ) + + return gz + +def lnDifErf(z1,z2): + #Z2 is always positive + logdiferf = np.zeros(z1.shape) + ind = np.where(z1>0.) + ind2 = np.where(z1<=0.) + if ind[0].shape > 0: + z1i = z1[ind] + z12 = z1i*z1i + z2i = z2[ind] + logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12-z2i**2)) + + if ind2[0].shape > 0: + z1i = z1[ind2] + z2i = z2[ind2] + logdiferf[ind2] = np.log(erf(z2i) - erf(z1i)) + + return logdiferf \ No newline at end of file diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index c31d68dd..654b1938 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -24,3 +24,5 @@ from .one_vs_all_sparse_classification import OneVsAllSparseClassification from .dpgplvm import DPBayesianGPLVM from .state_space_model import StateSpace + +from .ibp_lfm import IBPLFM diff --git a/GPy/models/ibp_lfm.py b/GPy/models/ibp_lfm.py new file mode 100644 index 00000000..aa629ce6 --- /dev/null +++ b/GPy/models/ibp_lfm.py @@ -0,0 +1,535 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np + +from ..core.sparse_gp_mpi import SparseGP_MPI +from .. import kern +from ..util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv +from ..util import diag +from ..core.parameterization import Param +from ..likelihoods import Gaussian +from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch +from ..inference.latent_function_inference.posterior import Posterior +from GPy.core.parameterization.variational import VariationalPrior +from ..core.parameterization.parameterized import Parameterized +from paramz.transformations import Logexp, Logistic, __fixed__ +log_2_pi = np.log(2*np.pi) + +class VarDTC_minibatch_IBPLFM(VarDTC_minibatch): + ''' + Modifications of VarDTC_minibatch for IBP LFM + ''' + + def __init__(self, batchsize=None, limit=3, mpi_comm=None): + super(VarDTC_minibatch_IBPLFM, self).__init__(batchsize, limit, mpi_comm) + + def gatherPsiStat(self, kern, X, Z, Y, beta, Zp): + + het_noise = beta.size > 1 + + assert beta.size == 1 + + trYYT = self.get_trYYT(Y) + if self.Y_speedup and not het_noise: + Y = self.get_YYTfactor(Y) + + num_inducing = Z.shape[0] + num_data, output_dim = Y.shape + batchsize = num_data if self.batchsize is None else self.batchsize + + psi2_full = np.zeros((num_inducing, num_inducing)) # MxM + psi1Y_full = np.zeros((output_dim, num_inducing)) # DxM + psi0_full = 0. + YRY_full = 0. + + for n_start in range(0, num_data, batchsize): + n_end = min(batchsize+n_start, num_data) + if batchsize == num_data: + Y_slice = Y + X_slice = X + else: + Y_slice = Y[n_start:n_end] + X_slice = X[n_start:n_end] + + if het_noise: + b = beta[n_start] + YRY_full += np.inner(Y_slice, Y_slice)*b + else: + b = beta + + psi0 = kern.Kdiag(X_slice) #Kff^q + psi1 = kern.K(X_slice, Z) #Kfu + + indX = X_slice.values + indX = np.int_(np.round(indX[:, -1])) + + Zp = Zp.gamma.values + # Extend Zp across columns + indZ = Z.values + indZ = np.int_(np.round(indZ[:, -1])) - Zp.shape[0] + Zpq = Zp[:, indZ] + + for d in np.unique(indX): + indd = indX == d + psi1d = psi1[indd, :] + Zpd = Zp[d, :] + Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd) + psi2_full += (np.dot(psi1d.T, psi1d)*Zp2[np.ix_(indZ, indZ)])*b #Zp2*Kufd*Kfud*beta + + psi0_full += np.sum(psi0*Zp[indX, :])*b + psi1Y_full += np.dot(Y_slice.T, psi1*Zpq[indX, :])*b + + if not het_noise: + YRY_full = trYYT*beta + + if self.mpi_comm is not None: + from mpi4py import MPI + psi0_all = np.array(psi0_full) + psi1Y_all = psi1Y_full.copy() + psi2_all = psi2_full.copy() + YRY_all = np.array(YRY_full) + self.mpi_comm.Allreduce([psi0_full, MPI.DOUBLE], [psi0_all, MPI.DOUBLE]) + self.mpi_comm.Allreduce([psi1Y_full, MPI.DOUBLE], [psi1Y_all, MPI.DOUBLE]) + self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE]) + self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE]) + return psi0_all, psi1Y_all, psi2_all, YRY_all + + return psi0_full, psi1Y_full, psi2_full, YRY_full + + + def inference_likelihood(self, kern, X, Z, likelihood, Y, Zp): + """ + The first phase of inference: + Compute: log-likelihood, dL_dKmm + + Cached intermediate results: Kmm, KmmInv, + """ + + num_data, output_dim = Y.shape + input_dim = Z.shape[0] + if self.mpi_comm is not None: + from mpi4py import MPI + num_data_all = np.array(num_data,dtype=np.int32) + self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT]) + num_data = num_data_all + + #see whether we've got a different noise variance for each datum + beta = 1./np.fmax(likelihood.variance, 1e-6) + het_noise = beta.size > 1 + if het_noise: + self.batchsize = 1 + + psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, Zp) + + #====================================================================== + # Compute Common Components + #====================================================================== + + Kmm = kern.K(Z).copy() + diag.add(Kmm, self.const_jitter) + if not np.isfinite(Kmm).all(): + print(Kmm) + Lm = jitchol(Kmm) + LmInv = dtrtri(Lm) + + LmInvPsi2LmInvT = np.dot(LmInv, np.dot(psi2_full, LmInv.T)) + Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT + LL = jitchol(Lambda) + LLInv = dtrtri(LL) + logdet_L = 2.*np.sum(np.log(np.diag(LL))) + LmLLInv = np.dot(LLInv, LmInv) + + b = np.dot(psi1Y_full, LmLLInv.T) + bbt = np.sum(np.square(b)) + v = np.dot(b, LmLLInv).T + LLinvPsi1TYYTPsi1LLinvT = tdot(b.T) + + tmp = -np.dot(np.dot(LLInv.T, LLinvPsi1TYYTPsi1LLinvT + output_dim*np.eye(input_dim)), LLInv) + dL_dpsi2R = .5*np.dot(np.dot(LmInv.T, tmp + output_dim*np.eye(input_dim)), LmInv) + + # Cache intermediate results + self.midRes['dL_dpsi2R'] = dL_dpsi2R + self.midRes['v'] = v + + #====================================================================== + # Compute log-likelihood + #====================================================================== + if het_noise: + logL_R = -np.sum(np.log(beta)) + else: + logL_R = -num_data*np.log(beta) + logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)*.5 - output_dim*logdet_L*.5 + + #====================================================================== + # Compute dL_dKmm + #====================================================================== + + dL_dKmm = dL_dpsi2R - .5*output_dim*np.dot(np.dot(LmInv.T, LmInvPsi2LmInvT), LmInv) + + #====================================================================== + # Compute the Posterior distribution of inducing points p(u|Y) + #====================================================================== + + if not self.Y_speedup or het_noise: + wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left') + post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) + else: + post = None + + #====================================================================== + # Compute dL_dthetaL for uncertian input and non-heter noise + #====================================================================== + + if not het_noise: + dL_dthetaL = .5*(YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta) - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum() + self.midRes['dL_dthetaL'] = dL_dthetaL + + return logL, dL_dKmm, post + + def inference_minibatch(self, kern, X, Z, likelihood, Y, Zp): + """ + The second phase of inference: Computing the derivatives over a minibatch of Y + Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL + return a flag showing whether it reached the end of Y (isEnd) + """ + + num_data, output_dim = Y.shape + + #see whether we've got a different noise variance for each datum + beta = 1./np.fmax(likelihood.variance, 1e-6) + het_noise = beta.size > 1 + # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! + #self.YYTfactor = beta*self.get_YYTfactor(Y) + if self.Y_speedup and not het_noise: + YYT_factor = self.get_YYTfactor(Y) + else: + YYT_factor = Y + + n_start = self.batch_pos + batchsize = num_data if self.batchsize is None else self.batchsize + n_end = min(batchsize+n_start, num_data) + if n_end == num_data: + isEnd = True + self.batch_pos = 0 + else: + isEnd = False + self.batch_pos = n_end + + if batchsize == num_data: + Y_slice = YYT_factor + X_slice = X + else: + Y_slice = YYT_factor[n_start:n_end] + X_slice = X[n_start:n_end] + + psi0 = kern.Kdiag(X_slice) #Kffdiag + psi1 = kern.K(X_slice, Z) #Kfu + betapsi1 = np.einsum('n,nm->nm', beta, psi1) + + X_slice = X_slice.values + Z = Z.values + + Zp = Zp.gamma.values + indX = np.int_(X_slice[:, -1]) + indZ = np.int_(Z[:, -1]) - Zp.shape[0] + + betaY = beta*Y_slice + + #====================================================================== + # Load Intermediate Results + #====================================================================== + + dL_dpsi2R = self.midRes['dL_dpsi2R'] + v = self.midRes['v'] + + #====================================================================== + # Compute dL_dpsi + #====================================================================== + + dL_dpsi0 = -.5*output_dim*(beta * Zp[indX, :]) #XxQ #TODO: Check this gradient + + dL_dpsi1 = np.dot(betaY, v.T) + dL_dEZp = psi1*dL_dpsi1 + dL_dpsi1 = Zp[np.ix_(indX, indZ)]*dL_dpsi1 + dL_dgamma = np.zeros(Zp.shape) + for d in np.unique(indX): + indd = indX == d + betapsi1d = betapsi1[indd, :] + psi1d = psi1[indd, :] + Zpd = Zp[d, :] + Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd) + dL_dpsi1[indd, :] += np.dot(betapsi1d, Zp2[np.ix_(indZ, indZ)] * dL_dpsi2R)*2. + + dL_EZp2 = dL_dpsi2R * (np.dot(psi1d.T, psi1d) * beta)*2. # Zpd*Kufd*Kfud*beta + #Gradient of Likelihood wrt gamma is calculated here + EZ = Zp[d, indZ] + for q in range(Zp.shape[1]): + EZt = EZ.copy() + indq = indZ == q + EZt[indq] = .5 + dL_dgamma[d, q] = np.sum(dL_dEZp[np.ix_(indd, indq)]) + np.sum(dL_EZp2[:, indq]*EZt[:, None]) -\ + .5*beta*(np.sum(psi0[indd, q])) + + #====================================================================== + # Compute dL_dthetaL + #====================================================================== + if isEnd: + dL_dthetaL = self.midRes['dL_dthetaL'] + else: + dL_dthetaL = 0. + + grad_dict = {'dL_dKdiag': dL_dpsi0, + 'dL_dKnm': dL_dpsi1, + 'dL_dthetaL': dL_dthetaL, + 'dL_dgamma': dL_dgamma} + + return isEnd, (n_start, n_end), grad_dict + + +def update_gradients(model, mpi_comm=None): + if mpi_comm is None: + Y = model.Y + X = model.X + else: + Y = model.Y_local + X = model.X[model.N_range[0]:model.N_range[1]] + + model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y, model.Zp) + + het_noise = model.likelihood.variance.size > 1 + + if het_noise: + dL_dthetaL = np.empty((model.Y.shape[0],)) + else: + dL_dthetaL = np.float64(0.) + + kern_grad = model.kern.gradient.copy() + kern_grad[:] = 0. + model.Z.gradient = 0. + gamma_gradient = model.Zp.gamma.copy() + gamma_gradient[:] = 0. + + isEnd = False + while not isEnd: + isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y, model.Zp) + + if (n_range[1]-n_range[0]) == X.shape[0]: + X_slice = X + elif mpi_comm is None: + X_slice = model.X[n_range[0]:n_range[1]] + else: + X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]] + + #gradients w.r.t. kernel + model.kern.update_gradients_diag(grad_dict['dL_dKdiag'], X_slice) + kern_grad += model.kern.gradient + + model.kern.update_gradients_full(grad_dict['dL_dKnm'], X_slice, model.Z) + kern_grad += model.kern.gradient + + #gradients w.r.t. Z + model.Z.gradient += model.kern.gradients_X(grad_dict['dL_dKnm'].T, model.Z, X_slice) + + #gradients w.r.t. posterior parameters of Zp + gamma_gradient += grad_dict['dL_dgamma'] + + if het_noise: + dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] + else: + dL_dthetaL += grad_dict['dL_dthetaL'] + + # Gather the gradients from multiple MPI nodes + if mpi_comm is not None: + from mpi4py import MPI + if het_noise: + raise "het_noise not implemented!" + kern_grad_all = kern_grad.copy() + Z_grad_all = model.Z.gradient.copy() + gamma_grad_all = gamma_gradient.copy() + mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE]) + mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE]) + mpi_comm.Allreduce([gamma_gradient, MPI.DOUBLE], [gamma_grad_all, MPI.DOUBLE]) + kern_grad = kern_grad_all + model.Z.gradient = Z_grad_all + gamma_gradient = gamma_grad_all + + #gradients w.r.t. kernel + model.kern.update_gradients_full(dL_dKmm, model.Z, None) + model.kern.gradient += kern_grad + + #gradients w.r.t. Z + model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z) + + #gradient w.r.t. gamma + model.Zp.gamma.gradient = gamma_gradient + + # Update Log-likelihood + KL_div = model.variational_prior.KL_divergence(model.Zp) + # update for the KL divergence + model.variational_prior.update_gradients_KL(model.Zp) + + model._log_marginal_likelihood += KL_div + + # dL_dthetaL + model.likelihood.update_gradients(dL_dthetaL) + + +class IBPPosterior(Parameterized): + ''' + The IBP distribution for variational approximations. + ''' + def __init__(self, binary_prob, tau=None, name='Sensitivity space', *a, **kw): + """ + binary_prob : the probability of including a latent function over an output. + """ + super(IBPPosterior, self).__init__(name=name, *a, **kw) + self.gamma = Param("binary_prob", binary_prob, Logistic(1e-10, 1. - 1e-10)) + self.link_parameter(self.gamma) + if tau is not None: + assert tau.size == 2*self.gamma_.shape[1] + self.tau = Param("tau", tau, Logexp()) + else: + self.tau = Param("tau", np.ones((2, self.gamma.shape[1])), Logexp()) + self.link_parameter(self.tau) + + def set_gradients(self, grad): + self.gamma.gradient, self.tau.gradient = grad + + def __getitem__(self, s): + pass + # if isinstance(s, (int, slice, tuple, list, np.ndarray)): + # import copy + # n = self.__new__(self.__class__, self.name) + # dc = self.__dict__.copy() + # dc['binary_prob'] = self.binary_prob[s] + # dc['tau'] = self.tau + # dc['parameters'] = copy.copy(self.parameters) + # n.__dict__.update(dc) + # n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob'] + # n.parameters[dc['tau']._parent_index_] = dc['tau'] + # n._gradient_array_ = None + # oversize = self.size - self.gamma.size - self.tau.size + # n.size = n.gamma.size + n.tau.size + oversize + # return n + # else: + # return super(IBPPosterior, self).__getitem__(s) + +class IBPPrior(VariationalPrior): + def __init__(self, rank, alpha=2., name='IBPPrior', **kw): + super(IBPPrior, self).__init__(name=name, **kw) + from paramz.transformations import __fixed__ + self.rank = rank + self.alpha = Param('alpha', alpha, __fixed__) + self.link_parameter(self.alpha) + + def KL_divergence(self, variational_posterior): + from scipy.special import gamma, psi + + eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values + + sum_eta = np.sum(eta, axis=0) #sum_d gamma(d,q) + D_seta = eta.shape[0] - sum_eta + ad = self.alpha/eta.shape[1] + psitau1 = psi(tau[0, :]) + psitau2 = psi(tau[1, :]) + sumtau = np.sum(tau, axis=0) + psitau = psi(sumtau) + # E[log p(z)] + part1 = np.sum(sum_eta*psitau1 + D_seta*psitau2 - eta.shape[0]*psitau) + + # E[log p(pi)] + part1 += (ad - 1.)*np.sum(psitau1 - psitau) + eta.shape[1]*np.log(ad) + + #H(z) + part2 = np.sum(-(1.-eta)*np.log(1.-eta) - eta*np.log(eta)) + #H(pi) + part2 += np.sum(np.log(gamma(tau[0, :])*gamma(tau[1, :])/gamma(sumtau))-(tau[0, :]-1.)*psitau1-(tau[1, :]-1.)*psitau2\ + + (sumtau-2.)*psitau) + + return part1+part2 + + def update_gradients_KL(self, variational_posterior): + eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values + + from scipy.special import psi, polygamma + dgamma = np.log(1. - eta) - np.log(eta) + psi(tau[0, :]) - psi(tau[1, :]) + variational_posterior.gamma.gradient += dgamma + ad = self.alpha/self.rank + sumeta = np.sum(eta, axis=0) + sumtau = np.sum(tau, axis=0) + common = (-eta.shape[0] - (ad - 1.) + (sumtau - 2.))*polygamma(1, sumtau) + variational_posterior.tau.gradient[0, :] = (sumeta + ad - tau[0, :])*polygamma(1, tau[0, :]) + common + variational_posterior.tau.gradient[1, :] = ((eta.shape[0] - sumeta) - (tau[1, :] - 1.))*polygamma(1, tau[1, :])\ + + common + + +class IBPLFM(SparseGP_MPI): + """ + Indian Buffet Process for Latent Force Models + + :param Y: observed data (np.ndarray) or GPy.likelihood + :type Y: np.ndarray| GPy.likelihood instance + :param X: input data (np.ndarray) [X:values, X:index], index refers to the number of the output + :type X: np.ndarray + :param input_dim: latent dimensionality + :type input_dim: int + : param rank: number of latent functions + + """ + def __init__(self, X, Y, input_dim=2, output_dim=1, rank=1, Gamma=None, num_inducing=10, + Z=None, kernel=None, inference_method=None, likelihood=None, name='IBP for LFM', alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, normalizer=False, variational_prior=None,**kwargs): + + if kernel is None: + kernel = kern.EQ_ODE2(input_dim, output_dim, rank) + + if Gamma is None: + gamma = np.empty((output_dim, rank)) # The posterior probabilities of the binary variable in the variational approximation + gamma[:] = 0.5 + 0.1 * np.random.randn(output_dim, rank) + gamma[gamma>1.-1e-9] = 1.-1e-9 + gamma[gamma<1e-9] = 1e-9 + else: + gamma = Gamma.copy() + + #TODO: create a vector of inducing points + if Z is None: + Z = np.random.permutation(X.copy())[:num_inducing] + assert Z.shape[1] == X.shape[1] + + if likelihood is None: + likelihood = Gaussian() + + if inference_method is None: + inference_method = VarDTC_minibatch_IBPLFM(mpi_comm=mpi_comm) + + #Definition of variational terms + self.variational_prior = IBPPrior(rank=rank, alpha=alpha) if variational_prior is None else variational_prior + self.Zp = IBPPosterior(gamma, tau=tau) + + super(IBPLFM, self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs) + self.link_parameter(self.Zp, index=0) + + def set_Zp_gradients(self, Zp, Zp_grad): + """Set the gradients of the posterior distribution of Zp in its specific form.""" + Zp.gamma.gradient = Zp_grad + + def get_Zp_gradients(self, Zp): + """Get the gradients of the posterior distribution of Zp in its specific form.""" + return Zp.gamma.gradient + + def _propogate_Zp_val(self): + pass + + def parameters_changed(self): + #super(IBPLFM,self).parameters_changed() + if isinstance(self.inference_method, VarDTC_minibatch_IBPLFM): + update_gradients(self, mpi_comm=self.mpi_comm) + return + + # Add the KL divergence term + self._log_marginal_likelihood += self.variational_prior.KL_divergence(self.Zp) + #TODO Change the following according to this variational distribution + #self.Zp.gamma.gradient = self. + + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.Zp) \ No newline at end of file From 52fb928dffbc4acbc279d428af6f070d22dfb798 Mon Sep 17 00:00:00 2001 From: cdguarnizo Date: Mon, 23 May 2016 01:28:01 -0500 Subject: [PATCH 6/7] Updates for eq_ode1 and eq_ode2 kernels --- GPy/kern/src/eq_ode1.py | 39 +++++++++++++++++- GPy/kern/src/eq_ode2.py | 91 +++++++++++++++++++++++++++++------------ GPy/models/ibp_lfm.py | 4 +- 3 files changed, 105 insertions(+), 29 deletions(-) diff --git a/GPy/kern/src/eq_ode1.py b/GPy/kern/src/eq_ode1.py index 7b218068..9c19bead 100644 --- a/GPy/kern/src/eq_ode1.py +++ b/GPy/kern/src/eq_ode1.py @@ -110,12 +110,32 @@ class EQ_ODE1(Kern): if not X_flag and X2_flag: index2 -= self.output_dim return self._Kfu(X, index, X2, index2) #Kfu - else: + elif X_flag and not X2_flag: index -= self.output_dim return self._Kfu(X2, index2, X, index).T #Kuf + elif X_flag and X2_flag: + index -= self.output_dim + index2 -= self.output_dim + return self._Kusu(X, index, X2, index2) #Ku_s u + else: + raise NotImplementedError #Kf_s f #Calculate the covariance function for diag(Kff(X,X)) def Kdiag(self, X): + if hasattr(X, 'values'): + index = np.int_(np.round(X[:, 1].values)) + else: + index = np.int_(np.round(X[:, 1])) + index = index.reshape(index.size,) + X_flag = index[0] >= self.output_dim + + if X_flag: #Kuudiag + return np.ones(X[:,0].shape) + else: #Kffdiag + kdiag = self._Kdiag(X) + return np.sum(kdiag, axis=1) + + def _Kdiag(self, X): #This way is not working, indexes are lost after using k._slice_X #index = np.asarray(X, dtype=np.int) #index = index.reshape(index.size,) @@ -306,6 +326,23 @@ class EQ_ODE1(Kern): kuu[indc, indr] = kuu[indr, indc] return kuu + def _Kusu(self, X, index, X2, index2): + index = index.reshape(index.size,) + index2 = index2.reshape(index2.size,) + t = X[:, 0].reshape(X.shape[0],1) + t2 = X2[:, 0].reshape(1,X2.shape[0]) + lq = self.lengthscale.values.reshape(self.rank,) + #Covariance matrix initialization + kuu = np.zeros((t.size, t2.size)) + for q in range(self.rank): + ind1 = index == q + ind2 = index2 == q + r = t[ind1]/lq[q] - t2[0,ind2]/lq[q] + r2 = r*r + #Calculation of covariance function + kuu[np.ix_(ind1, ind2)] = np.exp(-r2) + return kuu + #Evaluation of cross-covariance function def _Kfu(self, X, index, X2, index2): #terms that move along t diff --git a/GPy/kern/src/eq_ode2.py b/GPy/kern/src/eq_ode2.py index 8e735248..0166c511 100644 --- a/GPy/kern/src/eq_ode2.py +++ b/GPy/kern/src/eq_ode2.py @@ -44,7 +44,7 @@ class EQ_ODE2(Kern): lengthscale = np.asarray(lengthscale) assert lengthscale.size in [1, self.rank], "Bad number of lengthscales" if lengthscale.size != self.rank: - lengthscale = np.ones(self.input_dim)*lengthscale + lengthscale = np.ones(self.rank)*lengthscale if W is None: #W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) @@ -71,7 +71,7 @@ class EQ_ODE2(Kern): #index = index.reshape(index.size,) if hasattr(X, 'values'): X = X.values - index = np.int_(X[:, 1]) + index = np.int_(np.round(X[:, 1])) index = index.reshape(index.size,) X_flag = index[0] >= self.output_dim if X2 is None: @@ -79,7 +79,7 @@ class EQ_ODE2(Kern): #Calculate covariance function for the latent functions index -= self.output_dim return self._Kuu(X, index) - else: + else: #Kff full raise NotImplementedError else: #This way is not working, indexes are lost after using k._slice_X @@ -87,19 +87,40 @@ class EQ_ODE2(Kern): #index2 = index2.reshape(index2.size,) if hasattr(X2, 'values'): X2 = X2.values - index2 = np.int_(X2[:, 1]) + index2 = np.int_(np.round(X2[:, 1])) index2 = index2.reshape(index2.size,) X2_flag = index2[0] >= self.output_dim #Calculate cross-covariance function if not X_flag and X2_flag: index2 -= self.output_dim return self._Kfu(X, index, X2, index2) #Kfu - else: + elif X_flag and not X2_flag: index -= self.output_dim return self._Kfu(X2, index2, X, index).T #Kuf + elif X_flag and X2_flag: + index -= self.output_dim + index2 -= self.output_dim + return self._Kusu(X, index, X2, index2) #Ku_s u + else: + raise NotImplementedError #Kf_s f #Calculate the covariance function for diag(Kff(X,X)) def Kdiag(self, X): + if hasattr(X, 'values'): + index = np.int_(np.round(X[:, 1].values)) + else: + index = np.int_(np.round(X[:, 1])) + index = index.reshape(index.size,) + X_flag = index[0] >= self.output_dim + + if X_flag: #Kuudiag + return np.ones(X[:,0].shape) + else: #Kffdiag + kdiag = self._Kdiag(X) + return np.sum(kdiag, axis=1) + + #Calculate the covariance function for diag(Kff(X,X)) + def _Kdiag(self, X): #This way is not working, indexes are lost after using k._slice_X #index = np.asarray(X, dtype=np.int) #index = index.reshape(index.size,) @@ -132,7 +153,7 @@ class EQ_ODE2(Kern): #Terms that move along q lq = self.lengthscale.values.reshape(1, self.lengthscale.size) S2 = S*S - kdiag = np.empty((t.size, )) + kdiag = np.empty((t.size, lq.size)) indD = np.arange(B.size) #(1) When wd is real @@ -187,8 +208,8 @@ class EQ_ODE2(Kern): upv[t1[:, 0] == 0, :] = 0. #Covariance calculation - kdiag[ind3t] = np.sum(np.real(K01[ind]*upm), axis=1) - kdiag[ind3t] += np.sum(np.real((c0[ind]*ec)*upv), axis=1) + kdiag[ind3t] = np.real(K01[ind]*upm) + kdiag[ind3t] += np.real((c0[ind]*ec)*upv) #(2) When w_d is complex if np.any(wbool): @@ -265,7 +286,7 @@ class EQ_ODE2(Kern): upvc[t1[:, 0] == 0, :] = 0. #Covariance calculation - kdiag[ind2t] = np.sum(K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc, axis=1) + kdiag[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc return kdiag def update_gradients_full(self, dL_dK, X, X2 = None): @@ -336,16 +357,17 @@ class EQ_ODE2(Kern): index = index.reshape(index.size,) glq, gS, gB, gC = self._gkdiag(X, index) - tmp = dL_dKdiag.reshape(index.size, 1)*glq + if dL_dKdiag.size == X.shape[0]: + dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1)) + tmp = dL_dKdiag*glq self.lengthscale.gradient = tmp.sum(0) - #TODO: Avoid the reshape by a priori knowing the shape of dL_dKdiag - tmpB = dL_dKdiag*gB.reshape(dL_dKdiag.shape) - tmpC = dL_dKdiag*gC.reshape(dL_dKdiag.shape) - tmp = dL_dKdiag.reshape(index.size, 1)*gS + tmpB = dL_dKdiag*gB + tmpC = dL_dKdiag*gC + tmp = dL_dKdiag*gS for d in np.unique(index): ind = np.where(index == d) - self.B.gradient[d] = tmpB[ind].sum() - self.C.gradient[d] = tmpC[ind].sum() + self.B.gradient[d] = tmpB[ind, :].sum() + self.C.gradient[d] = tmpC[ind, :].sum() self.W.gradient[d, :] = tmp[ind].sum(0) def gradients_X(self, dL_dK, X, X2=None): @@ -410,6 +432,23 @@ class EQ_ODE2(Kern): kuu[indc, indr] = kuu[indr, indc] return kuu + def _Kusu(self, X, index, X2, index2): + index = index.reshape(index.size,) + index2 = index2.reshape(index2.size,) + t = X[:, 0].reshape(X.shape[0],1) + t2 = X2[:, 0].reshape(1,X2.shape[0]) + lq = self.lengthscale.values.reshape(self.rank,) + #Covariance matrix initialization + kuu = np.zeros((t.size, t2.size)) + for q in range(self.rank): + ind1 = index == q + ind2 = index2 == q + r = t[ind1]/lq[q] - t2[0,ind2]/lq[q] + r2 = r*r + #Calculation of covariance function + kuu[np.ix_(ind1, ind2)] = np.exp(-r2) + return kuu + #Evaluation of cross-covariance function def _Kfu(self, X, index, X2, index2): #terms that move along t @@ -632,8 +671,8 @@ class EQ_ODE2(Kern): lq = self.lengthscale.values.reshape(1, self.rank) lq2 = lq*lq - gB = np.empty((t.size,)) - gC = np.empty((t.size,)) + gB = np.empty((t.size, lq.size)) + gC = np.empty((t.size, lq.size)) glq = np.empty((t.size, lq.size)) gS = np.empty((t.size, lq.size)) @@ -723,8 +762,8 @@ class EQ_ODE2(Kern): Ba4_1 = (S2lq*lq)*dgam_dB/w2 Ba4 = Ba4_1*c - gB[ind3t] = np.sum(np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\ - + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd), axis=1) + gB[ind3t] = np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\ + + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd) # gradient wrt C dw_dC = - alphad*dw_dB @@ -738,8 +777,8 @@ class EQ_ODE2(Kern): Ca4_1 = (S2lq*lq)*dgam_dC/w2 Ca4 = Ca4_1*c - gC[ind3t] = np.sum(np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\ - + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd), axis=1) + gC[ind3t] = np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\ + + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd) #Gradient wrt lengthscale #DxQ terms @@ -868,10 +907,10 @@ class EQ_ODE2(Kern): Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc)) Ba2_2c = c0*dgamc_dB/gamc - gB[ind2t] = np.sum(Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\ + gB[ind2t] = Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\ + Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\ + Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\ - + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc, axis=1) + + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc ##Gradient wrt C dw_dC = 0.5*alphad/w @@ -895,10 +934,10 @@ class EQ_ODE2(Kern): Ca4_1c = S2lq2*(dgamc_dC/w2) Ca4c = Ca4_1c*c2 - gC[ind2t] = np.sum(Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\ + gC[ind2t] = Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\ + Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\ + Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\ - + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc, axis=1) + + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc #Gradient wrt lengthscale #DxQ terms diff --git a/GPy/models/ibp_lfm.py b/GPy/models/ibp_lfm.py index aa629ce6..c90ffa40 100644 --- a/GPy/models/ibp_lfm.py +++ b/GPy/models/ibp_lfm.py @@ -58,7 +58,7 @@ class VarDTC_minibatch_IBPLFM(VarDTC_minibatch): else: b = beta - psi0 = kern.Kdiag(X_slice) #Kff^q + psi0 = kern._Kdiag(X_slice) #Kff^q psi1 = kern.K(X_slice, Z) #Kfu indX = X_slice.values @@ -223,7 +223,7 @@ class VarDTC_minibatch_IBPLFM(VarDTC_minibatch): Y_slice = YYT_factor[n_start:n_end] X_slice = X[n_start:n_end] - psi0 = kern.Kdiag(X_slice) #Kffdiag + psi0 = kern._Kdiag(X_slice) #Kffdiag psi1 = kern.K(X_slice, Z) #Kfu betapsi1 = np.einsum('n,nm->nm', beta, psi1) From eeb35621cce91bfb80e4d5814a0ba39a332d6401 Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Thu, 2 Jun 2016 15:57:08 +0100 Subject: [PATCH 7/7] Integral kernels added, these allow 'histogram' or 'binned' data to be modelled --- GPy/kern/__init__.py | 3 + GPy/kern/src/integral.py | 84 ++++++++++++ GPy/kern/src/integral_limits.py | 94 ++++++++++++++ .../src/multidimensional_integral_limits.py | 120 ++++++++++++++++++ 4 files changed, 301 insertions(+) create mode 100644 GPy/kern/src/integral.py create mode 100644 GPy/kern/src/integral_limits.py create mode 100644 GPy/kern/src/multidimensional_integral_limits.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 3c3de65c..69e89de7 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -24,6 +24,9 @@ from .src.ODE_st import ODE_st from .src.ODE_t import ODE_t from .src.poly import Poly from .src.eq_ode2 import EQ_ODE2 +from .src.integral import Integral +from .src.integral_limits import Integral_Limits +from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits from .src.trunclinear import TruncLinear,TruncLinear_inf from .src.splitKern import SplitKern,DEtime from .src.splitKern import DEtime as DiffGenomeKern diff --git a/GPy/kern/src/integral.py b/GPy/kern/src/integral.py new file mode 100644 index 00000000..d2827390 --- /dev/null +++ b/GPy/kern/src/integral.py @@ -0,0 +1,84 @@ +# Written by Mike Smith michaeltsmith.org.uk + +import numpy as np +from .kern import Kern +from ...core.parameterization import Param +from paramz.transformations import Logexp +import math + +class Integral(Kern): #todo do I need to inherit from Stationary + """ + Integral kernel between... + """ + + def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'): + super(Integral, self).__init__(input_dim, active_dims, name) + + if lengthscale is None: + lengthscale = np.ones(1) + else: + lengthscale = np.asarray(lengthscale) + + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values... + self.variances = Param('variances', variances, Logexp()) #and here. + self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise. + + def h(self, z): + return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2)) + + def dk_dl(self, t, tprime, l): #derivative of the kernel wrt lengthscale + return l * ( self.h(t/l) - self.h((t - tprime)/l) + self.h(tprime/l) - 1) + + def update_gradients_full(self, dL_dK, X, X2=None): + if X2 is None: #we're finding dK_xx/dTheta + dK_dl = np.zeros([X.shape[0],X.shape[0]]) + dK_dv = np.zeros([X.shape[0],X.shape[0]]) + for i,x in enumerate(X): + for j,x2 in enumerate(X): + dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],self.lengthscale[0]) #TODO Multiple length scales + dK_dv[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0]) #the gradient wrt the variance is k_xx. + self.lengthscale.gradient = np.sum(dK_dl * dL_dK) + self.variances.gradient = np.sum(dK_dv * dL_dK) + #print "V%0.5f" % self.variances.gradient + #print "L%0.5f" % self.lengthscale.gradient + else: #we're finding dK_xf/Dtheta + print "NEED TO HANDLE TODO!" + + #useful little function to help calculate the covariances. + def g(self,z): + return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2)) + + #covariance between gradients (it's the gradients that we want out... maybe we should have a way of getting K_ff too? Currently you get the diag of K_ff from Kdiag) + def k_xx(self,t,tprime,l): + return 0.5 * (l**2) * ( self.g(t/l) - self.g((t - tprime)/l) + self.g(tprime/l) - 1) + + def k_ff(self,t,tprime,l): + return np.exp(-((t-tprime)**2)/(l**2)) #rbf + + #covariance between the gradient and the actual value + def k_xf(self,t,tprime,l): + return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf(tprime/l)) + + def K(self, X, X2=None): + if X2 is None: + K_xx = np.zeros([X.shape[0],X.shape[0]]) + for i,x in enumerate(X): + for j,x2 in enumerate(X): + K_xx[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0]) + return K_xx * self.variances[0] + else: + K_xf = np.zeros([X.shape[0],X2.shape[0]]) + for i,x in enumerate(X): + for j,x2 in enumerate(X2): + K_xf[i,j] = self.k_xf(x[0],x2[0],self.lengthscale[0]) + #print self.variances[0] + return K_xf * self.variances[0] + + def Kdiag(self, X): + """I've used the fact that we call this method for K_ff when finding the covariance as a hack so + I know if I should return K_ff or K_xx. In this case we're returning K_ff!! + $K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$""" + K_ff = np.zeros(X.shape[0]) + for i,x in enumerate(X): + K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0]) + return K_ff * self.variances[0] diff --git a/GPy/kern/src/integral_limits.py b/GPy/kern/src/integral_limits.py new file mode 100644 index 00000000..a1b60859 --- /dev/null +++ b/GPy/kern/src/integral_limits.py @@ -0,0 +1,94 @@ +# Written by Mike Smith michaeltsmith.org.uk + +import numpy as np +from .kern import Kern +from ...core.parameterization import Param +from paramz.transformations import Logexp +import math + +class Integral_Limits(Kern): #todo do I need to inherit from Stationary + """ + Integral kernel, can include limits on each integral value. + """ + + def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'): + super(Integral_Limits, self).__init__(input_dim, active_dims, name) + + if lengthscale is None: + lengthscale = np.ones(1) + else: + lengthscale = np.asarray(lengthscale) + + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values... + self.variances = Param('variances', variances, Logexp()) #and here. + self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise. + + def h(self, z): + return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2)) + + def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale + return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l)) + + def update_gradients_full(self, dL_dK, X, X2=None): + if X2 is None: #we're finding dK_xx/dTheta + dK_dl = np.zeros([X.shape[0],X.shape[0]]) + dK_dv = np.zeros([X.shape[0],X.shape[0]]) + for i,x in enumerate(X): + for j,x2 in enumerate(X): + dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],x[1],x2[1],self.lengthscale[0]) + dK_dv[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0]) #the gradient wrt the variance is k_xx. + self.lengthscale.gradient = np.sum(dK_dl * dL_dK) + self.variances.gradient = np.sum(dK_dv * dL_dK) + #print "V%0.5f" % self.variances.gradient + #print "L%0.5f" % self.lengthscale.gradient + else: #we're finding dK_xf/Dtheta + print "NEED TO HANDLE TODO!" + + #useful little function to help calculate the covariances. + def g(self,z): + return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2)) + + def k_xx(self,t,tprime,s,sprime,l): + """Covariance between observed values. + + s and t are one domain of the integral (i.e. the integral between s and t) + sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime) + + We're interested in how correlated these two integrals are. + + Note: We've not multiplied by the variance, this is done in K.""" + return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l)) + + def k_ff(self,t,tprime,l): + """Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required""" + return np.exp(-((t-tprime)**2)/(l**2)) #rbf + + def k_xf(self,t,tprime,s,l): + """Covariance between the gradient (latent value) and the actual (observed) value. + + Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't + involve an integration, and thus there is no domain over which they're integrated, just a single value that we want.""" + return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l)) + + def K(self, X, X2=None): + if X2 is None: + K_xx = np.zeros([X.shape[0],X.shape[0]]) + for i,x in enumerate(X): + for j,x2 in enumerate(X): + K_xx[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0]) + return K_xx * self.variances[0] + else: + K_xf = np.zeros([X.shape[0],X2.shape[0]]) + for i,x in enumerate(X): + for j,x2 in enumerate(X2): + K_xf[i,j] = self.k_xf(x[0],x2[0],x[1],self.lengthscale[0]) #x2[1] unused, see k_xf docstring for explanation. + return K_xf * self.variances[0] + + def Kdiag(self, X): + """I've used the fact that we call this method for K_ff when finding the covariance as a hack so + I know if I should return K_ff or K_xx. In this case we're returning K_ff!! + $K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$""" + K_ff = np.zeros(X.shape[0]) + for i,x in enumerate(X): + K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0]) + return K_ff * self.variances[0] diff --git a/GPy/kern/src/multidimensional_integral_limits.py b/GPy/kern/src/multidimensional_integral_limits.py new file mode 100644 index 00000000..91983f53 --- /dev/null +++ b/GPy/kern/src/multidimensional_integral_limits.py @@ -0,0 +1,120 @@ +# Written by Mike Smith michaeltsmith.org.uk + +import numpy as np +from .kern import Kern +from ...core.parameterization import Param +from paramz.transformations import Logexp +import math + +class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from Stationary + """ + Integral kernel, can include limits on each integral value. + """ + + def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'): + super(Multidimensional_Integral_Limits, self).__init__(input_dim, active_dims, name) + + if lengthscale is None: + lengthscale = np.ones(1) + else: + lengthscale = np.asarray(lengthscale) + + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values... + self.variances = Param('variances', variances, Logexp()) #and here. + self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise. + + def h(self, z): + return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2)) + + def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale + return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l)) + + def update_gradients_full(self, dL_dK, X, X2=None): + #print self.variances + if X2 is None: #we're finding dK_xx/dTheta + dK_dl_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]]) + k_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]]) + dK_dl = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]]) + dK_dv = np.zeros([X.shape[0],X.shape[0]]) + for il,l in enumerate(self.lengthscale): + idx = il*2 + for i,x in enumerate(X): + for j,x2 in enumerate(X): + dK_dl_term[i,j,il] = self.dk_dl(x[idx],x2[idx],x[idx+1],x2[idx+1],l) + k_term[i,j,il] = self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l) + for il,l in enumerate(self.lengthscale): + dK_dl = self.variances[0] * dK_dl_term[:,:,il] + for jl, l in enumerate(self.lengthscale): + if jl!=il: + dK_dl *= k_term[:,:,jl] + #dK_dl = np.dot(dK_dl,k_term[:,:,il]) + #print k_term[:,:,il] + self.lengthscale.gradient[il] = np.sum(dK_dl * dL_dK) + dK_dv = self.calc_K_xx_wo_variance(X) #the gradient wrt the variance is k_xx. + self.variances.gradient = np.sum(dK_dv * dL_dK) + else: #we're finding dK_xf/Dtheta + print "NEED TO HANDLE TODO!" + #print self.variances[0],self.lengthscale[0],self.lengthscale[1] #np.sum(dK_dv*dL_dK) + + + #useful little function to help calculate the covariances. + def g(self,z): + return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2)) + + def k_xx(self,t,tprime,s,sprime,l): + """Covariance between observed values. + + s and t are one domain of the integral (i.e. the integral between s and t) + sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime) + + We're interested in how correlated these two integrals are. + + Note: We've not multiplied by the variance, this is done in K.""" + return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l)) + + def k_ff(self,t,tprime,l): + """Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required""" + return np.exp(-((t-tprime)**2)/(l**2)) #rbf + + def k_xf(self,t,tprime,s,l): + """Covariance between the gradient (latent value) and the actual (observed) value. + + Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't + involve an integration, and thus there is no domain over which they're integrated, just a single value that we want.""" + return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l)) + + def calc_K_xx_wo_variance(self,X): + """Calculates K_xx without the variance term""" + K_xx = np.ones([X.shape[0],X.shape[0]]) #ones now as a product occurs over each dimension + for i,x in enumerate(X): + for j,x2 in enumerate(X): + for il,l in enumerate(self.lengthscale): + idx = il*2 #each pair of input dimensions describe the limits on one actual dimension in the data + K_xx[i,j] *= self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l) + return K_xx + + def K(self, X, X2=None): + if X2 is None: + #print "X x X" + K_xx = self.calc_K_xx_wo_variance(X) + return K_xx * self.variances[0] + else: + #print "X x X2" + K_xf = np.ones([X.shape[0],X2.shape[0]]) + for i,x in enumerate(X): + for j,x2 in enumerate(X2): + for il,l in enumerate(self.lengthscale): + idx = il*2 + K_xf[i,j] *= self.k_xf(x[idx],x2[idx],x[idx+1],l) + return K_xf * self.variances[0] + + def Kdiag(self, X): + """I've used the fact that we call this method for K_ff when finding the covariance as a hack so + I know if I should return K_ff or K_xx. In this case we're returning K_ff!! + $K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$""" + K_ff = np.ones(X.shape[0]) + for i,x in enumerate(X): + for il,l in enumerate(self.lengthscale): + idx = il*2 + K_ff[i] *= self.k_ff(x[idx],x[idx],l) + return K_ff * self.variances[0]