Merge branch 'feature-multioutput-grad-obs' of git://github.com/esiivola/GPy into esiivola-feature-multioutput-grad-obs

This commit is contained in:
mzwiessele 2019-07-22 10:54:28 +01:00
commit 22ce7ad207
24 changed files with 795 additions and 53 deletions

View file

@ -581,3 +581,53 @@ def warped_gp_cubic_sine(max_iters=100):
m.plot(title="Standard GP")
warp_m.plot_warping()
pb.show()
def multioutput_gp_with_derivative_observations():
def plot_gp_vs_real(m, x, yreal, size_inputs, title, fixed_input=1, xlim=[0,11], ylim=[-1.5,3]):
fig, ax = pb.subplots()
ax.set_title(title)
pb.plot(x, yreal, "r", label='Real function')
rows = slice(0, size_inputs[0]) if fixed_input == 0 else slice(size_inputs[0], size_inputs[0]+size_inputs[1])
m.plot(fixed_inputs=[(1, fixed_input)], which_data_rows=rows, xlim=xlim, ylim=ylim, ax=ax)
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10 # Number of observations
M=10 # Number of derivative observations
Npred=100 # Number of prediction points
sigma = 0.05 # Noise of observations
sigma_der = 0.05 # Noise of derivative observations
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
xd = np.array([np.linspace(2,8,M)]).T
yd = fd(xd) + np.array(sigma_der*np.random.normal(0,1,(M,1)))
xpred = np.array([np.linspace(0,11,Npred)]).T
ypred_true = f(xpred)
ydpred_true = fd(xpred)
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss_der = GPy.likelihoods.Gaussian(variance=sigma_der**2)
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
# the likelihoods must follow the same order. Crosscovariances are automatically taken car of
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
# Optimize the model
m.optimize(messages=0, ipython_notebook=False)
#Plot the model, the syntax is same as for multioutput models:
plot_gp_vs_real(m, xpred, ydpred_true, [x.shape[0], xd.shape[0]], title='Latent function derivatives', fixed_input=1, xlim=[0,11], ylim=[-1.5,3])
plot_gp_vs_real(m, xpred, ypred_true, [x.shape[0], xd.shape[0]], title='Latent function', fixed_input=0, xlim=[0,11], ylim=[-1.5,3])
#making predictions for the values:
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0,1))])

View file

@ -44,3 +44,5 @@ from .src.sde_static import sde_White, sde_Bias
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
from .src.sde_brownian import sde_Brownian
from .src.multioutput_kern import MultioutputKern
from .src.multioutput_derivative_kern import MultioutputDerivativeKern
from .src.diff_kern import DiffKern

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np

View file

@ -5,7 +5,7 @@ from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices
class ODE_UYC(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):

View file

@ -4,7 +4,7 @@ from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices
class ODE_st(Kern):

View file

@ -2,7 +2,7 @@ from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import numpy as np
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices
class ODE_t(Kern):

88
GPy/kern/src/diff_kern.py Normal file
View file

@ -0,0 +1,88 @@
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern
import numpy as np
from paramz.caching import Cache_this
class DiffKern(Kern):
"""
Diff kernel is a thin wrapper for using partial derivatives of kernels as kernels. Eg. in combination with
Multioutput kernel this allows the user to train GPs with observations of latent function and latent
function derivatives. NOTE: DiffKern only works when used with Multioutput kernel. Do not use the kernel as standalone
The parameters the kernel needs are:
-'base_kern': a member of Kernel class that is used for observations
-'dimension': integer that indigates in which dimensions the partial derivative observations are
"""
def __init__(self, base_kern, dimension):
super(DiffKern, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='DiffKern')
self.base_kern = base_kern
self.dimension = dimension
def parameters_changed(self):
self.base_kern.parameters_changed()
@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension
if X2 is None:
X2 = X
if dimX2 is None:
dimX2 = self.dimension
return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2)
@Cache_this(limit=3, ignore_args=())
def Kdiag(self, X):
return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension))
@Cache_this(limit=3, ignore_args=())
def dK_dX_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX(X,X2, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX2(X,X2, self.dimension)
def reset_gradients(self):
self.base_kern.reset_gradients()
@property
def gradient(self):
return self.base_kern.gradient
@gradient.setter
def gradient(self, gradient):
self.base_kern.gradient = gradient
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
if dimX2 is None:
dimX2 = self.dimension
gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_diag(self, dL_dK_diag, X):
gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])
def update_gradients_dK_dX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def gradients_X(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension]
return np.sum(tmp, axis=1)
def gradients_X2(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :]
return np.sum(tmp, axis=1)
def _convert_gradients(self, l,g, f = lambda x:x):
if type(g) is np.ndarray:
return np.sum(f(l)*f(g))
else:
return np.array([np.sum(f(l)*f(gi)) for gi in g])

View file

@ -5,34 +5,8 @@
from .kern import CombinationKernel
import numpy as np
import itertools
from ...util.multioutput import index_to_slices
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
if len(index)==0:
return[]
#contruct the return structure
ind = np.asarray(index,dtype=np.int)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
class IndependentOutputs(CombinationKernel):
"""

View file

@ -116,6 +116,12 @@ class Kern(Parameterized):
except:
return X[:, self._all_dims_active]
def _project_dim(self, dim):
try:
return np.where(self._all_dims_active == dim)[0][0]
except:
return None
def K(self, X, X2):
"""
Compute the kernel function.

View file

@ -19,16 +19,26 @@ def put_clean(dct, name, func):
class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __new__(cls, name, bases, dct):
put_clean(dct, 'K', _slice_K)
put_clean(dct, 'dK_dX', _slice_dK_dX)
put_clean(dct, 'dK_dX2', _slice_dK_dX)
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
put_clean(dct, 'Kdiag', _slice_Kdiag)
put_clean(dct, 'phi', _slice_Kdiag)
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'update_gradients_dK_dX', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_dK_dX2', _slice_update_gradients_full)
put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X2', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X)
put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X)
put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX)
put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi)
put_clean(dct, 'psi2', _slice_psi)
@ -79,6 +89,20 @@ class _Slice_wrap(object):
return ret
return return_val
def handle_return_list(self, return_val):
if self.ret:
ret = [np.zeros(self.shape) for _ in range(len(return_val))]
if len(self.shape) == 3:
for i in range(len(return_val)):
ret[i][self.k._all_dims_active, :, :] = return_val[i]
#[ret[i].__setitem__((:, :, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
elif len(self.shape) == 4:
for i in range(len(return_val)):
ret[i][np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val[i]
#[ret[i].__setitem__((:, :, self.k._all_dims_active, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
return ret
return return_val
def _slice_K(f):
@wraps(f)
def wrap(self, X, X2 = None, *a, **kw):
@ -97,15 +121,15 @@ def _slice_Kdiag(f):
def _slice_update_gradients_full(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
def wrap(self, dL_dK, X, X2=None, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
ret = f(self, dL_dK, s.X, s.X2)
ret = f(self, dL_dK, s.X, s.X2, *a, **kw)
return ret
return wrap
def _slice_update_gradients_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
def wrap(self, dL_dKdiag, X, *a, **kw):
with _Slice_wrap(self, X, None) as s:
ret = f(self, dL_dKdiag, s.X)
return ret
@ -119,6 +143,99 @@ def _slice_gradients_X(f):
return ret
return wrap
def _slice_dK_dX(f):
@wraps(f)
def wrap(self, X, X2, dim, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
d = s.k._project_dim(dim)
if d is None:
ret = np.zeros((X.shape[0], X2.shape[0]))
else:
ret = f(self, s.X, s.X2, d, *a, **kw)
return ret
return wrap
def _slice_dK2_dXdX2(f):
@wraps(f)
def wrap(self, X, X2, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (d is None) or (d2 is None):
ret = np.zeros((X.shape[0], X2.shape[0]))
else:
ret = f(self, s.X, s.X2, d, d2, *a, **kw)
return ret
return wrap
def _slice_partial_gradients_X(f):
@wraps(f)
def wrap(self, X, X2, dim):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
Q1 = X.shape[1]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s:
ret = s.handle_return_array(f(self, s.X, s.X2, dim))
return ret
return wrap
def _slice_partial_gradients_list_X(f):
@wraps(f)
def wrap(self, X, X2, dim):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
d = s.k._project_dim(dim)
if d is None:
ret = [np.zeros((N, M)) for i in range(s.k.size)]
else:
ret = f(self, s.X, s.X2, d)
return ret
return wrap
def _slice_partial_gradients_XX(f):
@wraps(f)
def wrap(self, X, X2, dim, dimX2):
if X2 is None:
N, M = X.shape[0], X.shape[0]
Q1 = X.shape[1]
else:
N, M = X.shape[0], X2.shape[0]
Q1, Q2 = X.shape[1], X2.shape[1]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
ret = s.handle_return_array(f(self, s.X, s.X2, dim, dimX2))
return ret
return wrap
def _slice_partial_gradients_list_XX(f):
@wraps(f)
def wrap(self, X, X2, dimX, dimX2):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (d is None) or (d2 is None):
ret = [np.zeros((N, M)) for i in range(s.k.size)]
else:
ret = f(self, s.X, s.X2, d, d2)
return ret
return wrap
def _slice_gradient_derivatives_X(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
with _Slice_wrap(self, X, X2) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):

View file

@ -0,0 +1,84 @@
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern, CombinationKernel
from .multioutput_kern import MultioutputKern, ZeroKern
import numpy as np
from functools import partial
class KernWrapper(Kern):
def __init__(self, fk, fug, fg, base_kern):
self.fk = fk
self.fug = fug
self.fg = fg
self.base_kern = base_kern
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False)
def K(self, X, X2=None):
return self.fk(X,X2=X2)
def update_gradients_full(self,dL_dK, X, X2=None):
return self.fug(dL_dK, X, X2=X2)
def gradients_X(self,dL_dK, X, X2=None):
return self.fg(dL_dK, X, X2=X2)
@property
def gradient(self):
return self.base_kern.gradient
@gradient.setter
def gradient(self, gradient):
self.base_kern.gradient = gradient
class MultioutputDerivativeKern(MultioutputKern):
"""
Multioutput derivative kernel is a meta class for combining different kernels for multioutput GPs.
Multioutput derivative kernel is only a thin wrapper for Multioutput kernel for user not having to define
cross covariances.
"""
def __init__(self, kernels, cross_covariances={}, name='MultioutputDerivativeKern'):
#kernels contains a list of kernels as input,
if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
# The combination kernel ALLWAYS puts the extra dimension last.
# Thus, the index dimension of this kernel is always the last dimension
# after slicing. This is why the index_dim is just the last column:
self.index_dim = -1
super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_parameters=False)
nl = len(kernels)
#build covariance structure
covariance = [[None for i in range(nl)] for j in range(nl)]
linked = []
for i in range(0,nl):
unique=True
for j in range(0,nl):
if i==j or (kernels[i] is kernels[j]):
kern = kernels[i]
if i>j:
unique=False
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
kern = cross_covariances.get((i,j))
elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j])
unique=False
elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i])
elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern)
if i>j:
unique=False
else:
kern = ZeroKern()
covariance[i][j] = kern
if unique is True:
linked.append(i)
self.covariance = covariance
self.link_parameters(*[kernels[i] for i in linked])

View file

@ -1,7 +1,10 @@
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern, CombinationKernel
import numpy as np
from functools import reduce, partial
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices
from paramz.caching import Cache_this
class ZeroKern(Kern):
@ -18,6 +21,13 @@ class ZeroKern(Kern):
def gradients_X(self,dL_dK, X, X2=None):
return np.zeros((X.shape[0],X.shape[1]))
@property
def gradient(self):
return np.empty((1,0))
@gradient.setter
def gradient(self, gradient):
pass
class MultioutputKern(CombinationKernel):
"""
@ -62,14 +72,13 @@ class MultioutputKern(CombinationKernel):
unique=True
for j in range(0,nl):
if i==j or (kernels[i] is kernels[j]):
covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X}
covariance[i][j] = kernels[i]
if i>j:
unique=False
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
covariance[i][j] = cross_covariances.get((i,j))
else: # zero covariance structure
kern = ZeroKern()
covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
covariance[i][j] = ZeroKern()
if unique is True:
linked.append(i)
self.covariance = covariance
@ -82,7 +91,7 @@ class MultioutputKern(CombinationKernel):
slices = index_to_slices(X[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
return target
@Cache_this(limit=3, ignore_args=())
@ -93,10 +102,10 @@ class MultioutputKern(CombinationKernel):
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target
def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2):
gradient = cov_struct['kern'].gradient.copy()
cov_struct['update_gradients_full'](dL_dK, X, X2)
cov_struct['kern'].gradient += gradient
def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
gradient = kern.gradient.copy()
kern.update_gradients_full(dL_dK, X, X2)
kern.gradient += gradient
def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X):
gradient = kern.gradient.copy()
@ -118,14 +127,14 @@ class MultioutputKern(CombinationKernel):
def update_gradients_diag(self, dL_dKdiag, X):
self.reset_gradients()
slices = index_to_slices(X[:,self.index_dim])
[[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
[[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
def gradients_X(self,dL_dK, X, X2=None):
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros((X.shape[0], X.shape[1]) )
if X2 is not None:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
else:
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
return target

View file

@ -6,6 +6,7 @@ import numpy as np
from .stationary import Stationary
from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU
from ...core import Param
from paramz.caching import Cache_this
from paramz.transformations import Logexp
from .grid_kerns import GridRBF
@ -50,6 +51,27 @@ class RBF(Stationary):
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
dist = X[:,None,dimX]-X2[None,:,dimX]
lengthscale2inv = (np.ones((X.shape[1]))/(self.lengthscale**2))[dimX]
return -1.*K*dist*lengthscale2inv
@Cache_this(limit=3, ignore_args=())
def dK_dX2(self, X, X2, dimX2):
return -self.dK_dX(X,X2, dimX2)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscale2inv = np.ones((X.shape[1]))/(self.lengthscale**2)
return -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*lengthscale2inv[dimX2] + (dimX==dimX2)*K*lengthscale2inv[dimX]
def dK_dr(self, r):
return -r*self.K_of_r(r)
@ -59,6 +81,74 @@ class RBF(Stationary):
def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros
@Cache_this(limit=3, ignore_args=())
def dK_dvariance(self,X,X2):
return self.K(X,X2)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX(self, X, X2, dim):
return self.dK_dX(X,X2, dim)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX2(self, X, X2, dim):
return self.dK_dX2(X,X2, dim)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK3_dvariancedXdX2(self, X, X2, dim, dimX2):
return self.dK2_dXdX2(X, X2, dim, dimX2)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX(self, X, X2, dimX):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
if self.ARD:
g = []
for diml in range(X.shape[1]):
g += [-1.*K*dist[:,:,dimX]*(dist[:,:,diml]**2)*(lengthscaleinv[dimX]**2)*(lengthscaleinv[diml]**3) + 2.*dist[:,:,dimX]*(lengthscaleinv[diml]**3)*K*(dimX == diml)]
else:
g = -1.*K*dist[:,:,dimX]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**5) + 2.*dist[:,:,dimX]*(lengthscaleinv[dimX]**3)*K
return g
@Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX2(self, X, X2, dimX2):
tmp = self.dK2_dlengthscaledX(X, X2, dimX2)
if self.ARD:
return [-1.*g for g in tmp]
else:
return -1*tmp
@Cache_this(limit=3, ignore_args=())
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
lengthscale2inv = lengthscaleinv**2
if self.ARD:
g = []
for diml in range(X.shape[1]):
tmp = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*(dist[:,:,diml]**2)*lengthscale2inv[dimX]*lengthscale2inv[dimX2]*(lengthscaleinv[diml]**3)
if dimX == dimX2:
tmp += K*lengthscale2inv[dimX]*(lengthscaleinv[diml]**3)*(dist[:,:,diml]**2)
if diml == dimX:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX2]*(lengthscaleinv[dimX]**3)
if diml == dimX2:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*(lengthscaleinv[dimX2]**3)
if dimX == dimX2:
tmp += -2.*K*(lengthscaleinv[dimX]**3)
g += [tmp]
else:
g = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**7) +4*K*dist[:,:,dimX]*dist[:,:,dimX2]*(lengthscaleinv[dimX]**5)
if dimX == dimX2:
g += -2.*K*(lengthscaleinv[dimX]**3) + K*(lengthscaleinv[dimX]**5)*np.sum(dist**2, axis=2)
return g
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:

View file

@ -4,7 +4,7 @@ A new kernel
import numpy as np
from .kern import Kern, CombinationKernel
from .independent_outputs import index_to_slices
from ...util.multioutput import index_to_slices
import itertools
class DEtime(Kern):

View file

@ -212,7 +212,6 @@ class Stationary(Kern):
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
def update_gradients_direct(self, dL_dVar, dL_dLen):
"""
Specially intended for the Grid regression case.
@ -308,6 +307,21 @@ class Stationary(Kern):
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def dgradients_dX(self, X, X2, dimX):
g1 = self.dK2_dvariancedX(X, X2, dimX)
g2 = self.dK2_dlengthscaledX(X, X2, dimX)
return [g1, g2]
def dgradients_dX2(self, X, X2, dimX2):
g1 = self.dK2_dvariancedX2(X, X2, dimX2)
g2 = self.dK2_dlengthscaledX2(X, X2, dimX2)
return [g1, g2]
def dgradients2_dXdX2(self, X, X2, dimX, dimX2):
g1 = self.dK3_dvariancedXdX2(X, X2, dimX, dimX2)
g2 = self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2)
return [g1, g2]
def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK

View file

@ -66,7 +66,6 @@ class Binomial(Likelihood):
np.testing.assert_array_equal(N.shape, y.shape)
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
@ -177,6 +176,41 @@ class Binomial(Likelihood):
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass
def moments_match_ep(self,obs,tau,v,Y_metadata_i=None):
"""
Calculation of moments using quadrature
:param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural paramenter (mu*precision)
"""
#Compute first integral for zeroth moment.
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
if (isinstance(self.gp_link, link_functions.Probit) or isinstance(self.gp_link, link_functions.ScaledProbit)) and (Y_metadata_i is None or int(Y_metadata_i.get('trials', 1)) == int(1)): #Special case for probit likelihood. Can be found from Riihimaki et Vehtari 2010
if isinstance(self.gp_link, link_functions.ScaledProbit):
nu = self.gp_link.nu
else:
nu = 1.0
nu = self.gp_link.nu
mu = v/tau
sigma2 = 1./tau
t = np.asarray(1 + sigma2*(nu**2))
t[t<1e-20] = 1e-20
a = np.sqrt(t)
z = obs*mu/a
normc_z = max(self.gp_link.transf(z), 1e-20)
m0 = normc_z
normp_z = self.gp_link.dtransf_df(z)
m1 = mu + (obs*sigma2*normp_z)/(normc_z*a)
#print('tau: {}, v: {}, nu: {}, z: {}, normc_z: {}, normp_z: {}'.format(tau, v, nu.values, z, normc_z, normp_z))
m2 = sigma2 - ((sigma2**2)*normp_z)/((1./(nu**2)+sigma2)*normc_z)*(z + normp_z/(nu**2)/normc_z)
#print("m0: {}, m1: {}, m2: {}".format(m0,m1,m2))
#m0a, m1a, m2a = super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i)
#print("m0a: {}, m1a: {}, m2a: {}".format(m0a,m1a,m2a))
return m0, m1, m2
else:
return super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i)
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):

View file

@ -138,6 +138,38 @@ class Probit(GPTransformation):
input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
return input_dict
class ScaledProbit(Probit):
"""
.. math::
g(f) = \\Phi^{-1} (nu*mu)
"""
def __init__(self, nu=1.):
self.nu = float(nu)
def transf(self,f):
return std_norm_cdf(f*self.nu)
def dtransf_df(self,f):
return std_norm_pdf(f*self.nu)*self.nu
def d2transf_df2(self,f):
return -(f*self.nu) * std_norm_pdf(f*self.nu)*(self.nu**2)
def d3transf_df3(self,f):
return (safe_square(f*self.nu)-1.)*std_norm_pdf(f*self.nu)*(self.nu**3)
def to_dict(self):
"""
Convert the object into a json serializable dictionary.
Note: It uses the private method _save_to_input_dict of the parent.
:return dict: json serializable dictionary containing the needed information to instantiate the object
"""
input_dict = super(ScaledProbit, self)._save_to_input_dict()
input_dict["class"] = "GPy.likelihoods.link_functions.ScaledProbit"
return input_dict
class Cloglog(GPTransformation):
"""

View file

@ -14,7 +14,7 @@ from .gaussian import Gaussian
from ..core.parameterization import Param
from paramz.transformations import Logexp
from ..core.parameterization import Parameterized
from ..kern.src.independent_outputs import index_to_slices
from ..util.multioutput import index_to_slices
import itertools
class MultioutputLikelihood(MixedNoise):

View file

@ -30,3 +30,4 @@ from .gp_grid_regression import GPRegressionGrid
from .gp_multiout_regression import GPMultioutRegression
from .gp_multiout_regression_md import GPMultioutRegressionMD
from .tp_regression import TPRegression
from .multioutput_gp import MultioutputGP

View file

@ -0,0 +1,147 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import itertools
from ..core.model import Model
from ..core.parameterization.variational import VariationalPosterior
from ..core.mapping import Mapping
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..util.normalizer import Standardize
from .. import util
from paramz import ObsAr
from ..core.gp import GP
from ..util.multioutput import index_to_slices
import logging
import warnings
logger = logging.getLogger("GP")
class MultioutputGP(GP):
"""
Gaussian process model for using observations from multiple likelihoods and different kernels
:param X_list: input observations in a list for each likelihood
:param Y: output observations in a list for each likelihood
:param kernel_list: kernels in a list for each likelihood
:param likelihood_list: likelihoods in a list
:param kernel_cross_covariances: Cross covariances between different likelihoods. See class MultioutputKern for more
:param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
"""
def __init__(self, X_list, Y_list, kernel_list, likelihood_list, name='multioutputgp', kernel_cross_covariances={}, inference_method=None):
#Input and Output
X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list)
Ny = len(Y_list)
assert isinstance(kernel_list, list)
kernel = kern.MultioutputDerivativeKern(kernels=kernel_list, cross_covariances=kernel_cross_covariances)
assert isinstance(likelihood_list, list)
likelihood = likelihoods.MultioutputLikelihood(likelihood_list)
if inference_method is None:
if all([isinstance(l, Gaussian) for l in likelihood_list]):
inference_method = exact_gaussian_inference.ExactGaussianInference()
else:
inference_method = expectation_propagation.EP()
super(MultioutputGP, self).__init__(X,Y,kernel,likelihood, Y_metadata={'output_index':self.output_index, 'trials':np.ones(self.output_index.shape)}, inference_method = inference_method)
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew,None)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict_noiseless(Xnew, full_cov, Y_metadata, kern)
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew,None)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict(Xnew, full_cov, Y_metadata, kern, likelihood, include_likelihood)
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
if isinstance(X, list):
X, _, ind = util.multioutput.build_XY(X,None)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
def predictive_gradients(self, Xnew, kern=None):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
#if Y_metadata is None:
#Y_metadata={'output_index': ind}
return super(MultioutputGP, self).predictive_gradients(Xnew, kern)
def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!!
"""
Compute the derivatives of the predicted latent function with respect to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is not the same as computing the mean and variance of the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
"""
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
slices = index_to_slices(Xnew[:,-1])
for i in range(len(slices)):
if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0):
assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)"
if kern is None:
kern = self.kern
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1]
# gradients wrt the diagonal part k_{xx}
dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)[:,0:-1]
#grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
if self.posterior.woodbury_inv.ndim == 3:
tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
tmp[:] = dv_dX[:,:,None]
for i in range(self.posterior.woodbury_inv.shape[2]):
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
else:
tmp = dv_dX
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:,0:-1]
return mean_jac, tmp
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
if isinstance(x_test, list):
x_test, y_test, ind = util.multioutput.build_XY(x_test, y_test)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).log_predictive_density(x_test, y_test, Y_metadata)
def set_XY(self, X=None, Y=None):
if isinstance(X, list):
X, _, self.output_index = util.multioutput.build_XY(X, None)
if isinstance(Y, list):
_, Y, self.output_index = util.multioutput.build_XY(Y, Y)
self.update_model(False)
if Y is not None:
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
if X is not None:
self.X = ObsAr(X)
self.Y_metadata={'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)}
if isinstance(self.inference_method, expectation_propagation.EP):
self.inference_method.reset()
self.update_model(True)

View file

@ -7,7 +7,7 @@ from unittest.case import skip
import GPy
from GPy.core.parameterization.param import Param
import numpy as np
import random
from ..util.config import config

View file

@ -2,11 +2,12 @@ import numpy as np
import scipy
from scipy.special import cbrt
from GPy.models import GradientChecker
import random
_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val)
_lim_val_cube = cbrt(_lim_val)
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside, ScaledProbit
class LinkFunctionTests(np.testing.TestCase):
def setUp(self):
@ -124,6 +125,11 @@ class LinkFunctionTests(np.testing.TestCase):
lim_of_inf = _lim_val
self.check_gradient(link, lim_of_inf, test_lim=True)
def test_scaledprobit_gradients(self):
link = ScaledProbit(nu=random.random())
lim_of_inf = _lim_val
self.check_gradient(link, lim_of_inf, test_lim=True)
def test_Cloglog_gradients(self):
link = Cloglog()
lim_of_inf = _lim_val_exp

View file

@ -1181,6 +1181,67 @@ class GradientTests(np.testing.TestCase):
with self.assertRaises(RuntimeError):
m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
def test_multioutput_model_with_derivative_observations(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10
M=10
sigma=0.05
sigmader=0.05
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
xd = np.array([np.linspace(2,8,M)]).T
yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1)))
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss = GPy.likelihoods.Gaussian(variance=0.1)
gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2)
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
# the likelihoods must follow the same order
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
m.randomize()
self.assertTrue(m.checkgrad())
m.optimize(messages=0, ipython_notebook=False)
self.assertTrue(m.checkgrad())
def test_multioutput_model_with_ep(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10
sigma=0.05
sigmader=0.05
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
M=7
xd = np.array([np.linspace(2,8,M)]).T
yd = 2*(fd(xd)>0) -1
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
probit = GPy.likelihoods.Binomial(gp_link = GPy.likelihoods.link_functions.ScaledProbit(nu=100))
# Then create the model, we give everything in lists
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, probit], inference_method=GPy.inference.latent_function_inference.EP(ep_mode="nested"))
self.assertTrue(m.checkgrad())
def _create_missing_data_model(kernel, Q):
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
_, _, Ylist = GPy.examples.dimensionality_reduction._simulate_matern(D1, D2, D3, N, num_inducing, False)

View file

@ -2,6 +2,33 @@ import numpy as np
import warnings
import GPy
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
if len(index)==0:
return[]
#contruct the return structure
ind = np.asarray(index,dtype=np.int)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
def get_slices(input_list):
num_outputs = len(input_list)