mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-18 13:55:14 +02:00
Merge branch 'feature-multioutput-grad-obs' of git://github.com/esiivola/GPy into esiivola-feature-multioutput-grad-obs
This commit is contained in:
commit
22ce7ad207
24 changed files with 795 additions and 53 deletions
|
|
@ -581,3 +581,53 @@ def warped_gp_cubic_sine(max_iters=100):
|
||||||
m.plot(title="Standard GP")
|
m.plot(title="Standard GP")
|
||||||
warp_m.plot_warping()
|
warp_m.plot_warping()
|
||||||
pb.show()
|
pb.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def multioutput_gp_with_derivative_observations():
|
||||||
|
def plot_gp_vs_real(m, x, yreal, size_inputs, title, fixed_input=1, xlim=[0,11], ylim=[-1.5,3]):
|
||||||
|
fig, ax = pb.subplots()
|
||||||
|
ax.set_title(title)
|
||||||
|
pb.plot(x, yreal, "r", label='Real function')
|
||||||
|
rows = slice(0, size_inputs[0]) if fixed_input == 0 else slice(size_inputs[0], size_inputs[0]+size_inputs[1])
|
||||||
|
m.plot(fixed_inputs=[(1, fixed_input)], which_data_rows=rows, xlim=xlim, ylim=ylim, ax=ax)
|
||||||
|
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
|
||||||
|
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
|
||||||
|
N=10 # Number of observations
|
||||||
|
M=10 # Number of derivative observations
|
||||||
|
Npred=100 # Number of prediction points
|
||||||
|
sigma = 0.05 # Noise of observations
|
||||||
|
sigma_der = 0.05 # Noise of derivative observations
|
||||||
|
x = np.array([np.linspace(1,10,N)]).T
|
||||||
|
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
|
||||||
|
|
||||||
|
xd = np.array([np.linspace(2,8,M)]).T
|
||||||
|
yd = fd(xd) + np.array(sigma_der*np.random.normal(0,1,(M,1)))
|
||||||
|
|
||||||
|
xpred = np.array([np.linspace(0,11,Npred)]).T
|
||||||
|
ypred_true = f(xpred)
|
||||||
|
ydpred_true = fd(xpred)
|
||||||
|
|
||||||
|
# squared exponential kernel:
|
||||||
|
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
|
||||||
|
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
|
||||||
|
se_der = GPy.kern.DiffKern(se, 0)
|
||||||
|
|
||||||
|
#Then
|
||||||
|
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
|
||||||
|
gauss_der = GPy.likelihoods.Gaussian(variance=sigma_der**2)
|
||||||
|
|
||||||
|
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
|
||||||
|
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
|
||||||
|
# the likelihoods must follow the same order. Crosscovariances are automatically taken car of
|
||||||
|
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
|
||||||
|
|
||||||
|
# Optimize the model
|
||||||
|
m.optimize(messages=0, ipython_notebook=False)
|
||||||
|
|
||||||
|
#Plot the model, the syntax is same as for multioutput models:
|
||||||
|
plot_gp_vs_real(m, xpred, ydpred_true, [x.shape[0], xd.shape[0]], title='Latent function derivatives', fixed_input=1, xlim=[0,11], ylim=[-1.5,3])
|
||||||
|
plot_gp_vs_real(m, xpred, ypred_true, [x.shape[0], xd.shape[0]], title='Latent function', fixed_input=0, xlim=[0,11], ylim=[-1.5,3])
|
||||||
|
|
||||||
|
#making predictions for the values:
|
||||||
|
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0,1))])
|
||||||
|
|
|
||||||
|
|
@ -44,3 +44,5 @@ from .src.sde_static import sde_White, sde_Bias
|
||||||
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
|
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
|
||||||
from .src.sde_brownian import sde_Brownian
|
from .src.sde_brownian import sde_Brownian
|
||||||
from .src.multioutput_kern import MultioutputKern
|
from .src.multioutput_kern import MultioutputKern
|
||||||
|
from .src.multioutput_derivative_kern import MultioutputDerivativeKern
|
||||||
|
from .src.diff_kern import DiffKern
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from .kern import Kern
|
from .kern import Kern
|
||||||
from .independent_outputs import index_to_slices
|
from ...util.multioutput import index_to_slices
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from paramz.transformations import Logexp
|
from paramz.transformations import Logexp
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from .kern import Kern
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from paramz.transformations import Logexp
|
from paramz.transformations import Logexp
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .independent_outputs import index_to_slices
|
from ...util.multioutput import index_to_slices
|
||||||
|
|
||||||
class ODE_UYC(Kern):
|
class ODE_UYC(Kern):
|
||||||
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):
|
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ from .kern import Kern
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from paramz.transformations import Logexp
|
from paramz.transformations import Logexp
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .independent_outputs import index_to_slices
|
from ...util.multioutput import index_to_slices
|
||||||
|
|
||||||
|
|
||||||
class ODE_st(Kern):
|
class ODE_st(Kern):
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from .kern import Kern
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from paramz.transformations import Logexp
|
from paramz.transformations import Logexp
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .independent_outputs import index_to_slices
|
from ...util.multioutput import index_to_slices
|
||||||
|
|
||||||
|
|
||||||
class ODE_t(Kern):
|
class ODE_t(Kern):
|
||||||
|
|
|
||||||
88
GPy/kern/src/diff_kern.py
Normal file
88
GPy/kern/src/diff_kern.py
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
from .kern import Kern
|
||||||
|
import numpy as np
|
||||||
|
from paramz.caching import Cache_this
|
||||||
|
|
||||||
|
class DiffKern(Kern):
|
||||||
|
"""
|
||||||
|
Diff kernel is a thin wrapper for using partial derivatives of kernels as kernels. Eg. in combination with
|
||||||
|
Multioutput kernel this allows the user to train GPs with observations of latent function and latent
|
||||||
|
function derivatives. NOTE: DiffKern only works when used with Multioutput kernel. Do not use the kernel as standalone
|
||||||
|
|
||||||
|
The parameters the kernel needs are:
|
||||||
|
-'base_kern': a member of Kernel class that is used for observations
|
||||||
|
-'dimension': integer that indigates in which dimensions the partial derivative observations are
|
||||||
|
"""
|
||||||
|
def __init__(self, base_kern, dimension):
|
||||||
|
super(DiffKern, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='DiffKern')
|
||||||
|
self.base_kern = base_kern
|
||||||
|
self.dimension = dimension
|
||||||
|
|
||||||
|
def parameters_changed(self):
|
||||||
|
self.base_kern.parameters_changed()
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension
|
||||||
|
if X2 is None:
|
||||||
|
X2 = X
|
||||||
|
if dimX2 is None:
|
||||||
|
dimX2 = self.dimension
|
||||||
|
return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def Kdiag(self, X):
|
||||||
|
return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension))
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK_dX_wrap(self, X, X2): #X in dimension self.dimension
|
||||||
|
return self.base_kern.dK_dX(X,X2, self.dimension)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension
|
||||||
|
return self.base_kern.dK_dX2(X,X2, self.dimension)
|
||||||
|
|
||||||
|
def reset_gradients(self):
|
||||||
|
self.base_kern.reset_gradients()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def gradient(self):
|
||||||
|
return self.base_kern.gradient
|
||||||
|
|
||||||
|
@gradient.setter
|
||||||
|
def gradient(self, gradient):
|
||||||
|
self.base_kern.gradient = gradient
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
|
||||||
|
if dimX2 is None:
|
||||||
|
dimX2 = self.dimension
|
||||||
|
gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2)
|
||||||
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dK_diag, X):
|
||||||
|
gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension)
|
||||||
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])
|
||||||
|
|
||||||
|
def update_gradients_dK_dX(self, dL_dK, X, X2=None):
|
||||||
|
if X2 is None:
|
||||||
|
X2 = X
|
||||||
|
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension)
|
||||||
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
|
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
|
||||||
|
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension)
|
||||||
|
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
|
||||||
|
|
||||||
|
def gradients_X(self, dL_dK, X, X2):
|
||||||
|
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension]
|
||||||
|
return np.sum(tmp, axis=1)
|
||||||
|
|
||||||
|
def gradients_X2(self, dL_dK, X, X2):
|
||||||
|
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :]
|
||||||
|
return np.sum(tmp, axis=1)
|
||||||
|
|
||||||
|
def _convert_gradients(self, l,g, f = lambda x:x):
|
||||||
|
if type(g) is np.ndarray:
|
||||||
|
return np.sum(f(l)*f(g))
|
||||||
|
else:
|
||||||
|
return np.array([np.sum(f(l)*f(gi)) for gi in g])
|
||||||
|
|
@ -5,34 +5,8 @@
|
||||||
from .kern import CombinationKernel
|
from .kern import CombinationKernel
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import itertools
|
import itertools
|
||||||
|
from ...util.multioutput import index_to_slices
|
||||||
|
|
||||||
def index_to_slices(index):
|
|
||||||
"""
|
|
||||||
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
|
|
||||||
|
|
||||||
e.g.
|
|
||||||
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
|
|
||||||
returns
|
|
||||||
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
|
|
||||||
|
|
||||||
or, a more complicated example
|
|
||||||
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
|
|
||||||
returns
|
|
||||||
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
|
|
||||||
"""
|
|
||||||
if len(index)==0:
|
|
||||||
return[]
|
|
||||||
|
|
||||||
#contruct the return structure
|
|
||||||
ind = np.asarray(index,dtype=np.int)
|
|
||||||
ret = [[] for i in range(ind.max()+1)]
|
|
||||||
|
|
||||||
#find the switchpoints
|
|
||||||
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
|
|
||||||
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
|
|
||||||
|
|
||||||
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
|
|
||||||
return ret
|
|
||||||
|
|
||||||
class IndependentOutputs(CombinationKernel):
|
class IndependentOutputs(CombinationKernel):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,12 @@ class Kern(Parameterized):
|
||||||
except:
|
except:
|
||||||
return X[:, self._all_dims_active]
|
return X[:, self._all_dims_active]
|
||||||
|
|
||||||
|
def _project_dim(self, dim):
|
||||||
|
try:
|
||||||
|
return np.where(self._all_dims_active == dim)[0][0]
|
||||||
|
except:
|
||||||
|
return None
|
||||||
|
|
||||||
def K(self, X, X2):
|
def K(self, X, X2):
|
||||||
"""
|
"""
|
||||||
Compute the kernel function.
|
Compute the kernel function.
|
||||||
|
|
|
||||||
|
|
@ -19,16 +19,26 @@ def put_clean(dct, name, func):
|
||||||
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
||||||
def __new__(cls, name, bases, dct):
|
def __new__(cls, name, bases, dct):
|
||||||
put_clean(dct, 'K', _slice_K)
|
put_clean(dct, 'K', _slice_K)
|
||||||
|
put_clean(dct, 'dK_dX', _slice_dK_dX)
|
||||||
|
put_clean(dct, 'dK_dX2', _slice_dK_dX)
|
||||||
|
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
|
||||||
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
put_clean(dct, 'Kdiag', _slice_Kdiag)
|
||||||
put_clean(dct, 'phi', _slice_Kdiag)
|
put_clean(dct, 'phi', _slice_Kdiag)
|
||||||
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
||||||
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||||
|
put_clean(dct, 'update_gradients_dK_dX', _slice_update_gradients_full)
|
||||||
|
put_clean(dct, 'update_gradients_dK_dX2', _slice_update_gradients_full)
|
||||||
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
||||||
|
put_clean(dct, 'gradients_X2', _slice_gradients_X)
|
||||||
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
|
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
|
||||||
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
|
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
|
||||||
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
|
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
|
||||||
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||||
|
|
||||||
|
put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X)
|
||||||
|
put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X)
|
||||||
|
put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX)
|
||||||
|
|
||||||
put_clean(dct, 'psi0', _slice_psi)
|
put_clean(dct, 'psi0', _slice_psi)
|
||||||
put_clean(dct, 'psi1', _slice_psi)
|
put_clean(dct, 'psi1', _slice_psi)
|
||||||
put_clean(dct, 'psi2', _slice_psi)
|
put_clean(dct, 'psi2', _slice_psi)
|
||||||
|
|
@ -79,6 +89,20 @@ class _Slice_wrap(object):
|
||||||
return ret
|
return ret
|
||||||
return return_val
|
return return_val
|
||||||
|
|
||||||
|
def handle_return_list(self, return_val):
|
||||||
|
if self.ret:
|
||||||
|
ret = [np.zeros(self.shape) for _ in range(len(return_val))]
|
||||||
|
if len(self.shape) == 3:
|
||||||
|
for i in range(len(return_val)):
|
||||||
|
ret[i][self.k._all_dims_active, :, :] = return_val[i]
|
||||||
|
#[ret[i].__setitem__((:, :, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
|
||||||
|
elif len(self.shape) == 4:
|
||||||
|
for i in range(len(return_val)):
|
||||||
|
ret[i][np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val[i]
|
||||||
|
#[ret[i].__setitem__((:, :, self.k._all_dims_active, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
|
||||||
|
return ret
|
||||||
|
return return_val
|
||||||
|
|
||||||
def _slice_K(f):
|
def _slice_K(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, X, X2 = None, *a, **kw):
|
def wrap(self, X, X2 = None, *a, **kw):
|
||||||
|
|
@ -97,15 +121,15 @@ def _slice_Kdiag(f):
|
||||||
|
|
||||||
def _slice_update_gradients_full(f):
|
def _slice_update_gradients_full(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dK, X, X2=None):
|
def wrap(self, dL_dK, X, X2=None, *a, **kw):
|
||||||
with _Slice_wrap(self, X, X2) as s:
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
ret = f(self, dL_dK, s.X, s.X2)
|
ret = f(self, dL_dK, s.X, s.X2, *a, **kw)
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
def _slice_update_gradients_diag(f):
|
def _slice_update_gradients_diag(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dKdiag, X):
|
def wrap(self, dL_dKdiag, X, *a, **kw):
|
||||||
with _Slice_wrap(self, X, None) as s:
|
with _Slice_wrap(self, X, None) as s:
|
||||||
ret = f(self, dL_dKdiag, s.X)
|
ret = f(self, dL_dKdiag, s.X)
|
||||||
return ret
|
return ret
|
||||||
|
|
@ -119,6 +143,99 @@ def _slice_gradients_X(f):
|
||||||
return ret
|
return ret
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
|
def _slice_dK_dX(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim, *a, **kw):
|
||||||
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
|
d = s.k._project_dim(dim)
|
||||||
|
if d is None:
|
||||||
|
ret = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d, *a, **kw)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_dK2_dXdX2(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dimX, dimX2, *a, **kw):
|
||||||
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
|
d = s.k._project_dim(dimX)
|
||||||
|
d2 = s.k._project_dim(dimX2)
|
||||||
|
if (d is None) or (d2 is None):
|
||||||
|
ret = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d, d2, *a, **kw)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_X(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
Q1 = X.shape[1]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s:
|
||||||
|
ret = s.handle_return_array(f(self, s.X, s.X2, dim))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_list_X(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
|
||||||
|
d = s.k._project_dim(dim)
|
||||||
|
if d is None:
|
||||||
|
ret = [np.zeros((N, M)) for i in range(s.k.size)]
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_XX(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dim, dimX2):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
Q1 = X.shape[1]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
Q1, Q2 = X.shape[1], X2.shape[1]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
|
||||||
|
ret = s.handle_return_array(f(self, s.X, s.X2, dim, dimX2))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_partial_gradients_list_XX(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, X, X2, dimX, dimX2):
|
||||||
|
if X2 is None:
|
||||||
|
N, M = X.shape[0], X.shape[0]
|
||||||
|
else:
|
||||||
|
N, M = X.shape[0], X2.shape[0]
|
||||||
|
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
|
||||||
|
d = s.k._project_dim(dimX)
|
||||||
|
d2 = s.k._project_dim(dimX2)
|
||||||
|
if (d is None) or (d2 is None):
|
||||||
|
ret = [np.zeros((N, M)) for i in range(s.k.size)]
|
||||||
|
else:
|
||||||
|
ret = f(self, s.X, s.X2, d, d2)
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
|
def _slice_gradient_derivatives_X(f):
|
||||||
|
@wraps(f)
|
||||||
|
def wrap(self, dL_dK, X, X2=None):
|
||||||
|
with _Slice_wrap(self, X, X2) as s:
|
||||||
|
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||||
|
return ret
|
||||||
|
return wrap
|
||||||
|
|
||||||
def _slice_gradients_X_diag(f):
|
def _slice_gradients_X_diag(f):
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def wrap(self, dL_dKdiag, X):
|
def wrap(self, dL_dKdiag, X):
|
||||||
|
|
|
||||||
84
GPy/kern/src/multioutput_derivative_kern.py
Normal file
84
GPy/kern/src/multioutput_derivative_kern.py
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
from .kern import Kern, CombinationKernel
|
||||||
|
from .multioutput_kern import MultioutputKern, ZeroKern
|
||||||
|
import numpy as np
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
class KernWrapper(Kern):
|
||||||
|
def __init__(self, fk, fug, fg, base_kern):
|
||||||
|
self.fk = fk
|
||||||
|
self.fug = fug
|
||||||
|
self.fg = fg
|
||||||
|
self.base_kern = base_kern
|
||||||
|
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False)
|
||||||
|
|
||||||
|
def K(self, X, X2=None):
|
||||||
|
return self.fk(X,X2=X2)
|
||||||
|
|
||||||
|
def update_gradients_full(self,dL_dK, X, X2=None):
|
||||||
|
return self.fug(dL_dK, X, X2=X2)
|
||||||
|
|
||||||
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
|
return self.fg(dL_dK, X, X2=X2)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def gradient(self):
|
||||||
|
return self.base_kern.gradient
|
||||||
|
|
||||||
|
@gradient.setter
|
||||||
|
def gradient(self, gradient):
|
||||||
|
self.base_kern.gradient = gradient
|
||||||
|
|
||||||
|
class MultioutputDerivativeKern(MultioutputKern):
|
||||||
|
"""
|
||||||
|
Multioutput derivative kernel is a meta class for combining different kernels for multioutput GPs.
|
||||||
|
Multioutput derivative kernel is only a thin wrapper for Multioutput kernel for user not having to define
|
||||||
|
cross covariances.
|
||||||
|
"""
|
||||||
|
def __init__(self, kernels, cross_covariances={}, name='MultioutputDerivativeKern'):
|
||||||
|
#kernels contains a list of kernels as input,
|
||||||
|
if not isinstance(kernels, list):
|
||||||
|
self.single_kern = True
|
||||||
|
self.kern = kernels
|
||||||
|
kernels = [kernels]
|
||||||
|
else:
|
||||||
|
self.single_kern = False
|
||||||
|
self.kern = kernels
|
||||||
|
# The combination kernel ALLWAYS puts the extra dimension last.
|
||||||
|
# Thus, the index dimension of this kernel is always the last dimension
|
||||||
|
# after slicing. This is why the index_dim is just the last column:
|
||||||
|
self.index_dim = -1
|
||||||
|
super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_parameters=False)
|
||||||
|
|
||||||
|
nl = len(kernels)
|
||||||
|
|
||||||
|
#build covariance structure
|
||||||
|
covariance = [[None for i in range(nl)] for j in range(nl)]
|
||||||
|
linked = []
|
||||||
|
for i in range(0,nl):
|
||||||
|
unique=True
|
||||||
|
for j in range(0,nl):
|
||||||
|
if i==j or (kernels[i] is kernels[j]):
|
||||||
|
kern = kernels[i]
|
||||||
|
if i>j:
|
||||||
|
unique=False
|
||||||
|
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
||||||
|
kern = cross_covariances.get((i,j))
|
||||||
|
elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other
|
||||||
|
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j])
|
||||||
|
unique=False
|
||||||
|
elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other
|
||||||
|
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i])
|
||||||
|
elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives
|
||||||
|
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern)
|
||||||
|
if i>j:
|
||||||
|
unique=False
|
||||||
|
else:
|
||||||
|
kern = ZeroKern()
|
||||||
|
covariance[i][j] = kern
|
||||||
|
if unique is True:
|
||||||
|
linked.append(i)
|
||||||
|
self.covariance = covariance
|
||||||
|
self.link_parameters(*[kernels[i] for i in linked])
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
|
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
from .kern import Kern, CombinationKernel
|
from .kern import Kern, CombinationKernel
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from functools import reduce, partial
|
from functools import reduce, partial
|
||||||
from .independent_outputs import index_to_slices
|
from ...util.multioutput import index_to_slices
|
||||||
from paramz.caching import Cache_this
|
from paramz.caching import Cache_this
|
||||||
|
|
||||||
class ZeroKern(Kern):
|
class ZeroKern(Kern):
|
||||||
|
|
@ -18,6 +21,13 @@ class ZeroKern(Kern):
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
return np.zeros((X.shape[0],X.shape[1]))
|
return np.zeros((X.shape[0],X.shape[1]))
|
||||||
|
@property
|
||||||
|
def gradient(self):
|
||||||
|
return np.empty((1,0))
|
||||||
|
|
||||||
|
@gradient.setter
|
||||||
|
def gradient(self, gradient):
|
||||||
|
pass
|
||||||
|
|
||||||
class MultioutputKern(CombinationKernel):
|
class MultioutputKern(CombinationKernel):
|
||||||
"""
|
"""
|
||||||
|
|
@ -62,14 +72,13 @@ class MultioutputKern(CombinationKernel):
|
||||||
unique=True
|
unique=True
|
||||||
for j in range(0,nl):
|
for j in range(0,nl):
|
||||||
if i==j or (kernels[i] is kernels[j]):
|
if i==j or (kernels[i] is kernels[j]):
|
||||||
covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X}
|
covariance[i][j] = kernels[i]
|
||||||
if i>j:
|
if i>j:
|
||||||
unique=False
|
unique=False
|
||||||
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
|
||||||
covariance[i][j] = cross_covariances.get((i,j))
|
covariance[i][j] = cross_covariances.get((i,j))
|
||||||
else: # zero covariance structure
|
else: # zero covariance structure
|
||||||
kern = ZeroKern()
|
covariance[i][j] = ZeroKern()
|
||||||
covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
|
|
||||||
if unique is True:
|
if unique is True:
|
||||||
linked.append(i)
|
linked.append(i)
|
||||||
self.covariance = covariance
|
self.covariance = covariance
|
||||||
|
|
@ -82,7 +91,7 @@ class MultioutputKern(CombinationKernel):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||||
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
|
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
|
@ -93,10 +102,10 @@ class MultioutputKern(CombinationKernel):
|
||||||
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2):
|
def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
|
||||||
gradient = cov_struct['kern'].gradient.copy()
|
gradient = kern.gradient.copy()
|
||||||
cov_struct['update_gradients_full'](dL_dK, X, X2)
|
kern.update_gradients_full(dL_dK, X, X2)
|
||||||
cov_struct['kern'].gradient += gradient
|
kern.gradient += gradient
|
||||||
|
|
||||||
def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X):
|
def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X):
|
||||||
gradient = kern.gradient.copy()
|
gradient = kern.gradient.copy()
|
||||||
|
|
@ -118,14 +127,14 @@ class MultioutputKern(CombinationKernel):
|
||||||
def update_gradients_diag(self, dL_dKdiag, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
self.reset_gradients()
|
self.reset_gradients()
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
[[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
|
[[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
|
||||||
|
|
||||||
def gradients_X(self,dL_dK, X, X2=None):
|
def gradients_X(self,dL_dK, X, X2=None):
|
||||||
slices = index_to_slices(X[:,self.index_dim])
|
slices = index_to_slices(X[:,self.index_dim])
|
||||||
target = np.zeros((X.shape[0], X.shape[1]) )
|
target = np.zeros((X.shape[0], X.shape[1]) )
|
||||||
if X2 is not None:
|
if X2 is not None:
|
||||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||||
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
|
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
|
||||||
else:
|
else:
|
||||||
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
|
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
|
||||||
return target
|
return target
|
||||||
|
|
@ -6,6 +6,7 @@ import numpy as np
|
||||||
from .stationary import Stationary
|
from .stationary import Stationary
|
||||||
from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU
|
from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU
|
||||||
from ...core import Param
|
from ...core import Param
|
||||||
|
from paramz.caching import Cache_this
|
||||||
from paramz.transformations import Logexp
|
from paramz.transformations import Logexp
|
||||||
from .grid_kerns import GridRBF
|
from .grid_kerns import GridRBF
|
||||||
|
|
||||||
|
|
@ -50,6 +51,27 @@ class RBF(Stationary):
|
||||||
def K_of_r(self, r):
|
def K_of_r(self, r):
|
||||||
return self.variance * np.exp(-0.5 * r**2)
|
return self.variance * np.exp(-0.5 * r**2)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK_dX(self, X, X2, dimX):
|
||||||
|
r = self._scaled_dist(X, X2)
|
||||||
|
K = self.K_of_r(r)
|
||||||
|
dist = X[:,None,dimX]-X2[None,:,dimX]
|
||||||
|
lengthscale2inv = (np.ones((X.shape[1]))/(self.lengthscale**2))[dimX]
|
||||||
|
return -1.*K*dist*lengthscale2inv
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK_dX2(self, X, X2, dimX2):
|
||||||
|
return -self.dK_dX(X,X2, dimX2)
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dXdX2(self, X, X2, dimX, dimX2):
|
||||||
|
r = self._scaled_dist(X, X2)
|
||||||
|
K = self.K_of_r(r)
|
||||||
|
if X2 is None:
|
||||||
|
X2=X
|
||||||
|
dist = X[:,None,:]-X2[None,:,:]
|
||||||
|
lengthscale2inv = np.ones((X.shape[1]))/(self.lengthscale**2)
|
||||||
|
return -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*lengthscale2inv[dimX2] + (dimX==dimX2)*K*lengthscale2inv[dimX]
|
||||||
|
|
||||||
def dK_dr(self, r):
|
def dK_dr(self, r):
|
||||||
return -r*self.K_of_r(r)
|
return -r*self.K_of_r(r)
|
||||||
|
|
||||||
|
|
@ -59,6 +81,74 @@ class RBF(Stationary):
|
||||||
def dK2_drdr_diag(self):
|
def dK2_drdr_diag(self):
|
||||||
return -self.variance # as the diagonal of r is always filled with zeros
|
return -self.variance # as the diagonal of r is always filled with zeros
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK_dvariance(self,X,X2):
|
||||||
|
return self.K(X,X2)/self.variance
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dvariancedX(self, X, X2, dim):
|
||||||
|
return self.dK_dX(X,X2, dim)/self.variance
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dvariancedX2(self, X, X2, dim):
|
||||||
|
return self.dK_dX2(X,X2, dim)/self.variance
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK3_dvariancedXdX2(self, X, X2, dim, dimX2):
|
||||||
|
return self.dK2_dXdX2(X, X2, dim, dimX2)/self.variance
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dlengthscaledX(self, X, X2, dimX):
|
||||||
|
r = self._scaled_dist(X, X2)
|
||||||
|
K = self.K_of_r(r)
|
||||||
|
if X2 is None:
|
||||||
|
X2=X
|
||||||
|
dist = X[:,None,:]-X2[None,:,:]
|
||||||
|
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
|
||||||
|
if self.ARD:
|
||||||
|
g = []
|
||||||
|
for diml in range(X.shape[1]):
|
||||||
|
g += [-1.*K*dist[:,:,dimX]*(dist[:,:,diml]**2)*(lengthscaleinv[dimX]**2)*(lengthscaleinv[diml]**3) + 2.*dist[:,:,dimX]*(lengthscaleinv[diml]**3)*K*(dimX == diml)]
|
||||||
|
else:
|
||||||
|
g = -1.*K*dist[:,:,dimX]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**5) + 2.*dist[:,:,dimX]*(lengthscaleinv[dimX]**3)*K
|
||||||
|
return g
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK2_dlengthscaledX2(self, X, X2, dimX2):
|
||||||
|
tmp = self.dK2_dlengthscaledX(X, X2, dimX2)
|
||||||
|
if self.ARD:
|
||||||
|
return [-1.*g for g in tmp]
|
||||||
|
else:
|
||||||
|
return -1*tmp
|
||||||
|
|
||||||
|
@Cache_this(limit=3, ignore_args=())
|
||||||
|
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
|
||||||
|
r = self._scaled_dist(X, X2)
|
||||||
|
K = self.K_of_r(r)
|
||||||
|
if X2 is None:
|
||||||
|
X2=X
|
||||||
|
dist = X[:,None,:]-X2[None,:,:]
|
||||||
|
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
|
||||||
|
lengthscale2inv = lengthscaleinv**2
|
||||||
|
if self.ARD:
|
||||||
|
g = []
|
||||||
|
for diml in range(X.shape[1]):
|
||||||
|
tmp = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*(dist[:,:,diml]**2)*lengthscale2inv[dimX]*lengthscale2inv[dimX2]*(lengthscaleinv[diml]**3)
|
||||||
|
if dimX == dimX2:
|
||||||
|
tmp += K*lengthscale2inv[dimX]*(lengthscaleinv[diml]**3)*(dist[:,:,diml]**2)
|
||||||
|
if diml == dimX:
|
||||||
|
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX2]*(lengthscaleinv[dimX]**3)
|
||||||
|
if diml == dimX2:
|
||||||
|
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*(lengthscaleinv[dimX2]**3)
|
||||||
|
if dimX == dimX2:
|
||||||
|
tmp += -2.*K*(lengthscaleinv[dimX]**3)
|
||||||
|
g += [tmp]
|
||||||
|
else:
|
||||||
|
g = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**7) +4*K*dist[:,:,dimX]*dist[:,:,dimX2]*(lengthscaleinv[dimX]**5)
|
||||||
|
if dimX == dimX2:
|
||||||
|
g += -2.*K*(lengthscaleinv[dimX]**3) + K*(lengthscaleinv[dimX]**5)*np.sum(dist**2, axis=2)
|
||||||
|
return g
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
dc = super(RBF, self).__getstate__()
|
dc = super(RBF, self).__getstate__()
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ A new kernel
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .kern import Kern, CombinationKernel
|
from .kern import Kern, CombinationKernel
|
||||||
from .independent_outputs import index_to_slices
|
from ...util.multioutput import index_to_slices
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
class DEtime(Kern):
|
class DEtime(Kern):
|
||||||
|
|
|
||||||
|
|
@ -212,7 +212,6 @@ class Stationary(Kern):
|
||||||
r = self._scaled_dist(X, X2)
|
r = self._scaled_dist(X, X2)
|
||||||
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
|
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
|
||||||
|
|
||||||
|
|
||||||
def update_gradients_direct(self, dL_dVar, dL_dLen):
|
def update_gradients_direct(self, dL_dVar, dL_dLen):
|
||||||
"""
|
"""
|
||||||
Specially intended for the Grid regression case.
|
Specially intended for the Grid regression case.
|
||||||
|
|
@ -308,6 +307,21 @@ class Stationary(Kern):
|
||||||
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
|
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
|
||||||
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
|
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
|
||||||
|
|
||||||
|
def dgradients_dX(self, X, X2, dimX):
|
||||||
|
g1 = self.dK2_dvariancedX(X, X2, dimX)
|
||||||
|
g2 = self.dK2_dlengthscaledX(X, X2, dimX)
|
||||||
|
return [g1, g2]
|
||||||
|
|
||||||
|
def dgradients_dX2(self, X, X2, dimX2):
|
||||||
|
g1 = self.dK2_dvariancedX2(X, X2, dimX2)
|
||||||
|
g2 = self.dK2_dlengthscaledX2(X, X2, dimX2)
|
||||||
|
return [g1, g2]
|
||||||
|
|
||||||
|
def dgradients2_dXdX2(self, X, X2, dimX, dimX2):
|
||||||
|
g1 = self.dK3_dvariancedXdX2(X, X2, dimX, dimX2)
|
||||||
|
g2 = self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2)
|
||||||
|
return [g1, g2]
|
||||||
|
|
||||||
def _gradients_X_pure(self, dL_dK, X, X2=None):
|
def _gradients_X_pure(self, dL_dK, X, X2=None):
|
||||||
invdist = self._inv_dist(X, X2)
|
invdist = self._inv_dist(X, X2)
|
||||||
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
|
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,6 @@ class Binomial(Likelihood):
|
||||||
np.testing.assert_array_equal(N.shape, y.shape)
|
np.testing.assert_array_equal(N.shape, y.shape)
|
||||||
|
|
||||||
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
|
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
|
||||||
|
|
||||||
Ny = N-y
|
Ny = N-y
|
||||||
t1 = np.zeros(y.shape)
|
t1 = np.zeros(y.shape)
|
||||||
t2 = np.zeros(y.shape)
|
t2 = np.zeros(y.shape)
|
||||||
|
|
@ -177,6 +176,41 @@ class Binomial(Likelihood):
|
||||||
|
|
||||||
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def moments_match_ep(self,obs,tau,v,Y_metadata_i=None):
|
||||||
|
"""
|
||||||
|
Calculation of moments using quadrature
|
||||||
|
:param obs: observed output
|
||||||
|
:param tau: cavity distribution 1st natural parameter (precision)
|
||||||
|
:param v: cavity distribution 2nd natural paramenter (mu*precision)
|
||||||
|
"""
|
||||||
|
#Compute first integral for zeroth moment.
|
||||||
|
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
|
||||||
|
if (isinstance(self.gp_link, link_functions.Probit) or isinstance(self.gp_link, link_functions.ScaledProbit)) and (Y_metadata_i is None or int(Y_metadata_i.get('trials', 1)) == int(1)): #Special case for probit likelihood. Can be found from Riihimaki et Vehtari 2010
|
||||||
|
if isinstance(self.gp_link, link_functions.ScaledProbit):
|
||||||
|
nu = self.gp_link.nu
|
||||||
|
else:
|
||||||
|
nu = 1.0
|
||||||
|
nu = self.gp_link.nu
|
||||||
|
mu = v/tau
|
||||||
|
sigma2 = 1./tau
|
||||||
|
t = np.asarray(1 + sigma2*(nu**2))
|
||||||
|
t[t<1e-20] = 1e-20
|
||||||
|
a = np.sqrt(t)
|
||||||
|
z = obs*mu/a
|
||||||
|
normc_z = max(self.gp_link.transf(z), 1e-20)
|
||||||
|
m0 = normc_z
|
||||||
|
normp_z = self.gp_link.dtransf_df(z)
|
||||||
|
m1 = mu + (obs*sigma2*normp_z)/(normc_z*a)
|
||||||
|
#print('tau: {}, v: {}, nu: {}, z: {}, normc_z: {}, normp_z: {}'.format(tau, v, nu.values, z, normc_z, normp_z))
|
||||||
|
m2 = sigma2 - ((sigma2**2)*normp_z)/((1./(nu**2)+sigma2)*normc_z)*(z + normp_z/(nu**2)/normc_z)
|
||||||
|
#print("m0: {}, m1: {}, m2: {}".format(m0,m1,m2))
|
||||||
|
#m0a, m1a, m2a = super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i)
|
||||||
|
#print("m0a: {}, m1a: {}, m2a: {}".format(m0a,m1a,m2a))
|
||||||
|
return m0, m1, m2
|
||||||
|
else:
|
||||||
|
return super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i)
|
||||||
|
|
||||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||||
if isinstance(self.gp_link, link_functions.Probit):
|
if isinstance(self.gp_link, link_functions.Probit):
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -138,6 +138,38 @@ class Probit(GPTransformation):
|
||||||
input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
|
input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
|
||||||
return input_dict
|
return input_dict
|
||||||
|
|
||||||
|
class ScaledProbit(Probit):
|
||||||
|
"""
|
||||||
|
.. math::
|
||||||
|
g(f) = \\Phi^{-1} (nu*mu)
|
||||||
|
"""
|
||||||
|
def __init__(self, nu=1.):
|
||||||
|
self.nu = float(nu)
|
||||||
|
|
||||||
|
def transf(self,f):
|
||||||
|
return std_norm_cdf(f*self.nu)
|
||||||
|
|
||||||
|
def dtransf_df(self,f):
|
||||||
|
return std_norm_pdf(f*self.nu)*self.nu
|
||||||
|
|
||||||
|
def d2transf_df2(self,f):
|
||||||
|
return -(f*self.nu) * std_norm_pdf(f*self.nu)*(self.nu**2)
|
||||||
|
|
||||||
|
def d3transf_df3(self,f):
|
||||||
|
return (safe_square(f*self.nu)-1.)*std_norm_pdf(f*self.nu)*(self.nu**3)
|
||||||
|
|
||||||
|
def to_dict(self):
|
||||||
|
"""
|
||||||
|
Convert the object into a json serializable dictionary.
|
||||||
|
|
||||||
|
Note: It uses the private method _save_to_input_dict of the parent.
|
||||||
|
|
||||||
|
:return dict: json serializable dictionary containing the needed information to instantiate the object
|
||||||
|
"""
|
||||||
|
|
||||||
|
input_dict = super(ScaledProbit, self)._save_to_input_dict()
|
||||||
|
input_dict["class"] = "GPy.likelihoods.link_functions.ScaledProbit"
|
||||||
|
return input_dict
|
||||||
|
|
||||||
class Cloglog(GPTransformation):
|
class Cloglog(GPTransformation):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ from .gaussian import Gaussian
|
||||||
from ..core.parameterization import Param
|
from ..core.parameterization import Param
|
||||||
from paramz.transformations import Logexp
|
from paramz.transformations import Logexp
|
||||||
from ..core.parameterization import Parameterized
|
from ..core.parameterization import Parameterized
|
||||||
from ..kern.src.independent_outputs import index_to_slices
|
from ..util.multioutput import index_to_slices
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
class MultioutputLikelihood(MixedNoise):
|
class MultioutputLikelihood(MixedNoise):
|
||||||
|
|
|
||||||
|
|
@ -30,3 +30,4 @@ from .gp_grid_regression import GPRegressionGrid
|
||||||
from .gp_multiout_regression import GPMultioutRegression
|
from .gp_multiout_regression import GPMultioutRegression
|
||||||
from .gp_multiout_regression_md import GPMultioutRegressionMD
|
from .gp_multiout_regression_md import GPMultioutRegressionMD
|
||||||
from .tp_regression import TPRegression
|
from .tp_regression import TPRegression
|
||||||
|
from .multioutput_gp import MultioutputGP
|
||||||
|
|
|
||||||
147
GPy/models/multioutput_gp.py
Normal file
147
GPy/models/multioutput_gp.py
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import itertools
|
||||||
|
from ..core.model import Model
|
||||||
|
from ..core.parameterization.variational import VariationalPosterior
|
||||||
|
from ..core.mapping import Mapping
|
||||||
|
from .. import likelihoods
|
||||||
|
from ..likelihoods.gaussian import Gaussian
|
||||||
|
from .. import kern
|
||||||
|
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
|
||||||
|
from ..util.normalizer import Standardize
|
||||||
|
from .. import util
|
||||||
|
from paramz import ObsAr
|
||||||
|
from ..core.gp import GP
|
||||||
|
|
||||||
|
from ..util.multioutput import index_to_slices
|
||||||
|
import logging
|
||||||
|
import warnings
|
||||||
|
logger = logging.getLogger("GP")
|
||||||
|
|
||||||
|
class MultioutputGP(GP):
|
||||||
|
"""
|
||||||
|
Gaussian process model for using observations from multiple likelihoods and different kernels
|
||||||
|
:param X_list: input observations in a list for each likelihood
|
||||||
|
:param Y: output observations in a list for each likelihood
|
||||||
|
:param kernel_list: kernels in a list for each likelihood
|
||||||
|
:param likelihood_list: likelihoods in a list
|
||||||
|
:param kernel_cross_covariances: Cross covariances between different likelihoods. See class MultioutputKern for more
|
||||||
|
:param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
|
||||||
|
"""
|
||||||
|
def __init__(self, X_list, Y_list, kernel_list, likelihood_list, name='multioutputgp', kernel_cross_covariances={}, inference_method=None):
|
||||||
|
#Input and Output
|
||||||
|
X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list)
|
||||||
|
Ny = len(Y_list)
|
||||||
|
|
||||||
|
assert isinstance(kernel_list, list)
|
||||||
|
kernel = kern.MultioutputDerivativeKern(kernels=kernel_list, cross_covariances=kernel_cross_covariances)
|
||||||
|
|
||||||
|
assert isinstance(likelihood_list, list)
|
||||||
|
likelihood = likelihoods.MultioutputLikelihood(likelihood_list)
|
||||||
|
|
||||||
|
if inference_method is None:
|
||||||
|
if all([isinstance(l, Gaussian) for l in likelihood_list]):
|
||||||
|
inference_method = exact_gaussian_inference.ExactGaussianInference()
|
||||||
|
else:
|
||||||
|
inference_method = expectation_propagation.EP()
|
||||||
|
|
||||||
|
super(MultioutputGP, self).__init__(X,Y,kernel,likelihood, Y_metadata={'output_index':self.output_index, 'trials':np.ones(self.output_index.shape)}, inference_method = inference_method)
|
||||||
|
|
||||||
|
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
|
||||||
|
if isinstance(Xnew, list):
|
||||||
|
Xnew, _, ind = util.multioutput.build_XY(Xnew,None)
|
||||||
|
if Y_metadata is None:
|
||||||
|
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
|
||||||
|
return super(MultioutputGP, self).predict_noiseless(Xnew, full_cov, Y_metadata, kern)
|
||||||
|
|
||||||
|
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
|
||||||
|
if isinstance(Xnew, list):
|
||||||
|
Xnew, _, ind = util.multioutput.build_XY(Xnew,None)
|
||||||
|
if Y_metadata is None:
|
||||||
|
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
|
||||||
|
return super(MultioutputGP, self).predict(Xnew, full_cov, Y_metadata, kern, likelihood, include_likelihood)
|
||||||
|
|
||||||
|
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
|
||||||
|
if isinstance(X, list):
|
||||||
|
X, _, ind = util.multioutput.build_XY(X,None)
|
||||||
|
if Y_metadata is None:
|
||||||
|
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
|
||||||
|
return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
|
||||||
|
|
||||||
|
def predictive_gradients(self, Xnew, kern=None):
|
||||||
|
if isinstance(Xnew, list):
|
||||||
|
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
|
||||||
|
#if Y_metadata is None:
|
||||||
|
#Y_metadata={'output_index': ind}
|
||||||
|
return super(MultioutputGP, self).predictive_gradients(Xnew, kern)
|
||||||
|
|
||||||
|
def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!!
|
||||||
|
"""
|
||||||
|
Compute the derivatives of the predicted latent function with respect to X*
|
||||||
|
Given a set of points at which to predict X* (size [N*,Q]), compute the
|
||||||
|
derivatives of the mean and variance. Resulting arrays are sized:
|
||||||
|
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
|
||||||
|
Note that this is not the same as computing the mean and variance of the derivative of the function!
|
||||||
|
dv_dX* -- [N*, Q], (since all outputs have the same variance)
|
||||||
|
:param X: The points at which to get the predictive gradients
|
||||||
|
:type X: np.ndarray (Xnew x self.input_dim)
|
||||||
|
:returns: dmu_dX, dv_dX
|
||||||
|
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(Xnew, list):
|
||||||
|
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
|
||||||
|
|
||||||
|
slices = index_to_slices(Xnew[:,-1])
|
||||||
|
|
||||||
|
for i in range(len(slices)):
|
||||||
|
if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0):
|
||||||
|
assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)"
|
||||||
|
|
||||||
|
if kern is None:
|
||||||
|
kern = self.kern
|
||||||
|
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim))
|
||||||
|
for i in range(self.output_dim):
|
||||||
|
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1]
|
||||||
|
|
||||||
|
# gradients wrt the diagonal part k_{xx}
|
||||||
|
dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)[:,0:-1]
|
||||||
|
#grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
|
||||||
|
if self.posterior.woodbury_inv.ndim == 3:
|
||||||
|
tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
|
||||||
|
tmp[:] = dv_dX[:,:,None]
|
||||||
|
for i in range(self.posterior.woodbury_inv.shape[2]):
|
||||||
|
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
|
||||||
|
tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
|
||||||
|
else:
|
||||||
|
tmp = dv_dX
|
||||||
|
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
|
||||||
|
tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:,0:-1]
|
||||||
|
return mean_jac, tmp
|
||||||
|
|
||||||
|
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
|
||||||
|
if isinstance(x_test, list):
|
||||||
|
x_test, y_test, ind = util.multioutput.build_XY(x_test, y_test)
|
||||||
|
if Y_metadata is None:
|
||||||
|
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
|
||||||
|
return super(MultioutputGP, self).log_predictive_density(x_test, y_test, Y_metadata)
|
||||||
|
|
||||||
|
def set_XY(self, X=None, Y=None):
|
||||||
|
if isinstance(X, list):
|
||||||
|
X, _, self.output_index = util.multioutput.build_XY(X, None)
|
||||||
|
if isinstance(Y, list):
|
||||||
|
_, Y, self.output_index = util.multioutput.build_XY(Y, Y)
|
||||||
|
|
||||||
|
self.update_model(False)
|
||||||
|
if Y is not None:
|
||||||
|
self.Y = ObsAr(Y)
|
||||||
|
self.Y_normalized = self.Y
|
||||||
|
if X is not None:
|
||||||
|
self.X = ObsAr(X)
|
||||||
|
|
||||||
|
self.Y_metadata={'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)}
|
||||||
|
if isinstance(self.inference_method, expectation_propagation.EP):
|
||||||
|
self.inference_method.reset()
|
||||||
|
self.update_model(True)
|
||||||
|
|
@ -7,7 +7,7 @@ from unittest.case import skip
|
||||||
import GPy
|
import GPy
|
||||||
from GPy.core.parameterization.param import Param
|
from GPy.core.parameterization.param import Param
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import random
|
||||||
from ..util.config import config
|
from ..util.config import config
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,12 @@ import numpy as np
|
||||||
import scipy
|
import scipy
|
||||||
from scipy.special import cbrt
|
from scipy.special import cbrt
|
||||||
from GPy.models import GradientChecker
|
from GPy.models import GradientChecker
|
||||||
|
import random
|
||||||
_lim_val = np.finfo(np.float64).max
|
_lim_val = np.finfo(np.float64).max
|
||||||
_lim_val_exp = np.log(_lim_val)
|
_lim_val_exp = np.log(_lim_val)
|
||||||
_lim_val_square = np.sqrt(_lim_val)
|
_lim_val_square = np.sqrt(_lim_val)
|
||||||
_lim_val_cube = cbrt(_lim_val)
|
_lim_val_cube = cbrt(_lim_val)
|
||||||
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
|
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside, ScaledProbit
|
||||||
|
|
||||||
class LinkFunctionTests(np.testing.TestCase):
|
class LinkFunctionTests(np.testing.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
|
@ -124,6 +125,11 @@ class LinkFunctionTests(np.testing.TestCase):
|
||||||
lim_of_inf = _lim_val
|
lim_of_inf = _lim_val
|
||||||
self.check_gradient(link, lim_of_inf, test_lim=True)
|
self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||||
|
|
||||||
|
def test_scaledprobit_gradients(self):
|
||||||
|
link = ScaledProbit(nu=random.random())
|
||||||
|
lim_of_inf = _lim_val
|
||||||
|
self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||||
|
|
||||||
def test_Cloglog_gradients(self):
|
def test_Cloglog_gradients(self):
|
||||||
link = Cloglog()
|
link = Cloglog()
|
||||||
lim_of_inf = _lim_val_exp
|
lim_of_inf = _lim_val_exp
|
||||||
|
|
|
||||||
|
|
@ -1181,6 +1181,67 @@ class GradientTests(np.testing.TestCase):
|
||||||
with self.assertRaises(RuntimeError):
|
with self.assertRaises(RuntimeError):
|
||||||
m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
|
m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
|
||||||
|
|
||||||
|
def test_multioutput_model_with_derivative_observations(self):
|
||||||
|
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
|
||||||
|
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
|
||||||
|
N=10
|
||||||
|
M=10
|
||||||
|
sigma=0.05
|
||||||
|
sigmader=0.05
|
||||||
|
x = np.array([np.linspace(1,10,N)]).T
|
||||||
|
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
|
||||||
|
|
||||||
|
xd = np.array([np.linspace(2,8,M)]).T
|
||||||
|
yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1)))
|
||||||
|
|
||||||
|
# squared exponential kernel:
|
||||||
|
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
|
||||||
|
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
|
||||||
|
se_der = GPy.kern.DiffKern(se, 0)
|
||||||
|
|
||||||
|
#Then
|
||||||
|
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
|
||||||
|
gauss = GPy.likelihoods.Gaussian(variance=0.1)
|
||||||
|
gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2)
|
||||||
|
|
||||||
|
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
|
||||||
|
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
|
||||||
|
# the likelihoods must follow the same order
|
||||||
|
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
|
||||||
|
m.randomize()
|
||||||
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
m.optimize(messages=0, ipython_notebook=False)
|
||||||
|
|
||||||
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
|
def test_multioutput_model_with_ep(self):
|
||||||
|
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
|
||||||
|
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
|
||||||
|
N=10
|
||||||
|
sigma=0.05
|
||||||
|
sigmader=0.05
|
||||||
|
x = np.array([np.linspace(1,10,N)]).T
|
||||||
|
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
|
||||||
|
|
||||||
|
M=7
|
||||||
|
xd = np.array([np.linspace(2,8,M)]).T
|
||||||
|
yd = 2*(fd(xd)>0) -1
|
||||||
|
|
||||||
|
# squared exponential kernel:
|
||||||
|
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
|
||||||
|
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
|
||||||
|
se_der = GPy.kern.DiffKern(se, 0)
|
||||||
|
|
||||||
|
#Then
|
||||||
|
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
|
||||||
|
probit = GPy.likelihoods.Binomial(gp_link = GPy.likelihoods.link_functions.ScaledProbit(nu=100))
|
||||||
|
|
||||||
|
# Then create the model, we give everything in lists
|
||||||
|
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, probit], inference_method=GPy.inference.latent_function_inference.EP(ep_mode="nested"))
|
||||||
|
|
||||||
|
self.assertTrue(m.checkgrad())
|
||||||
|
|
||||||
def _create_missing_data_model(kernel, Q):
|
def _create_missing_data_model(kernel, Q):
|
||||||
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
|
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
|
||||||
_, _, Ylist = GPy.examples.dimensionality_reduction._simulate_matern(D1, D2, D3, N, num_inducing, False)
|
_, _, Ylist = GPy.examples.dimensionality_reduction._simulate_matern(D1, D2, D3, N, num_inducing, False)
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,33 @@ import numpy as np
|
||||||
import warnings
|
import warnings
|
||||||
import GPy
|
import GPy
|
||||||
|
|
||||||
|
def index_to_slices(index):
|
||||||
|
"""
|
||||||
|
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
|
||||||
|
|
||||||
|
e.g.
|
||||||
|
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
|
||||||
|
returns
|
||||||
|
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
|
||||||
|
|
||||||
|
or, a more complicated example
|
||||||
|
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
|
||||||
|
returns
|
||||||
|
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
|
||||||
|
"""
|
||||||
|
if len(index)==0:
|
||||||
|
return[]
|
||||||
|
|
||||||
|
#contruct the return structure
|
||||||
|
ind = np.asarray(index,dtype=np.int)
|
||||||
|
ret = [[] for i in range(ind.max()+1)]
|
||||||
|
|
||||||
|
#find the switchpoints
|
||||||
|
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
|
||||||
|
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
|
||||||
|
|
||||||
|
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
|
||||||
|
return ret
|
||||||
|
|
||||||
def get_slices(input_list):
|
def get_slices(input_list):
|
||||||
num_outputs = len(input_list)
|
num_outputs = len(input_list)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue