Merge branch 'feature-multioutput-grad-obs' of git://github.com/esiivola/GPy into esiivola-feature-multioutput-grad-obs

This commit is contained in:
mzwiessele 2019-07-22 10:54:28 +01:00
commit 22ce7ad207
24 changed files with 795 additions and 53 deletions

View file

@ -581,3 +581,53 @@ def warped_gp_cubic_sine(max_iters=100):
m.plot(title="Standard GP") m.plot(title="Standard GP")
warp_m.plot_warping() warp_m.plot_warping()
pb.show() pb.show()
def multioutput_gp_with_derivative_observations():
def plot_gp_vs_real(m, x, yreal, size_inputs, title, fixed_input=1, xlim=[0,11], ylim=[-1.5,3]):
fig, ax = pb.subplots()
ax.set_title(title)
pb.plot(x, yreal, "r", label='Real function')
rows = slice(0, size_inputs[0]) if fixed_input == 0 else slice(size_inputs[0], size_inputs[0]+size_inputs[1])
m.plot(fixed_inputs=[(1, fixed_input)], which_data_rows=rows, xlim=xlim, ylim=ylim, ax=ax)
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10 # Number of observations
M=10 # Number of derivative observations
Npred=100 # Number of prediction points
sigma = 0.05 # Noise of observations
sigma_der = 0.05 # Noise of derivative observations
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
xd = np.array([np.linspace(2,8,M)]).T
yd = fd(xd) + np.array(sigma_der*np.random.normal(0,1,(M,1)))
xpred = np.array([np.linspace(0,11,Npred)]).T
ypred_true = f(xpred)
ydpred_true = fd(xpred)
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss_der = GPy.likelihoods.Gaussian(variance=sigma_der**2)
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
# the likelihoods must follow the same order. Crosscovariances are automatically taken car of
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
# Optimize the model
m.optimize(messages=0, ipython_notebook=False)
#Plot the model, the syntax is same as for multioutput models:
plot_gp_vs_real(m, xpred, ydpred_true, [x.shape[0], xd.shape[0]], title='Latent function derivatives', fixed_input=1, xlim=[0,11], ylim=[-1.5,3])
plot_gp_vs_real(m, xpred, ypred_true, [x.shape[0], xd.shape[0]], title='Latent function', fixed_input=0, xlim=[0,11], ylim=[-1.5,3])
#making predictions for the values:
mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0,1))])

View file

@ -44,3 +44,5 @@ from .src.sde_static import sde_White, sde_Bias
from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad
from .src.sde_brownian import sde_Brownian from .src.sde_brownian import sde_Brownian
from .src.multioutput_kern import MultioutputKern from .src.multioutput_kern import MultioutputKern
from .src.multioutput_derivative_kern import MultioutputDerivativeKern
from .src.diff_kern import DiffKern

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern from .kern import Kern
from .independent_outputs import index_to_slices from ...util.multioutput import index_to_slices
from ...core.parameterization import Param from ...core.parameterization import Param
from paramz.transformations import Logexp from paramz.transformations import Logexp
import numpy as np import numpy as np

View file

@ -5,7 +5,7 @@ from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from paramz.transformations import Logexp from paramz.transformations import Logexp
import numpy as np import numpy as np
from .independent_outputs import index_to_slices from ...util.multioutput import index_to_slices
class ODE_UYC(Kern): class ODE_UYC(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'): def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):

View file

@ -4,7 +4,7 @@ from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from paramz.transformations import Logexp from paramz.transformations import Logexp
import numpy as np import numpy as np
from .independent_outputs import index_to_slices from ...util.multioutput import index_to_slices
class ODE_st(Kern): class ODE_st(Kern):

View file

@ -2,7 +2,7 @@ from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from paramz.transformations import Logexp from paramz.transformations import Logexp
import numpy as np import numpy as np
from .independent_outputs import index_to_slices from ...util.multioutput import index_to_slices
class ODE_t(Kern): class ODE_t(Kern):

88
GPy/kern/src/diff_kern.py Normal file
View file

@ -0,0 +1,88 @@
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern
import numpy as np
from paramz.caching import Cache_this
class DiffKern(Kern):
"""
Diff kernel is a thin wrapper for using partial derivatives of kernels as kernels. Eg. in combination with
Multioutput kernel this allows the user to train GPs with observations of latent function and latent
function derivatives. NOTE: DiffKern only works when used with Multioutput kernel. Do not use the kernel as standalone
The parameters the kernel needs are:
-'base_kern': a member of Kernel class that is used for observations
-'dimension': integer that indigates in which dimensions the partial derivative observations are
"""
def __init__(self, base_kern, dimension):
super(DiffKern, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='DiffKern')
self.base_kern = base_kern
self.dimension = dimension
def parameters_changed(self):
self.base_kern.parameters_changed()
@Cache_this(limit=3, ignore_args=())
def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension
if X2 is None:
X2 = X
if dimX2 is None:
dimX2 = self.dimension
return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2)
@Cache_this(limit=3, ignore_args=())
def Kdiag(self, X):
return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension))
@Cache_this(limit=3, ignore_args=())
def dK_dX_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX(X,X2, self.dimension)
@Cache_this(limit=3, ignore_args=())
def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension
return self.base_kern.dK_dX2(X,X2, self.dimension)
def reset_gradients(self):
self.base_kern.reset_gradients()
@property
def gradient(self):
return self.base_kern.gradient
@gradient.setter
def gradient(self, gradient):
self.base_kern.gradient = gradient
def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None):
if dimX2 is None:
dimX2 = self.dimension
gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_diag(self, dL_dK_diag, X):
gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients])
def update_gradients_dK_dX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
gradients = self.base_kern.dgradients_dX(X,X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def update_gradients_dK_dX2(self, dL_dK, X, X2=None):
gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension)
self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients])
def gradients_X(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension]
return np.sum(tmp, axis=1)
def gradients_X2(self, dL_dK, X, X2):
tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :]
return np.sum(tmp, axis=1)
def _convert_gradients(self, l,g, f = lambda x:x):
if type(g) is np.ndarray:
return np.sum(f(l)*f(g))
else:
return np.array([np.sum(f(l)*f(gi)) for gi in g])

View file

@ -5,34 +5,8 @@
from .kern import CombinationKernel from .kern import CombinationKernel
import numpy as np import numpy as np
import itertools import itertools
from ...util.multioutput import index_to_slices
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
if len(index)==0:
return[]
#contruct the return structure
ind = np.asarray(index,dtype=np.int)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
class IndependentOutputs(CombinationKernel): class IndependentOutputs(CombinationKernel):
""" """

View file

@ -116,6 +116,12 @@ class Kern(Parameterized):
except: except:
return X[:, self._all_dims_active] return X[:, self._all_dims_active]
def _project_dim(self, dim):
try:
return np.where(self._all_dims_active == dim)[0][0]
except:
return None
def K(self, X, X2): def K(self, X, X2):
""" """
Compute the kernel function. Compute the kernel function.

View file

@ -19,16 +19,26 @@ def put_clean(dct, name, func):
class KernCallsViaSlicerMeta(ParametersChangedMeta): class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __new__(cls, name, bases, dct): def __new__(cls, name, bases, dct):
put_clean(dct, 'K', _slice_K) put_clean(dct, 'K', _slice_K)
put_clean(dct, 'dK_dX', _slice_dK_dX)
put_clean(dct, 'dK_dX2', _slice_dK_dX)
put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2)
put_clean(dct, 'Kdiag', _slice_Kdiag) put_clean(dct, 'Kdiag', _slice_Kdiag)
put_clean(dct, 'phi', _slice_Kdiag) put_clean(dct, 'phi', _slice_Kdiag)
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'update_gradients_dK_dX', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_dK_dX2', _slice_update_gradients_full)
put_clean(dct, 'gradients_X', _slice_gradients_X) put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X2', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X) put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX) put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag) put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X)
put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X)
put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX)
put_clean(dct, 'psi0', _slice_psi) put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi) put_clean(dct, 'psi1', _slice_psi)
put_clean(dct, 'psi2', _slice_psi) put_clean(dct, 'psi2', _slice_psi)
@ -79,6 +89,20 @@ class _Slice_wrap(object):
return ret return ret
return return_val return return_val
def handle_return_list(self, return_val):
if self.ret:
ret = [np.zeros(self.shape) for _ in range(len(return_val))]
if len(self.shape) == 3:
for i in range(len(return_val)):
ret[i][self.k._all_dims_active, :, :] = return_val[i]
#[ret[i].__setitem__((:, :, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
elif len(self.shape) == 4:
for i in range(len(return_val)):
ret[i][np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val[i]
#[ret[i].__setitem__((:, :, self.k._all_dims_active, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))]
return ret
return return_val
def _slice_K(f): def _slice_K(f):
@wraps(f) @wraps(f)
def wrap(self, X, X2 = None, *a, **kw): def wrap(self, X, X2 = None, *a, **kw):
@ -97,15 +121,15 @@ def _slice_Kdiag(f):
def _slice_update_gradients_full(f): def _slice_update_gradients_full(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dK, X, X2=None): def wrap(self, dL_dK, X, X2=None, *a, **kw):
with _Slice_wrap(self, X, X2) as s: with _Slice_wrap(self, X, X2) as s:
ret = f(self, dL_dK, s.X, s.X2) ret = f(self, dL_dK, s.X, s.X2, *a, **kw)
return ret return ret
return wrap return wrap
def _slice_update_gradients_diag(f): def _slice_update_gradients_diag(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dKdiag, X): def wrap(self, dL_dKdiag, X, *a, **kw):
with _Slice_wrap(self, X, None) as s: with _Slice_wrap(self, X, None) as s:
ret = f(self, dL_dKdiag, s.X) ret = f(self, dL_dKdiag, s.X)
return ret return ret
@ -119,6 +143,99 @@ def _slice_gradients_X(f):
return ret return ret
return wrap return wrap
def _slice_dK_dX(f):
@wraps(f)
def wrap(self, X, X2, dim, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
d = s.k._project_dim(dim)
if d is None:
ret = np.zeros((X.shape[0], X2.shape[0]))
else:
ret = f(self, s.X, s.X2, d, *a, **kw)
return ret
return wrap
def _slice_dK2_dXdX2(f):
@wraps(f)
def wrap(self, X, X2, dimX, dimX2, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (d is None) or (d2 is None):
ret = np.zeros((X.shape[0], X2.shape[0]))
else:
ret = f(self, s.X, s.X2, d, d2, *a, **kw)
return ret
return wrap
def _slice_partial_gradients_X(f):
@wraps(f)
def wrap(self, X, X2, dim):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
Q1 = X.shape[1]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s:
ret = s.handle_return_array(f(self, s.X, s.X2, dim))
return ret
return wrap
def _slice_partial_gradients_list_X(f):
@wraps(f)
def wrap(self, X, X2, dim):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
d = s.k._project_dim(dim)
if d is None:
ret = [np.zeros((N, M)) for i in range(s.k.size)]
else:
ret = f(self, s.X, s.X2, d)
return ret
return wrap
def _slice_partial_gradients_XX(f):
@wraps(f)
def wrap(self, X, X2, dim, dimX2):
if X2 is None:
N, M = X.shape[0], X.shape[0]
Q1 = X.shape[1]
else:
N, M = X.shape[0], X2.shape[0]
Q1, Q2 = X.shape[1], X2.shape[1]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s:
ret = s.handle_return_array(f(self, s.X, s.X2, dim, dimX2))
return ret
return wrap
def _slice_partial_gradients_list_XX(f):
@wraps(f)
def wrap(self, X, X2, dimX, dimX2):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s:
d = s.k._project_dim(dimX)
d2 = s.k._project_dim(dimX2)
if (d is None) or (d2 is None):
ret = [np.zeros((N, M)) for i in range(s.k.size)]
else:
ret = f(self, s.X, s.X2, d, d2)
return ret
return wrap
def _slice_gradient_derivatives_X(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
with _Slice_wrap(self, X, X2) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f): def _slice_gradients_X_diag(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dKdiag, X): def wrap(self, dL_dKdiag, X):

View file

@ -0,0 +1,84 @@
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern, CombinationKernel
from .multioutput_kern import MultioutputKern, ZeroKern
import numpy as np
from functools import partial
class KernWrapper(Kern):
def __init__(self, fk, fug, fg, base_kern):
self.fk = fk
self.fug = fug
self.fg = fg
self.base_kern = base_kern
super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False)
def K(self, X, X2=None):
return self.fk(X,X2=X2)
def update_gradients_full(self,dL_dK, X, X2=None):
return self.fug(dL_dK, X, X2=X2)
def gradients_X(self,dL_dK, X, X2=None):
return self.fg(dL_dK, X, X2=X2)
@property
def gradient(self):
return self.base_kern.gradient
@gradient.setter
def gradient(self, gradient):
self.base_kern.gradient = gradient
class MultioutputDerivativeKern(MultioutputKern):
"""
Multioutput derivative kernel is a meta class for combining different kernels for multioutput GPs.
Multioutput derivative kernel is only a thin wrapper for Multioutput kernel for user not having to define
cross covariances.
"""
def __init__(self, kernels, cross_covariances={}, name='MultioutputDerivativeKern'):
#kernels contains a list of kernels as input,
if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
# The combination kernel ALLWAYS puts the extra dimension last.
# Thus, the index dimension of this kernel is always the last dimension
# after slicing. This is why the index_dim is just the last column:
self.index_dim = -1
super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_parameters=False)
nl = len(kernels)
#build covariance structure
covariance = [[None for i in range(nl)] for j in range(nl)]
linked = []
for i in range(0,nl):
unique=True
for j in range(0,nl):
if i==j or (kernels[i] is kernels[j]):
kern = kernels[i]
if i>j:
unique=False
elif cross_covariances.get((i,j)) is not None: #cross covariance is given
kern = cross_covariances.get((i,j))
elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other
kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j])
unique=False
elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other
kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i])
elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives
kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern)
if i>j:
unique=False
else:
kern = ZeroKern()
covariance[i][j] = kern
if unique is True:
linked.append(i)
self.covariance = covariance
self.link_parameters(*[kernels[i] for i in linked])

View file

@ -1,7 +1,10 @@
# Copyright (c) 2018, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern, CombinationKernel from .kern import Kern, CombinationKernel
import numpy as np import numpy as np
from functools import reduce, partial from functools import reduce, partial
from .independent_outputs import index_to_slices from ...util.multioutput import index_to_slices
from paramz.caching import Cache_this from paramz.caching import Cache_this
class ZeroKern(Kern): class ZeroKern(Kern):
@ -18,6 +21,13 @@ class ZeroKern(Kern):
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
return np.zeros((X.shape[0],X.shape[1])) return np.zeros((X.shape[0],X.shape[1]))
@property
def gradient(self):
return np.empty((1,0))
@gradient.setter
def gradient(self, gradient):
pass
class MultioutputKern(CombinationKernel): class MultioutputKern(CombinationKernel):
""" """
@ -62,14 +72,13 @@ class MultioutputKern(CombinationKernel):
unique=True unique=True
for j in range(0,nl): for j in range(0,nl):
if i==j or (kernels[i] is kernels[j]): if i==j or (kernels[i] is kernels[j]):
covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X} covariance[i][j] = kernels[i]
if i>j: if i>j:
unique=False unique=False
elif cross_covariances.get((i,j)) is not None: #cross covariance is given elif cross_covariances.get((i,j)) is not None: #cross covariance is given
covariance[i][j] = cross_covariances.get((i,j)) covariance[i][j] = cross_covariances.get((i,j))
else: # zero covariance structure else: # zero covariance structure
kern = ZeroKern() covariance[i][j] = ZeroKern()
covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X}
if unique is True: if unique is True:
linked.append(i) linked.append(i)
self.covariance = covariance self.covariance = covariance
@ -82,7 +91,7 @@ class MultioutputKern(CombinationKernel):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
[[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))] [[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))]
return target return target
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
@ -93,10 +102,10 @@ class MultioutputKern(CombinationKernel):
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target return target
def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2): def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2):
gradient = cov_struct['kern'].gradient.copy() gradient = kern.gradient.copy()
cov_struct['update_gradients_full'](dL_dK, X, X2) kern.update_gradients_full(dL_dK, X, X2)
cov_struct['kern'].gradient += gradient kern.gradient += gradient
def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X): def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X):
gradient = kern.gradient.copy() gradient = kern.gradient.copy()
@ -118,14 +127,14 @@ class MultioutputKern(CombinationKernel):
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.reset_gradients() self.reset_gradients()
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
[[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] [[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))]
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
slices = index_to_slices(X[:,self.index_dim]) slices = index_to_slices(X[:,self.index_dim])
target = np.zeros((X.shape[0], X.shape[1]) ) target = np.zeros((X.shape[0], X.shape[1]) )
if X2 is not None: if X2 is not None:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))]
else: else:
[[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))]
return target return target

View file

@ -6,6 +6,7 @@ import numpy as np
from .stationary import Stationary from .stationary import Stationary
from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU
from ...core import Param from ...core import Param
from paramz.caching import Cache_this
from paramz.transformations import Logexp from paramz.transformations import Logexp
from .grid_kerns import GridRBF from .grid_kerns import GridRBF
@ -50,6 +51,27 @@ class RBF(Stationary):
def K_of_r(self, r): def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2) return self.variance * np.exp(-0.5 * r**2)
@Cache_this(limit=3, ignore_args=())
def dK_dX(self, X, X2, dimX):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
dist = X[:,None,dimX]-X2[None,:,dimX]
lengthscale2inv = (np.ones((X.shape[1]))/(self.lengthscale**2))[dimX]
return -1.*K*dist*lengthscale2inv
@Cache_this(limit=3, ignore_args=())
def dK_dX2(self, X, X2, dimX2):
return -self.dK_dX(X,X2, dimX2)
@Cache_this(limit=3, ignore_args=())
def dK2_dXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscale2inv = np.ones((X.shape[1]))/(self.lengthscale**2)
return -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*lengthscale2inv[dimX2] + (dimX==dimX2)*K*lengthscale2inv[dimX]
def dK_dr(self, r): def dK_dr(self, r):
return -r*self.K_of_r(r) return -r*self.K_of_r(r)
@ -59,6 +81,74 @@ class RBF(Stationary):
def dK2_drdr_diag(self): def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros return -self.variance # as the diagonal of r is always filled with zeros
@Cache_this(limit=3, ignore_args=())
def dK_dvariance(self,X,X2):
return self.K(X,X2)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX(self, X, X2, dim):
return self.dK_dX(X,X2, dim)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK2_dvariancedX2(self, X, X2, dim):
return self.dK_dX2(X,X2, dim)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK3_dvariancedXdX2(self, X, X2, dim, dimX2):
return self.dK2_dXdX2(X, X2, dim, dimX2)/self.variance
@Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX(self, X, X2, dimX):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
if self.ARD:
g = []
for diml in range(X.shape[1]):
g += [-1.*K*dist[:,:,dimX]*(dist[:,:,diml]**2)*(lengthscaleinv[dimX]**2)*(lengthscaleinv[diml]**3) + 2.*dist[:,:,dimX]*(lengthscaleinv[diml]**3)*K*(dimX == diml)]
else:
g = -1.*K*dist[:,:,dimX]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**5) + 2.*dist[:,:,dimX]*(lengthscaleinv[dimX]**3)*K
return g
@Cache_this(limit=3, ignore_args=())
def dK2_dlengthscaledX2(self, X, X2, dimX2):
tmp = self.dK2_dlengthscaledX(X, X2, dimX2)
if self.ARD:
return [-1.*g for g in tmp]
else:
return -1*tmp
@Cache_this(limit=3, ignore_args=())
def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
if X2 is None:
X2=X
dist = X[:,None,:]-X2[None,:,:]
lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale)
lengthscale2inv = lengthscaleinv**2
if self.ARD:
g = []
for diml in range(X.shape[1]):
tmp = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*(dist[:,:,diml]**2)*lengthscale2inv[dimX]*lengthscale2inv[dimX2]*(lengthscaleinv[diml]**3)
if dimX == dimX2:
tmp += K*lengthscale2inv[dimX]*(lengthscaleinv[diml]**3)*(dist[:,:,diml]**2)
if diml == dimX:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX2]*(lengthscaleinv[dimX]**3)
if diml == dimX2:
tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*(lengthscaleinv[dimX2]**3)
if dimX == dimX2:
tmp += -2.*K*(lengthscaleinv[dimX]**3)
g += [tmp]
else:
g = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**7) +4*K*dist[:,:,dimX]*dist[:,:,dimX2]*(lengthscaleinv[dimX]**5)
if dimX == dimX2:
g += -2.*K*(lengthscaleinv[dimX]**3) + K*(lengthscaleinv[dimX]**5)*np.sum(dist**2, axis=2)
return g
def __getstate__(self): def __getstate__(self):
dc = super(RBF, self).__getstate__() dc = super(RBF, self).__getstate__()
if self.useGPU: if self.useGPU:

View file

@ -4,7 +4,7 @@ A new kernel
import numpy as np import numpy as np
from .kern import Kern, CombinationKernel from .kern import Kern, CombinationKernel
from .independent_outputs import index_to_slices from ...util.multioutput import index_to_slices
import itertools import itertools
class DEtime(Kern): class DEtime(Kern):

View file

@ -212,7 +212,6 @@ class Stationary(Kern):
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
def update_gradients_direct(self, dL_dVar, dL_dLen): def update_gradients_direct(self, dL_dVar, dL_dLen):
""" """
Specially intended for the Grid regression case. Specially intended for the Grid regression case.
@ -308,6 +307,21 @@ class Stationary(Kern):
return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],)) return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape) #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def dgradients_dX(self, X, X2, dimX):
g1 = self.dK2_dvariancedX(X, X2, dimX)
g2 = self.dK2_dlengthscaledX(X, X2, dimX)
return [g1, g2]
def dgradients_dX2(self, X, X2, dimX2):
g1 = self.dK2_dvariancedX2(X, X2, dimX2)
g2 = self.dK2_dlengthscaledX2(X, X2, dimX2)
return [g1, g2]
def dgradients2_dXdX2(self, X, X2, dimX, dimX2):
g1 = self.dK3_dvariancedXdX2(X, X2, dimX, dimX2)
g2 = self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2)
return [g1, g2]
def _gradients_X_pure(self, dL_dK, X, X2=None): def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK

View file

@ -66,7 +66,6 @@ class Binomial(Likelihood):
np.testing.assert_array_equal(N.shape, y.shape) np.testing.assert_array_equal(N.shape, y.shape)
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1) nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
Ny = N-y Ny = N-y
t1 = np.zeros(y.shape) t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape) t2 = np.zeros(y.shape)
@ -177,6 +176,41 @@ class Binomial(Likelihood):
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass pass
def moments_match_ep(self,obs,tau,v,Y_metadata_i=None):
"""
Calculation of moments using quadrature
:param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural paramenter (mu*precision)
"""
#Compute first integral for zeroth moment.
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
if (isinstance(self.gp_link, link_functions.Probit) or isinstance(self.gp_link, link_functions.ScaledProbit)) and (Y_metadata_i is None or int(Y_metadata_i.get('trials', 1)) == int(1)): #Special case for probit likelihood. Can be found from Riihimaki et Vehtari 2010
if isinstance(self.gp_link, link_functions.ScaledProbit):
nu = self.gp_link.nu
else:
nu = 1.0
nu = self.gp_link.nu
mu = v/tau
sigma2 = 1./tau
t = np.asarray(1 + sigma2*(nu**2))
t[t<1e-20] = 1e-20
a = np.sqrt(t)
z = obs*mu/a
normc_z = max(self.gp_link.transf(z), 1e-20)
m0 = normc_z
normp_z = self.gp_link.dtransf_df(z)
m1 = mu + (obs*sigma2*normp_z)/(normc_z*a)
#print('tau: {}, v: {}, nu: {}, z: {}, normc_z: {}, normp_z: {}'.format(tau, v, nu.values, z, normc_z, normp_z))
m2 = sigma2 - ((sigma2**2)*normp_z)/((1./(nu**2)+sigma2)*normc_z)*(z + normp_z/(nu**2)/normc_z)
#print("m0: {}, m1: {}, m2: {}".format(m0,m1,m2))
#m0a, m1a, m2a = super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i)
#print("m0a: {}, m1a: {}, m2a: {}".format(m0a,m1a,m2a))
return m0, m1, m2
else:
return super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i)
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit): if isinstance(self.gp_link, link_functions.Probit):

View file

@ -138,6 +138,38 @@ class Probit(GPTransformation):
input_dict["class"] = "GPy.likelihoods.link_functions.Probit" input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
return input_dict return input_dict
class ScaledProbit(Probit):
"""
.. math::
g(f) = \\Phi^{-1} (nu*mu)
"""
def __init__(self, nu=1.):
self.nu = float(nu)
def transf(self,f):
return std_norm_cdf(f*self.nu)
def dtransf_df(self,f):
return std_norm_pdf(f*self.nu)*self.nu
def d2transf_df2(self,f):
return -(f*self.nu) * std_norm_pdf(f*self.nu)*(self.nu**2)
def d3transf_df3(self,f):
return (safe_square(f*self.nu)-1.)*std_norm_pdf(f*self.nu)*(self.nu**3)
def to_dict(self):
"""
Convert the object into a json serializable dictionary.
Note: It uses the private method _save_to_input_dict of the parent.
:return dict: json serializable dictionary containing the needed information to instantiate the object
"""
input_dict = super(ScaledProbit, self)._save_to_input_dict()
input_dict["class"] = "GPy.likelihoods.link_functions.ScaledProbit"
return input_dict
class Cloglog(GPTransformation): class Cloglog(GPTransformation):
""" """

View file

@ -14,7 +14,7 @@ from .gaussian import Gaussian
from ..core.parameterization import Param from ..core.parameterization import Param
from paramz.transformations import Logexp from paramz.transformations import Logexp
from ..core.parameterization import Parameterized from ..core.parameterization import Parameterized
from ..kern.src.independent_outputs import index_to_slices from ..util.multioutput import index_to_slices
import itertools import itertools
class MultioutputLikelihood(MixedNoise): class MultioutputLikelihood(MixedNoise):

View file

@ -30,3 +30,4 @@ from .gp_grid_regression import GPRegressionGrid
from .gp_multiout_regression import GPMultioutRegression from .gp_multiout_regression import GPMultioutRegression
from .gp_multiout_regression_md import GPMultioutRegressionMD from .gp_multiout_regression_md import GPMultioutRegressionMD
from .tp_regression import TPRegression from .tp_regression import TPRegression
from .multioutput_gp import MultioutputGP

View file

@ -0,0 +1,147 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import itertools
from ..core.model import Model
from ..core.parameterization.variational import VariationalPosterior
from ..core.mapping import Mapping
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..util.normalizer import Standardize
from .. import util
from paramz import ObsAr
from ..core.gp import GP
from ..util.multioutput import index_to_slices
import logging
import warnings
logger = logging.getLogger("GP")
class MultioutputGP(GP):
"""
Gaussian process model for using observations from multiple likelihoods and different kernels
:param X_list: input observations in a list for each likelihood
:param Y: output observations in a list for each likelihood
:param kernel_list: kernels in a list for each likelihood
:param likelihood_list: likelihoods in a list
:param kernel_cross_covariances: Cross covariances between different likelihoods. See class MultioutputKern for more
:param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
"""
def __init__(self, X_list, Y_list, kernel_list, likelihood_list, name='multioutputgp', kernel_cross_covariances={}, inference_method=None):
#Input and Output
X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list)
Ny = len(Y_list)
assert isinstance(kernel_list, list)
kernel = kern.MultioutputDerivativeKern(kernels=kernel_list, cross_covariances=kernel_cross_covariances)
assert isinstance(likelihood_list, list)
likelihood = likelihoods.MultioutputLikelihood(likelihood_list)
if inference_method is None:
if all([isinstance(l, Gaussian) for l in likelihood_list]):
inference_method = exact_gaussian_inference.ExactGaussianInference()
else:
inference_method = expectation_propagation.EP()
super(MultioutputGP, self).__init__(X,Y,kernel,likelihood, Y_metadata={'output_index':self.output_index, 'trials':np.ones(self.output_index.shape)}, inference_method = inference_method)
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew,None)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict_noiseless(Xnew, full_cov, Y_metadata, kern)
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew,None)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict(Xnew, full_cov, Y_metadata, kern, likelihood, include_likelihood)
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
if isinstance(X, list):
X, _, ind = util.multioutput.build_XY(X,None)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
def predictive_gradients(self, Xnew, kern=None):
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
#if Y_metadata is None:
#Y_metadata={'output_index': ind}
return super(MultioutputGP, self).predictive_gradients(Xnew, kern)
def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!!
"""
Compute the derivatives of the predicted latent function with respect to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is not the same as computing the mean and variance of the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
"""
if isinstance(Xnew, list):
Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
slices = index_to_slices(Xnew[:,-1])
for i in range(len(slices)):
if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0):
assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)"
if kern is None:
kern = self.kern
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1]
# gradients wrt the diagonal part k_{xx}
dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)[:,0:-1]
#grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
if self.posterior.woodbury_inv.ndim == 3:
tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
tmp[:] = dv_dX[:,:,None]
for i in range(self.posterior.woodbury_inv.shape[2]):
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
else:
tmp = dv_dX
alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:,0:-1]
return mean_jac, tmp
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
if isinstance(x_test, list):
x_test, y_test, ind = util.multioutput.build_XY(x_test, y_test)
if Y_metadata is None:
Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)}
return super(MultioutputGP, self).log_predictive_density(x_test, y_test, Y_metadata)
def set_XY(self, X=None, Y=None):
if isinstance(X, list):
X, _, self.output_index = util.multioutput.build_XY(X, None)
if isinstance(Y, list):
_, Y, self.output_index = util.multioutput.build_XY(Y, Y)
self.update_model(False)
if Y is not None:
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
if X is not None:
self.X = ObsAr(X)
self.Y_metadata={'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)}
if isinstance(self.inference_method, expectation_propagation.EP):
self.inference_method.reset()
self.update_model(True)

View file

@ -7,7 +7,7 @@ from unittest.case import skip
import GPy import GPy
from GPy.core.parameterization.param import Param from GPy.core.parameterization.param import Param
import numpy as np import numpy as np
import random
from ..util.config import config from ..util.config import config

View file

@ -2,11 +2,12 @@ import numpy as np
import scipy import scipy
from scipy.special import cbrt from scipy.special import cbrt
from GPy.models import GradientChecker from GPy.models import GradientChecker
import random
_lim_val = np.finfo(np.float64).max _lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val) _lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val) _lim_val_square = np.sqrt(_lim_val)
_lim_val_cube = cbrt(_lim_val) _lim_val_cube = cbrt(_lim_val)
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside, ScaledProbit
class LinkFunctionTests(np.testing.TestCase): class LinkFunctionTests(np.testing.TestCase):
def setUp(self): def setUp(self):
@ -124,6 +125,11 @@ class LinkFunctionTests(np.testing.TestCase):
lim_of_inf = _lim_val lim_of_inf = _lim_val
self.check_gradient(link, lim_of_inf, test_lim=True) self.check_gradient(link, lim_of_inf, test_lim=True)
def test_scaledprobit_gradients(self):
link = ScaledProbit(nu=random.random())
lim_of_inf = _lim_val
self.check_gradient(link, lim_of_inf, test_lim=True)
def test_Cloglog_gradients(self): def test_Cloglog_gradients(self):
link = Cloglog() link = Cloglog()
lim_of_inf = _lim_val_exp lim_of_inf = _lim_val_exp

View file

@ -1181,6 +1181,67 @@ class GradientTests(np.testing.TestCase):
with self.assertRaises(RuntimeError): with self.assertRaises(RuntimeError):
m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]])) m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
def test_multioutput_model_with_derivative_observations(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10
M=10
sigma=0.05
sigmader=0.05
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
xd = np.array([np.linspace(2,8,M)]).T
yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1)))
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
gauss = GPy.likelihoods.Gaussian(variance=0.1)
gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2)
# Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs
# Now we have the regular observations first and derivative observations second, meaning that the kernels and
# the likelihoods must follow the same order
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss])
m.randomize()
self.assertTrue(m.checkgrad())
m.optimize(messages=0, ipython_notebook=False)
self.assertTrue(m.checkgrad())
def test_multioutput_model_with_ep(self):
f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3
fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2
N=10
sigma=0.05
sigmader=0.05
x = np.array([np.linspace(1,10,N)]).T
y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1)))
M=7
xd = np.array([np.linspace(2,8,M)]).T
yd = 2*(fd(xd)>0) -1
# squared exponential kernel:
se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2)
# We need to generate separate kernel for the derivative observations and give the created kernel as an input:
se_der = GPy.kern.DiffKern(se, 0)
#Then
gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
probit = GPy.likelihoods.Binomial(gp_link = GPy.likelihoods.link_functions.ScaledProbit(nu=100))
# Then create the model, we give everything in lists
m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, probit], inference_method=GPy.inference.latent_function_inference.EP(ep_mode="nested"))
self.assertTrue(m.checkgrad())
def _create_missing_data_model(kernel, Q): def _create_missing_data_model(kernel, Q):
D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3 D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
_, _, Ylist = GPy.examples.dimensionality_reduction._simulate_matern(D1, D2, D3, N, num_inducing, False) _, _, Ylist = GPy.examples.dimensionality_reduction._simulate_matern(D1, D2, D3, N, num_inducing, False)

View file

@ -2,6 +2,33 @@ import numpy as np
import warnings import warnings
import GPy import GPy
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
if len(index)==0:
return[]
#contruct the return structure
ind = np.asarray(index,dtype=np.int)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
def get_slices(input_list): def get_slices(input_list):
num_outputs = len(input_list) num_outputs = len(input_list)