diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 19919f97..4af0f996 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -581,3 +581,53 @@ def warped_gp_cubic_sine(max_iters=100): m.plot(title="Standard GP") warp_m.plot_warping() pb.show() + + + +def multioutput_gp_with_derivative_observations(): + def plot_gp_vs_real(m, x, yreal, size_inputs, title, fixed_input=1, xlim=[0,11], ylim=[-1.5,3]): + fig, ax = pb.subplots() + ax.set_title(title) + pb.plot(x, yreal, "r", label='Real function') + rows = slice(0, size_inputs[0]) if fixed_input == 0 else slice(size_inputs[0], size_inputs[0]+size_inputs[1]) + m.plot(fixed_inputs=[(1, fixed_input)], which_data_rows=rows, xlim=xlim, ylim=ylim, ax=ax) + f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3 + fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2 + N=10 # Number of observations + M=10 # Number of derivative observations + Npred=100 # Number of prediction points + sigma = 0.05 # Noise of observations + sigma_der = 0.05 # Noise of derivative observations + x = np.array([np.linspace(1,10,N)]).T + y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1))) + + xd = np.array([np.linspace(2,8,M)]).T + yd = fd(xd) + np.array(sigma_der*np.random.normal(0,1,(M,1))) + + xpred = np.array([np.linspace(0,11,Npred)]).T + ypred_true = f(xpred) + ydpred_true = fd(xpred) + + # squared exponential kernel: + se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2) + # We need to generate separate kernel for the derivative observations and give the created kernel as an input: + se_der = GPy.kern.DiffKern(se, 0) + + #Then + gauss = GPy.likelihoods.Gaussian(variance=sigma**2) + gauss_der = GPy.likelihoods.Gaussian(variance=sigma_der**2) + + # Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs + # Now we have the regular observations first and derivative observations second, meaning that the kernels and + # the likelihoods must follow the same order. Crosscovariances are automatically taken car of + m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss]) + + # Optimize the model + m.optimize(messages=0, ipython_notebook=False) + + #Plot the model, the syntax is same as for multioutput models: + plot_gp_vs_real(m, xpred, ydpred_true, [x.shape[0], xd.shape[0]], title='Latent function derivatives', fixed_input=1, xlim=[0,11], ylim=[-1.5,3]) + plot_gp_vs_real(m, xpred, ypred_true, [x.shape[0], xd.shape[0]], title='Latent function', fixed_input=0, xlim=[0,11], ylim=[-1.5,3]) + + #making predictions for the values: + mu, var = m.predict_noiseless(Xnew=[xpred, np.empty((0,1))]) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 1fedb314..0b769e0a 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -43,4 +43,6 @@ from .src.sde_standard_periodic import sde_StdPeriodic from .src.sde_static import sde_White, sde_Bias from .src.sde_stationary import sde_RBF,sde_Exponential,sde_RatQuad from .src.sde_brownian import sde_Brownian -from .src.multioutput_kern import MultioutputKern \ No newline at end of file +from .src.multioutput_kern import MultioutputKern +from .src.multioutput_derivative_kern import MultioutputDerivativeKern +from .src.diff_kern import DiffKern \ No newline at end of file diff --git a/GPy/kern/src/ODE_UY.py b/GPy/kern/src/ODE_UY.py index 19fb1e94..998d823f 100644 --- a/GPy/kern/src/ODE_UY.py +++ b/GPy/kern/src/ODE_UY.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from .kern import Kern -from .independent_outputs import index_to_slices +from ...util.multioutput import index_to_slices from ...core.parameterization import Param from paramz.transformations import Logexp import numpy as np diff --git a/GPy/kern/src/ODE_UYC.py b/GPy/kern/src/ODE_UYC.py index 57c41767..1537c1d6 100644 --- a/GPy/kern/src/ODE_UYC.py +++ b/GPy/kern/src/ODE_UYC.py @@ -5,7 +5,7 @@ from .kern import Kern from ...core.parameterization import Param from paramz.transformations import Logexp import numpy as np -from .independent_outputs import index_to_slices +from ...util.multioutput import index_to_slices class ODE_UYC(Kern): def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'): diff --git a/GPy/kern/src/ODE_st.py b/GPy/kern/src/ODE_st.py index 0b4fecae..af63c51a 100644 --- a/GPy/kern/src/ODE_st.py +++ b/GPy/kern/src/ODE_st.py @@ -4,7 +4,7 @@ from .kern import Kern from ...core.parameterization import Param from paramz.transformations import Logexp import numpy as np -from .independent_outputs import index_to_slices +from ...util.multioutput import index_to_slices class ODE_st(Kern): diff --git a/GPy/kern/src/ODE_t.py b/GPy/kern/src/ODE_t.py index f09ab77d..539330a3 100644 --- a/GPy/kern/src/ODE_t.py +++ b/GPy/kern/src/ODE_t.py @@ -2,7 +2,7 @@ from .kern import Kern from ...core.parameterization import Param from paramz.transformations import Logexp import numpy as np -from .independent_outputs import index_to_slices +from ...util.multioutput import index_to_slices class ODE_t(Kern): diff --git a/GPy/kern/src/diff_kern.py b/GPy/kern/src/diff_kern.py new file mode 100644 index 00000000..612c0632 --- /dev/null +++ b/GPy/kern/src/diff_kern.py @@ -0,0 +1,88 @@ +# Copyright (c) 2018, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +from .kern import Kern +import numpy as np +from paramz.caching import Cache_this + +class DiffKern(Kern): + """ + Diff kernel is a thin wrapper for using partial derivatives of kernels as kernels. Eg. in combination with + Multioutput kernel this allows the user to train GPs with observations of latent function and latent + function derivatives. NOTE: DiffKern only works when used with Multioutput kernel. Do not use the kernel as standalone + + The parameters the kernel needs are: + -'base_kern': a member of Kernel class that is used for observations + -'dimension': integer that indigates in which dimensions the partial derivative observations are + """ + def __init__(self, base_kern, dimension): + super(DiffKern, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='DiffKern') + self.base_kern = base_kern + self.dimension = dimension + + def parameters_changed(self): + self.base_kern.parameters_changed() + + @Cache_this(limit=3, ignore_args=()) + def K(self, X, X2=None, dimX2 = None): #X in dimension self.dimension + if X2 is None: + X2 = X + if dimX2 is None: + dimX2 = self.dimension + return self.base_kern.dK2_dXdX2(X,X2, self.dimension, dimX2) + + @Cache_this(limit=3, ignore_args=()) + def Kdiag(self, X): + return np.diag(self.base_kern.dK2_dXdX2(X,X, self.dimension, self.dimension)) + + @Cache_this(limit=3, ignore_args=()) + def dK_dX_wrap(self, X, X2): #X in dimension self.dimension + return self.base_kern.dK_dX(X,X2, self.dimension) + + @Cache_this(limit=3, ignore_args=()) + def dK_dX2_wrap(self, X, X2): #X in dimension self.dimension + return self.base_kern.dK_dX2(X,X2, self.dimension) + + def reset_gradients(self): + self.base_kern.reset_gradients() + + @property + def gradient(self): + return self.base_kern.gradient + + @gradient.setter + def gradient(self, gradient): + self.base_kern.gradient = gradient + + def update_gradients_full(self, dL_dK, X, X2=None, dimX2=None): + if dimX2 is None: + dimX2 = self.dimension + gradients = self.base_kern.dgradients2_dXdX2(X,X2,self.dimension,dimX2) + self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) + + def update_gradients_diag(self, dL_dK_diag, X): + gradients = self.base_kern.dgradients2_dXdX2(X,X, self.dimension, self.dimension) + self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK_diag, gradient, f=np.diag) for gradient in gradients]) + + def update_gradients_dK_dX(self, dL_dK, X, X2=None): + if X2 is None: + X2 = X + gradients = self.base_kern.dgradients_dX(X,X2, self.dimension) + self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) + + def update_gradients_dK_dX2(self, dL_dK, X, X2=None): + gradients = self.base_kern.dgradients_dX2(X,X2, self.dimension) + self.base_kern.update_gradients_direct(*[self._convert_gradients(dL_dK, gradient) for gradient in gradients]) + + def gradients_X(self, dL_dK, X, X2): + tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:,:,:, self.dimension] + return np.sum(tmp, axis=1) + + def gradients_X2(self, dL_dK, X, X2): + tmp = self.base_kern.gradients_XX(dL_dK, X, X2)[:, :, self.dimension, :] + return np.sum(tmp, axis=1) + + def _convert_gradients(self, l,g, f = lambda x:x): + if type(g) is np.ndarray: + return np.sum(f(l)*f(g)) + else: + return np.array([np.sum(f(l)*f(gi)) for gi in g]) \ No newline at end of file diff --git a/GPy/kern/src/independent_outputs.py b/GPy/kern/src/independent_outputs.py index db6f9d37..d5c0ba08 100644 --- a/GPy/kern/src/independent_outputs.py +++ b/GPy/kern/src/independent_outputs.py @@ -5,34 +5,8 @@ from .kern import CombinationKernel import numpy as np import itertools +from ...util.multioutput import index_to_slices -def index_to_slices(index): - """ - take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index. - - e.g. - >>> index = np.asarray([0,0,0,1,1,1,2,2,2]) - returns - >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]] - - or, a more complicated example - >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1]) - returns - >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]] - """ - if len(index)==0: - return[] - - #contruct the return structure - ind = np.asarray(index,dtype=np.int) - ret = [[] for i in range(ind.max()+1)] - - #find the switchpoints - ind_ = np.hstack((ind,ind[0]+ind[-1]+1)) - switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0] - - [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] - return ret class IndependentOutputs(CombinationKernel): """ diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py index 8da3fbfb..362ac418 100644 --- a/GPy/kern/src/kern.py +++ b/GPy/kern/src/kern.py @@ -116,6 +116,12 @@ class Kern(Parameterized): except: return X[:, self._all_dims_active] + def _project_dim(self, dim): + try: + return np.where(self._all_dims_active == dim)[0][0] + except: + return None + def K(self, X, X2): """ Compute the kernel function. diff --git a/GPy/kern/src/kernel_slice_operations.py b/GPy/kern/src/kernel_slice_operations.py index 327436e7..8a5dcb81 100644 --- a/GPy/kern/src/kernel_slice_operations.py +++ b/GPy/kern/src/kernel_slice_operations.py @@ -19,16 +19,26 @@ def put_clean(dct, name, func): class KernCallsViaSlicerMeta(ParametersChangedMeta): def __new__(cls, name, bases, dct): put_clean(dct, 'K', _slice_K) + put_clean(dct, 'dK_dX', _slice_dK_dX) + put_clean(dct, 'dK_dX2', _slice_dK_dX) + put_clean(dct, 'dK2_dXdX2', _slice_dK2_dXdX2) put_clean(dct, 'Kdiag', _slice_Kdiag) put_clean(dct, 'phi', _slice_Kdiag) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) + put_clean(dct, 'update_gradients_dK_dX', _slice_update_gradients_full) + put_clean(dct, 'update_gradients_dK_dX2', _slice_update_gradients_full) put_clean(dct, 'gradients_X', _slice_gradients_X) + put_clean(dct, 'gradients_X2', _slice_gradients_X) put_clean(dct, 'gradients_X_X2', _slice_gradients_X) put_clean(dct, 'gradients_XX', _slice_gradients_XX) put_clean(dct, 'gradients_XX_diag', _slice_gradients_XX_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) + put_clean(dct, 'dgradients_dX',_slice_partial_gradients_list_X) + put_clean(dct, 'dgradients_dX2',_slice_partial_gradients_list_X) + put_clean(dct, 'dgradients2_dXdX2',_slice_partial_gradients_list_XX) + put_clean(dct, 'psi0', _slice_psi) put_clean(dct, 'psi1', _slice_psi) put_clean(dct, 'psi2', _slice_psi) @@ -79,6 +89,20 @@ class _Slice_wrap(object): return ret return return_val + def handle_return_list(self, return_val): + if self.ret: + ret = [np.zeros(self.shape) for _ in range(len(return_val))] + if len(self.shape) == 3: + for i in range(len(return_val)): + ret[i][self.k._all_dims_active, :, :] = return_val[i] + #[ret[i].__setitem__((:, :, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))] + elif len(self.shape) == 4: + for i in range(len(return_val)): + ret[i][np.ix_(self.k._all_dims_active, self.k._all_dims_active)] = return_val[i] + #[ret[i].__setitem__((:, :, self.k._all_dims_active, self.k._all_dims_active), return_val[i]) for i in range(len(return_val))] + return ret + return return_val + def _slice_K(f): @wraps(f) def wrap(self, X, X2 = None, *a, **kw): @@ -97,15 +121,15 @@ def _slice_Kdiag(f): def _slice_update_gradients_full(f): @wraps(f) - def wrap(self, dL_dK, X, X2=None): + def wrap(self, dL_dK, X, X2=None, *a, **kw): with _Slice_wrap(self, X, X2) as s: - ret = f(self, dL_dK, s.X, s.X2) + ret = f(self, dL_dK, s.X, s.X2, *a, **kw) return ret return wrap def _slice_update_gradients_diag(f): @wraps(f) - def wrap(self, dL_dKdiag, X): + def wrap(self, dL_dKdiag, X, *a, **kw): with _Slice_wrap(self, X, None) as s: ret = f(self, dL_dKdiag, s.X) return ret @@ -119,6 +143,99 @@ def _slice_gradients_X(f): return ret return wrap +def _slice_dK_dX(f): + @wraps(f) + def wrap(self, X, X2, dim, *a, **kw): + with _Slice_wrap(self, X, X2) as s: + d = s.k._project_dim(dim) + if d is None: + ret = np.zeros((X.shape[0], X2.shape[0])) + else: + ret = f(self, s.X, s.X2, d, *a, **kw) + return ret + return wrap + +def _slice_dK2_dXdX2(f): + @wraps(f) + def wrap(self, X, X2, dimX, dimX2, *a, **kw): + with _Slice_wrap(self, X, X2) as s: + d = s.k._project_dim(dimX) + d2 = s.k._project_dim(dimX2) + if (d is None) or (d2 is None): + ret = np.zeros((X.shape[0], X2.shape[0])) + else: + ret = f(self, s.X, s.X2, d, d2, *a, **kw) + return ret + return wrap + +def _slice_partial_gradients_X(f): + @wraps(f) + def wrap(self, X, X2, dim): + if X2 is None: + N, M = X.shape[0], X.shape[0] + else: + N, M = X.shape[0], X2.shape[0] + Q1 = X.shape[1] + with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1)) as s: + ret = s.handle_return_array(f(self, s.X, s.X2, dim)) + return ret + return wrap + +def _slice_partial_gradients_list_X(f): + @wraps(f) + def wrap(self, X, X2, dim): + if X2 is None: + N, M = X.shape[0], X.shape[0] + else: + N, M = X.shape[0], X2.shape[0] + with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s: + d = s.k._project_dim(dim) + if d is None: + ret = [np.zeros((N, M)) for i in range(s.k.size)] + else: + ret = f(self, s.X, s.X2, d) + return ret + return wrap + +def _slice_partial_gradients_XX(f): + @wraps(f) + def wrap(self, X, X2, dim, dimX2): + if X2 is None: + N, M = X.shape[0], X.shape[0] + Q1 = X.shape[1] + else: + N, M = X.shape[0], X2.shape[0] + Q1, Q2 = X.shape[1], X2.shape[1] + with _Slice_wrap(self, X, X2, ret_shape=(N, M, Q1, Q2)) as s: + ret = s.handle_return_array(f(self, s.X, s.X2, dim, dimX2)) + return ret + return wrap + +def _slice_partial_gradients_list_XX(f): + @wraps(f) + def wrap(self, X, X2, dimX, dimX2): + if X2 is None: + N, M = X.shape[0], X.shape[0] + else: + N, M = X.shape[0], X2.shape[0] + with _Slice_wrap(self, X, X2, ret_shape=(N, M)) as s: + d = s.k._project_dim(dimX) + d2 = s.k._project_dim(dimX2) + if (d is None) or (d2 is None): + ret = [np.zeros((N, M)) for i in range(s.k.size)] + else: + ret = f(self, s.X, s.X2, d, d2) + return ret + return wrap + +def _slice_gradient_derivatives_X(f): + @wraps(f) + def wrap(self, dL_dK, X, X2=None): + with _Slice_wrap(self, X, X2) as s: + ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2)) + return ret + return wrap + def _slice_gradients_X_diag(f): @wraps(f) def wrap(self, dL_dKdiag, X): diff --git a/GPy/kern/src/multioutput_derivative_kern.py b/GPy/kern/src/multioutput_derivative_kern.py new file mode 100644 index 00000000..17da4a13 --- /dev/null +++ b/GPy/kern/src/multioutput_derivative_kern.py @@ -0,0 +1,84 @@ +# Copyright (c) 2018, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from .kern import Kern, CombinationKernel +from .multioutput_kern import MultioutputKern, ZeroKern +import numpy as np +from functools import partial + +class KernWrapper(Kern): + def __init__(self, fk, fug, fg, base_kern): + self.fk = fk + self.fug = fug + self.fg = fg + self.base_kern = base_kern + super(KernWrapper, self).__init__(base_kern.active_dims.size, base_kern.active_dims, name='KernWrapper',useGPU=False) + + def K(self, X, X2=None): + return self.fk(X,X2=X2) + + def update_gradients_full(self,dL_dK, X, X2=None): + return self.fug(dL_dK, X, X2=X2) + + def gradients_X(self,dL_dK, X, X2=None): + return self.fg(dL_dK, X, X2=X2) + + @property + def gradient(self): + return self.base_kern.gradient + + @gradient.setter + def gradient(self, gradient): + self.base_kern.gradient = gradient + +class MultioutputDerivativeKern(MultioutputKern): + """ + Multioutput derivative kernel is a meta class for combining different kernels for multioutput GPs. + Multioutput derivative kernel is only a thin wrapper for Multioutput kernel for user not having to define + cross covariances. + """ + def __init__(self, kernels, cross_covariances={}, name='MultioutputDerivativeKern'): + #kernels contains a list of kernels as input, + if not isinstance(kernels, list): + self.single_kern = True + self.kern = kernels + kernels = [kernels] + else: + self.single_kern = False + self.kern = kernels + # The combination kernel ALLWAYS puts the extra dimension last. + # Thus, the index dimension of this kernel is always the last dimension + # after slicing. This is why the index_dim is just the last column: + self.index_dim = -1 + super(MultioutputKern, self).__init__(kernels=kernels, extra_dims=[self.index_dim], name=name, link_parameters=False) + + nl = len(kernels) + + #build covariance structure + covariance = [[None for i in range(nl)] for j in range(nl)] + linked = [] + for i in range(0,nl): + unique=True + for j in range(0,nl): + if i==j or (kernels[i] is kernels[j]): + kern = kernels[i] + if i>j: + unique=False + elif cross_covariances.get((i,j)) is not None: #cross covariance is given + kern = cross_covariances.get((i,j)) + elif kernels[i].name == 'DiffKern' and kernels[i].base_kern == kernels[j]: # one is derivative of other + kern = KernWrapper(kernels[i].dK_dX_wrap,kernels[i].update_gradients_dK_dX,kernels[i].gradients_X, kernels[j]) + unique=False + elif kernels[j].name == 'DiffKern' and kernels[j].base_kern == kernels[i]: # one is derivative of other + kern = KernWrapper(kernels[j].dK_dX2_wrap,kernels[j].update_gradients_dK_dX2,kernels[j].gradients_X2, kernels[i]) + elif kernels[i].name == 'DiffKern' and kernels[j].name == 'DiffKern' and kernels[i].base_kern == kernels[j].base_kern: #both are partial derivatives + kern = KernWrapper(partial(kernels[i].K, dimX2=kernels[j].dimension), partial(kernels[i].update_gradients_full, dimX2=kernels[j].dimension),None, kernels[i].base_kern) + if i>j: + unique=False + else: + kern = ZeroKern() + covariance[i][j] = kern + if unique is True: + linked.append(i) + self.covariance = covariance + self.link_parameters(*[kernels[i] for i in linked]) \ No newline at end of file diff --git a/GPy/kern/src/multioutput_kern.py b/GPy/kern/src/multioutput_kern.py index b7feaadb..6bedeb59 100644 --- a/GPy/kern/src/multioutput_kern.py +++ b/GPy/kern/src/multioutput_kern.py @@ -1,7 +1,10 @@ +# Copyright (c) 2018, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + from .kern import Kern, CombinationKernel import numpy as np from functools import reduce, partial -from .independent_outputs import index_to_slices +from ...util.multioutput import index_to_slices from paramz.caching import Cache_this class ZeroKern(Kern): @@ -18,6 +21,13 @@ class ZeroKern(Kern): def gradients_X(self,dL_dK, X, X2=None): return np.zeros((X.shape[0],X.shape[1])) + @property + def gradient(self): + return np.empty((1,0)) + + @gradient.setter + def gradient(self, gradient): + pass class MultioutputKern(CombinationKernel): """ @@ -62,14 +72,13 @@ class MultioutputKern(CombinationKernel): unique=True for j in range(0,nl): if i==j or (kernels[i] is kernels[j]): - covariance[i][j] = {'kern': kernels[i], 'K': kernels[i].K, 'update_gradients_full': kernels[i].update_gradients_full, 'gradients_X': kernels[i].gradients_X} + covariance[i][j] = kernels[i] if i>j: unique=False elif cross_covariances.get((i,j)) is not None: #cross covariance is given covariance[i][j] = cross_covariances.get((i,j)) else: # zero covariance structure - kern = ZeroKern() - covariance[i][j] = {'kern': kern, 'K': kern.K, 'update_gradients_full': kern.update_gradients_full, 'gradients_X': kern.gradients_X} + covariance[i][j] = ZeroKern() if unique is True: linked.append(i) self.covariance = covariance @@ -82,7 +91,7 @@ class MultioutputKern(CombinationKernel): slices = index_to_slices(X[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X2.shape[0])) - [[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j]['K'](X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))] + [[[[ target.__setitem__((slices[i][k],slices2[j][l]), self.covariance[i][j].K(X[slices[i][k],:],X2[slices2[j][l],:])) for k in range( len(slices[i]))] for l in range(len(slices2[j])) ] for i in range(len(slices))] for j in range(len(slices2))] return target @Cache_this(limit=3, ignore_args=()) @@ -93,10 +102,10 @@ class MultioutputKern(CombinationKernel): [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] return target - def _update_gradients_full_wrapper(self, cov_struct, dL_dK, X, X2): - gradient = cov_struct['kern'].gradient.copy() - cov_struct['update_gradients_full'](dL_dK, X, X2) - cov_struct['kern'].gradient += gradient + def _update_gradients_full_wrapper(self, kern, dL_dK, X, X2): + gradient = kern.gradient.copy() + kern.update_gradients_full(dL_dK, X, X2) + kern.gradient += gradient def _update_gradients_diag_wrapper(self, kern, dL_dKdiag, X): gradient = kern.gradient.copy() @@ -118,14 +127,14 @@ class MultioutputKern(CombinationKernel): def update_gradients_diag(self, dL_dKdiag, X): self.reset_gradients() slices = index_to_slices(X[:,self.index_dim]) - [[ self._update_gradients_diag_wrapper(self.covariance[i][i]['kern'], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] + [[ self._update_gradients_diag_wrapper(self.covariance[i][i], dL_dKdiag[slices[i][k]], X[slices[i][k],:]) for k in range(len(slices[i]))] for i in range(len(slices))] def gradients_X(self,dL_dK, X, X2=None): slices = index_to_slices(X[:,self.index_dim]) target = np.zeros((X.shape[0], X.shape[1]) ) if X2 is not None: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] + [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices2[j][l]], X[slices[i][k],:], X2[slices2[j][l],:]) ) for k in range(len(slices[i]))] for l in range(len(slices2[j]))] for i in range(len(slices))] for j in range(len(slices2))] else: - [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j]['gradients_X'](dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] + [[[[ target.__setitem__((slices[i][k]), target[slices[i][k],:] + self.covariance[i][j].gradients_X(dL_dK[slices[i][k],slices[j][l]], X[slices[i][k],:], (None if (i==j and k==l) else X[slices[j][l],:] )) ) for k in range(len(slices[i]))] for l in range(len(slices[j]))] for i in range(len(slices))] for j in range(len(slices))] return target \ No newline at end of file diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index c17345d8..cbabcb99 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -6,6 +6,7 @@ import numpy as np from .stationary import Stationary from .psi_comp import PSICOMP_RBF, PSICOMP_RBF_GPU from ...core import Param +from paramz.caching import Cache_this from paramz.transformations import Logexp from .grid_kerns import GridRBF @@ -50,6 +51,27 @@ class RBF(Stationary): def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) + @Cache_this(limit=3, ignore_args=()) + def dK_dX(self, X, X2, dimX): + r = self._scaled_dist(X, X2) + K = self.K_of_r(r) + dist = X[:,None,dimX]-X2[None,:,dimX] + lengthscale2inv = (np.ones((X.shape[1]))/(self.lengthscale**2))[dimX] + return -1.*K*dist*lengthscale2inv + @Cache_this(limit=3, ignore_args=()) + def dK_dX2(self, X, X2, dimX2): + return -self.dK_dX(X,X2, dimX2) + + @Cache_this(limit=3, ignore_args=()) + def dK2_dXdX2(self, X, X2, dimX, dimX2): + r = self._scaled_dist(X, X2) + K = self.K_of_r(r) + if X2 is None: + X2=X + dist = X[:,None,:]-X2[None,:,:] + lengthscale2inv = np.ones((X.shape[1]))/(self.lengthscale**2) + return -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*lengthscale2inv[dimX2] + (dimX==dimX2)*K*lengthscale2inv[dimX] + def dK_dr(self, r): return -r*self.K_of_r(r) @@ -58,6 +80,74 @@ class RBF(Stationary): def dK2_drdr_diag(self): return -self.variance # as the diagonal of r is always filled with zeros + + @Cache_this(limit=3, ignore_args=()) + def dK_dvariance(self,X,X2): + return self.K(X,X2)/self.variance + + @Cache_this(limit=3, ignore_args=()) + def dK2_dvariancedX(self, X, X2, dim): + return self.dK_dX(X,X2, dim)/self.variance + + @Cache_this(limit=3, ignore_args=()) + def dK2_dvariancedX2(self, X, X2, dim): + return self.dK_dX2(X,X2, dim)/self.variance + + @Cache_this(limit=3, ignore_args=()) + def dK3_dvariancedXdX2(self, X, X2, dim, dimX2): + return self.dK2_dXdX2(X, X2, dim, dimX2)/self.variance + + @Cache_this(limit=3, ignore_args=()) + def dK2_dlengthscaledX(self, X, X2, dimX): + r = self._scaled_dist(X, X2) + K = self.K_of_r(r) + if X2 is None: + X2=X + dist = X[:,None,:]-X2[None,:,:] + lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale) + if self.ARD: + g = [] + for diml in range(X.shape[1]): + g += [-1.*K*dist[:,:,dimX]*(dist[:,:,diml]**2)*(lengthscaleinv[dimX]**2)*(lengthscaleinv[diml]**3) + 2.*dist[:,:,dimX]*(lengthscaleinv[diml]**3)*K*(dimX == diml)] + else: + g = -1.*K*dist[:,:,dimX]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**5) + 2.*dist[:,:,dimX]*(lengthscaleinv[dimX]**3)*K + return g + + @Cache_this(limit=3, ignore_args=()) + def dK2_dlengthscaledX2(self, X, X2, dimX2): + tmp = self.dK2_dlengthscaledX(X, X2, dimX2) + if self.ARD: + return [-1.*g for g in tmp] + else: + return -1*tmp + + @Cache_this(limit=3, ignore_args=()) + def dK3_dlengthscaledXdX2(self, X, X2, dimX, dimX2): + r = self._scaled_dist(X, X2) + K = self.K_of_r(r) + if X2 is None: + X2=X + dist = X[:,None,:]-X2[None,:,:] + lengthscaleinv = np.ones((X.shape[1]))/(self.lengthscale) + lengthscale2inv = lengthscaleinv**2 + if self.ARD: + g = [] + for diml in range(X.shape[1]): + tmp = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*(dist[:,:,diml]**2)*lengthscale2inv[dimX]*lengthscale2inv[dimX2]*(lengthscaleinv[diml]**3) + if dimX == dimX2: + tmp += K*lengthscale2inv[dimX]*(lengthscaleinv[diml]**3)*(dist[:,:,diml]**2) + if diml == dimX: + tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX2]*(lengthscaleinv[dimX]**3) + if diml == dimX2: + tmp += 2.*K*dist[:,:,dimX]*dist[:,:,dimX2]*lengthscale2inv[dimX]*(lengthscaleinv[dimX2]**3) + if dimX == dimX2: + tmp += -2.*K*(lengthscaleinv[dimX]**3) + g += [tmp] + else: + g = -1.*K*dist[:,:,dimX]*dist[:,:,dimX2]*np.sum(dist**2, axis=2)*(lengthscaleinv[dimX]**7) +4*K*dist[:,:,dimX]*dist[:,:,dimX2]*(lengthscaleinv[dimX]**5) + if dimX == dimX2: + g += -2.*K*(lengthscaleinv[dimX]**3) + K*(lengthscaleinv[dimX]**5)*np.sum(dist**2, axis=2) + return g def __getstate__(self): dc = super(RBF, self).__getstate__() diff --git a/GPy/kern/src/splitKern.py b/GPy/kern/src/splitKern.py index 324178d4..4441a7cf 100644 --- a/GPy/kern/src/splitKern.py +++ b/GPy/kern/src/splitKern.py @@ -4,7 +4,7 @@ A new kernel import numpy as np from .kern import Kern, CombinationKernel -from .independent_outputs import index_to_slices +from ...util.multioutput import index_to_slices import itertools class DEtime(Kern): diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 9fa3273d..888320a0 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -212,7 +212,6 @@ class Stationary(Kern): r = self._scaled_dist(X, X2) self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale - def update_gradients_direct(self, dL_dVar, dL_dLen): """ Specially intended for the Grid regression case. @@ -307,6 +306,21 @@ class Stationary(Kern): l4 = np.ones(X.shape[1])*self.lengthscale**2 return dL_dK_diag * (np.eye(X.shape[1]) * -self.dK2_drdr_diag()/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],)) #return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape) + + def dgradients_dX(self, X, X2, dimX): + g1 = self.dK2_dvariancedX(X, X2, dimX) + g2 = self.dK2_dlengthscaledX(X, X2, dimX) + return [g1, g2] + + def dgradients_dX2(self, X, X2, dimX2): + g1 = self.dK2_dvariancedX2(X, X2, dimX2) + g2 = self.dK2_dlengthscaledX2(X, X2, dimX2) + return [g1, g2] + + def dgradients2_dXdX2(self, X, X2, dimX, dimX2): + g1 = self.dK3_dvariancedXdX2(X, X2, dimX, dimX2) + g2 = self.dK3_dlengthscaledXdX2(X, X2, dimX, dimX2) + return [g1, g2] def _gradients_X_pure(self, dL_dK, X, X2=None): invdist = self._inv_dist(X, X2) diff --git a/GPy/likelihoods/binomial.py b/GPy/likelihoods/binomial.py index 61440ec9..122cbcff 100644 --- a/GPy/likelihoods/binomial.py +++ b/GPy/likelihoods/binomial.py @@ -66,7 +66,6 @@ class Binomial(Likelihood): np.testing.assert_array_equal(N.shape, y.shape) nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1) - Ny = N-y t1 = np.zeros(y.shape) t2 = np.zeros(y.shape) @@ -177,6 +176,41 @@ class Binomial(Likelihood): def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): pass + + def moments_match_ep(self,obs,tau,v,Y_metadata_i=None): + """ + Calculation of moments using quadrature + :param obs: observed output + :param tau: cavity distribution 1st natural parameter (precision) + :param v: cavity distribution 2nd natural paramenter (mu*precision) + """ + #Compute first integral for zeroth moment. + #NOTE constant np.sqrt(2*pi/tau) added at the end of the function + if (isinstance(self.gp_link, link_functions.Probit) or isinstance(self.gp_link, link_functions.ScaledProbit)) and (Y_metadata_i is None or int(Y_metadata_i.get('trials', 1)) == int(1)): #Special case for probit likelihood. Can be found from Riihimaki et Vehtari 2010 + if isinstance(self.gp_link, link_functions.ScaledProbit): + nu = self.gp_link.nu + else: + nu = 1.0 + nu = self.gp_link.nu + mu = v/tau + sigma2 = 1./tau + t = np.asarray(1 + sigma2*(nu**2)) + t[t<1e-20] = 1e-20 + a = np.sqrt(t) + z = obs*mu/a + normc_z = max(self.gp_link.transf(z), 1e-20) + m0 = normc_z + normp_z = self.gp_link.dtransf_df(z) + m1 = mu + (obs*sigma2*normp_z)/(normc_z*a) + #print('tau: {}, v: {}, nu: {}, z: {}, normc_z: {}, normp_z: {}'.format(tau, v, nu.values, z, normc_z, normp_z)) + m2 = sigma2 - ((sigma2**2)*normp_z)/((1./(nu**2)+sigma2)*normc_z)*(z + normp_z/(nu**2)/normc_z) + #print("m0: {}, m1: {}, m2: {}".format(m0,m1,m2)) + #m0a, m1a, m2a = super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i) + #print("m0a: {}, m1a: {}, m2a: {}".format(m0a,m1a,m2a)) + return m0, m1, m2 + else: + return super(Binomial, self).moments_match_ep(obs,tau,v,Y_metadata_i) + def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): if isinstance(self.gp_link, link_functions.Probit): diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index 0eb05e74..0554bd93 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -138,6 +138,38 @@ class Probit(GPTransformation): input_dict["class"] = "GPy.likelihoods.link_functions.Probit" return input_dict +class ScaledProbit(Probit): + """ + .. math:: + g(f) = \\Phi^{-1} (nu*mu) + """ + def __init__(self, nu=1.): + self.nu = float(nu) + + def transf(self,f): + return std_norm_cdf(f*self.nu) + + def dtransf_df(self,f): + return std_norm_pdf(f*self.nu)*self.nu + + def d2transf_df2(self,f): + return -(f*self.nu) * std_norm_pdf(f*self.nu)*(self.nu**2) + + def d3transf_df3(self,f): + return (safe_square(f*self.nu)-1.)*std_norm_pdf(f*self.nu)*(self.nu**3) + + def to_dict(self): + """ + Convert the object into a json serializable dictionary. + + Note: It uses the private method _save_to_input_dict of the parent. + + :return dict: json serializable dictionary containing the needed information to instantiate the object + """ + + input_dict = super(ScaledProbit, self)._save_to_input_dict() + input_dict["class"] = "GPy.likelihoods.link_functions.ScaledProbit" + return input_dict class Cloglog(GPTransformation): """ diff --git a/GPy/likelihoods/multioutput_likelihood.py b/GPy/likelihoods/multioutput_likelihood.py index 2b407571..2bc43fd0 100644 --- a/GPy/likelihoods/multioutput_likelihood.py +++ b/GPy/likelihoods/multioutput_likelihood.py @@ -14,7 +14,7 @@ from .gaussian import Gaussian from ..core.parameterization import Param from paramz.transformations import Logexp from ..core.parameterization import Parameterized -from ..kern.src.independent_outputs import index_to_slices +from ..util.multioutput import index_to_slices import itertools class MultioutputLikelihood(MixedNoise): diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index f7d80c61..3e788927 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -30,3 +30,4 @@ from .gp_grid_regression import GPRegressionGrid from .gp_multiout_regression import GPMultioutRegression from .gp_multiout_regression_md import GPMultioutRegressionMD from .tp_regression import TPRegression +from .multioutput_gp import MultioutputGP diff --git a/GPy/models/multioutput_gp.py b/GPy/models/multioutput_gp.py new file mode 100644 index 00000000..2c45a0c2 --- /dev/null +++ b/GPy/models/multioutput_gp.py @@ -0,0 +1,147 @@ +# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import itertools +from ..core.model import Model +from ..core.parameterization.variational import VariationalPosterior +from ..core.mapping import Mapping +from .. import likelihoods +from ..likelihoods.gaussian import Gaussian +from .. import kern +from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation +from ..util.normalizer import Standardize +from .. import util +from paramz import ObsAr +from ..core.gp import GP + +from ..util.multioutput import index_to_slices +import logging +import warnings +logger = logging.getLogger("GP") + +class MultioutputGP(GP): + """ + Gaussian process model for using observations from multiple likelihoods and different kernels + :param X_list: input observations in a list for each likelihood + :param Y: output observations in a list for each likelihood + :param kernel_list: kernels in a list for each likelihood + :param likelihood_list: likelihoods in a list + :param kernel_cross_covariances: Cross covariances between different likelihoods. See class MultioutputKern for more + :param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP + """ + def __init__(self, X_list, Y_list, kernel_list, likelihood_list, name='multioutputgp', kernel_cross_covariances={}, inference_method=None): + #Input and Output + X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list) + Ny = len(Y_list) + + assert isinstance(kernel_list, list) + kernel = kern.MultioutputDerivativeKern(kernels=kernel_list, cross_covariances=kernel_cross_covariances) + + assert isinstance(likelihood_list, list) + likelihood = likelihoods.MultioutputLikelihood(likelihood_list) + + if inference_method is None: + if all([isinstance(l, Gaussian) for l in likelihood_list]): + inference_method = exact_gaussian_inference.ExactGaussianInference() + else: + inference_method = expectation_propagation.EP() + + super(MultioutputGP, self).__init__(X,Y,kernel,likelihood, Y_metadata={'output_index':self.output_index, 'trials':np.ones(self.output_index.shape)}, inference_method = inference_method) + + def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None): + if isinstance(Xnew, list): + Xnew, _, ind = util.multioutput.build_XY(Xnew,None) + if Y_metadata is None: + Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} + return super(MultioutputGP, self).predict_noiseless(Xnew, full_cov, Y_metadata, kern) + + def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True): + if isinstance(Xnew, list): + Xnew, _, ind = util.multioutput.build_XY(Xnew,None) + if Y_metadata is None: + Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} + return super(MultioutputGP, self).predict(Xnew, full_cov, Y_metadata, kern, likelihood, include_likelihood) + + def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None): + if isinstance(X, list): + X, _, ind = util.multioutput.build_XY(X,None) + if Y_metadata is None: + Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} + return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood) + + def predictive_gradients(self, Xnew, kern=None): + if isinstance(Xnew, list): + Xnew, _, ind = util.multioutput.build_XY(Xnew, None) + #if Y_metadata is None: + #Y_metadata={'output_index': ind} + return super(MultioutputGP, self).predictive_gradients(Xnew, kern) + + def predictive_gradients(self, Xnew, kern=None): #XNEW IS NOT A LIST!! + """ + Compute the derivatives of the predicted latent function with respect to X* + Given a set of points at which to predict X* (size [N*,Q]), compute the + derivatives of the mean and variance. Resulting arrays are sized: + dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one). + Note that this is not the same as computing the mean and variance of the derivative of the function! + dv_dX* -- [N*, Q], (since all outputs have the same variance) + :param X: The points at which to get the predictive gradients + :type X: np.ndarray (Xnew x self.input_dim) + :returns: dmu_dX, dv_dX + :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ] + """ + + if isinstance(Xnew, list): + Xnew, _, ind = util.multioutput.build_XY(Xnew, None) + + slices = index_to_slices(Xnew[:,-1]) + + for i in range(len(slices)): + if ((self.kern.kern[i].name == 'diffKern' ) and len(slices[i])>0): + assert 0, "It is not (yet) possible to predict gradients of gradient observations, sorry :)" + + if kern is None: + kern = self.kern + mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1]-1,self.output_dim)) + for i in range(self.output_dim): + mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)[:,0:-1] + + # gradients wrt the diagonal part k_{xx} + dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)[:,0:-1] + #grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx} + if self.posterior.woodbury_inv.ndim == 3: + tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],)) + tmp[:] = dv_dX[:,:,None] + for i in range(self.posterior.woodbury_inv.shape[2]): + alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i]) + tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable) + else: + tmp = dv_dX + alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv) + tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:,0:-1] + return mean_jac, tmp + + def log_predictive_density(self, x_test, y_test, Y_metadata=None): + if isinstance(x_test, list): + x_test, y_test, ind = util.multioutput.build_XY(x_test, y_test) + if Y_metadata is None: + Y_metadata={'output_index': ind, 'trials': np.ones(ind.shape)} + return super(MultioutputGP, self).log_predictive_density(x_test, y_test, Y_metadata) + + def set_XY(self, X=None, Y=None): + if isinstance(X, list): + X, _, self.output_index = util.multioutput.build_XY(X, None) + if isinstance(Y, list): + _, Y, self.output_index = util.multioutput.build_XY(Y, Y) + + self.update_model(False) + if Y is not None: + self.Y = ObsAr(Y) + self.Y_normalized = self.Y + if X is not None: + self.X = ObsAr(X) + + self.Y_metadata={'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)} + if isinstance(self.inference_method, expectation_propagation.EP): + self.inference_method.reset() + self.update_model(True) \ No newline at end of file diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 1e11f6a6..9b9cd911 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -7,7 +7,7 @@ from unittest.case import skip import GPy from GPy.core.parameterization.param import Param import numpy as np - +import random from ..util.config import config diff --git a/GPy/testing/link_function_tests.py b/GPy/testing/link_function_tests.py index 9f41f736..8f3525b0 100644 --- a/GPy/testing/link_function_tests.py +++ b/GPy/testing/link_function_tests.py @@ -2,11 +2,12 @@ import numpy as np import scipy from scipy.special import cbrt from GPy.models import GradientChecker +import random _lim_val = np.finfo(np.float64).max _lim_val_exp = np.log(_lim_val) _lim_val_square = np.sqrt(_lim_val) _lim_val_cube = cbrt(_lim_val) -from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside +from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside, ScaledProbit class LinkFunctionTests(np.testing.TestCase): def setUp(self): @@ -123,6 +124,11 @@ class LinkFunctionTests(np.testing.TestCase): link = Probit() lim_of_inf = _lim_val self.check_gradient(link, lim_of_inf, test_lim=True) + + def test_scaledprobit_gradients(self): + link = ScaledProbit(nu=random.random()) + lim_of_inf = _lim_val + self.check_gradient(link, lim_of_inf, test_lim=True) def test_Cloglog_gradients(self): link = Cloglog() diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index adfbc837..ca39932d 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -1180,6 +1180,67 @@ class GradientTests(np.testing.TestCase): with self.assertRaises(RuntimeError): m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]])) + + def test_multioutput_model_with_derivative_observations(self): + f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3 + fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2 + N=10 + M=10 + sigma=0.05 + sigmader=0.05 + x = np.array([np.linspace(1,10,N)]).T + y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1))) + + xd = np.array([np.linspace(2,8,M)]).T + yd = fd(xd) + np.array(sigmader*np.random.normal(0,1,(M,1))) + + # squared exponential kernel: + se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2) + # We need to generate separate kernel for the derivative observations and give the created kernel as an input: + se_der = GPy.kern.DiffKern(se, 0) + + #Then + gauss = GPy.likelihoods.Gaussian(variance=sigma**2) + gauss = GPy.likelihoods.Gaussian(variance=0.1) + gauss_der = GPy.likelihoods.Gaussian(variance=sigma**2) + + # Then create the model, we give everything in lists, the order of the inputs indicates the order of the outputs + # Now we have the regular observations first and derivative observations second, meaning that the kernels and + # the likelihoods must follow the same order + m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, gauss]) + m.randomize() + self.assertTrue(m.checkgrad()) + + m.optimize(messages=0, ipython_notebook=False) + + self.assertTrue(m.checkgrad()) + + def test_multioutput_model_with_ep(self): + f = lambda x: np.sin(x)+0.1*(x-2.)**2-0.005*x**3 + fd = lambda x: np.cos(x)+0.2*(x-2.)-0.015*x**2 + N=10 + sigma=0.05 + sigmader=0.05 + x = np.array([np.linspace(1,10,N)]).T + y = f(x) + np.array(sigma*np.random.normal(0,1,(N,1))) + + M=7 + xd = np.array([np.linspace(2,8,M)]).T + yd = 2*(fd(xd)>0) -1 + + # squared exponential kernel: + se = GPy.kern.RBF(input_dim = 1, lengthscale=1.5, variance=0.2) + # We need to generate separate kernel for the derivative observations and give the created kernel as an input: + se_der = GPy.kern.DiffKern(se, 0) + + #Then + gauss = GPy.likelihoods.Gaussian(variance=sigma**2) + probit = GPy.likelihoods.Binomial(gp_link = GPy.likelihoods.link_functions.ScaledProbit(nu=100)) + + # Then create the model, we give everything in lists + m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list = [gauss, probit], inference_method=GPy.inference.latent_function_inference.EP(ep_mode="nested")) + + self.assertTrue(m.checkgrad()) def _create_missing_data_model(kernel, Q): D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3 diff --git a/GPy/util/multioutput.py b/GPy/util/multioutput.py index 2233dbb6..91227838 100644 --- a/GPy/util/multioutput.py +++ b/GPy/util/multioutput.py @@ -2,6 +2,33 @@ import numpy as np import warnings import GPy +def index_to_slices(index): + """ + take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index. + + e.g. + >>> index = np.asarray([0,0,0,1,1,1,2,2,2]) + returns + >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]] + + or, a more complicated example + >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1]) + returns + >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]] + """ + if len(index)==0: + return[] + + #contruct the return structure + ind = np.asarray(index,dtype=np.int) + ret = [[] for i in range(ind.max()+1)] + + #find the switchpoints + ind_ = np.hstack((ind,ind[0]+ind[-1]+1)) + switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0] + + [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] + return ret def get_slices(input_list): num_outputs = len(input_list)