diff --git a/GPy/_models/__init__.py b/GPy/_models/__init__.py new file mode 100644 index 00000000..6fc93631 --- /dev/null +++ b/GPy/_models/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +# from gp_regression import GPRegression; _gp_regression = gp_regression ; del gp_regression +# from gp_classification import GPClassification; _gp_classification = gp_classification ; del gp_classification +# from sparse_gp_regression import SparseGPRegression; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression +# from svigp_regression import SVIGPRegression; _svigp_regression = svigp_regression ; del svigp_regression +# from sparse_gp_classification import SparseGPClassification; _sparse_gp_classification = sparse_gp_classification ; del sparse_gp_classification +# from fitc_classification import FITCClassification; _fitc_classification = fitc_classification ; del fitc_classification +# from gplvm import GPLVM; _gplvm = gplvm ; del gplvm +# from bcgplvm import BCGPLVM; _bcgplvm = bcgplvm; del bcgplvm +# from sparse_gplvm import SparseGPLVM; _sparse_gplvm = sparse_gplvm ; del sparse_gplvm +# from warped_gp import WarpedGP; _warped_gp = warped_gp ; del warped_gp +# from bayesian_gplvm import BayesianGPLVM; _bayesian_gplvm = bayesian_gplvm ; del bayesian_gplvm +# from mrd import MRD; _mrd = mrd ; del mrd +# from gradient_checker import GradientChecker; _gradient_checker = gradient_checker ; del gradient_checker +# from gp_multioutput_regression import GPMultioutputRegression; _gp_multioutput_regression = gp_multioutput_regression ; del gp_multioutput_regression +# from sparse_gp_multioutput_regression import SparseGPMultioutputRegression; _sparse_gp_multioutput_regression = sparse_gp_multioutput_regression ; del sparse_gp_multioutput_regression + diff --git a/GPy/models/bayesian_gplvm.py b/GPy/_models/bayesian_gplvm.py similarity index 99% rename from 
GPy/models/bayesian_gplvm.py rename to GPy/_models/bayesian_gplvm.py index 21b46a8a..2b299ad8 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/_models/bayesian_gplvm.py @@ -2,14 +2,14 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ..core import SparseGP +from ..core.sparse_gp import SparseGP from ..likelihoods import Gaussian from .. import kern import itertools from matplotlib.colors import colorConverter from GPy.inference.optimization import SCG from GPy.util import plot_latent, linalg -from GPy.models.gplvm import GPLVM +from .gplvm import GPLVM from GPy.util.plot_latent import most_significant_input_dimensions from matplotlib import pyplot diff --git a/GPy/models/bcgplvm.py b/GPy/_models/bcgplvm.py similarity index 100% rename from GPy/models/bcgplvm.py rename to GPy/_models/bcgplvm.py diff --git a/GPy/models/fitc_classification.py b/GPy/_models/fitc_classification.py similarity index 100% rename from GPy/models/fitc_classification.py rename to GPy/_models/fitc_classification.py diff --git a/GPy/models/gp_classification.py b/GPy/_models/gp_classification.py similarity index 100% rename from GPy/models/gp_classification.py rename to GPy/_models/gp_classification.py diff --git a/GPy/models/gp_multioutput_regression.py b/GPy/_models/gp_multioutput_regression.py similarity index 100% rename from GPy/models/gp_multioutput_regression.py rename to GPy/_models/gp_multioutput_regression.py diff --git a/GPy/models/gp_regression.py b/GPy/_models/gp_regression.py similarity index 98% rename from GPy/models/gp_regression.py rename to GPy/_models/gp_regression.py index 633fc1c8..8b44c1ba 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/_models/gp_regression.py @@ -2,7 +2,6 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -import numpy as np from ..core import GP from .. import likelihoods from .. 
import kern diff --git a/GPy/models/gplvm.py b/GPy/_models/gplvm.py similarity index 87% rename from GPy/models/gplvm.py rename to GPy/_models/gplvm.py index 795389a7..f27f861c 100644 --- a/GPy/models/gplvm.py +++ b/GPy/_models/gplvm.py @@ -4,15 +4,11 @@ import numpy as np import pylab as pb -import sys, pdb from .. import kern -from ..core import Model -from ..util.linalg import pdinv, PCA -from ..core.priors import Gaussian as Gaussian_prior +from ..core import priors from ..core import GP from ..likelihoods import Gaussian from .. import util -from GPy.util import plot_latent class GPLVM(GP): @@ -34,12 +30,13 @@ class GPLVM(GP): kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) likelihood = Gaussian(Y, normalize=normalize_Y, variance=np.exp(-2.)) GP.__init__(self, X, likelihood, kernel, normalize_X=False) - self.set_prior('.*X', Gaussian_prior(0, 1)) + self.set_prior('.*X', priors.Gaussian(0, 1)) self.ensure_default_constraints() def initialise_latent(self, init, input_dim, Y): Xr = np.random.randn(Y.shape[0], input_dim) if init == 'PCA': + from ..util.linalg import PCA PC = PCA(Y, input_dim)[0] Xr[:PC.shape[0], :PC.shape[1]] = PC return Xr @@ -62,15 +59,15 @@ class GPLVM(GP): def jacobian(self,X): target = np.zeros((X.shape[0],X.shape[1],self.output_dim)) for i in range(self.output_dim): - target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X) + target[:,:,i] = self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X) return target def magnification(self,X): target=np.zeros(X.shape[0]) J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) - J=self.jacobian(X) + J=self.jacobian(X) for i in range(X.shape[0]): - target[i]=np.sqrt(pb.det(np.dot(J[i,:,:],np.transpose(J[i,:,:])))) + target[i]=np.sqrt(pb.det(np.dot(J[i,:,:],np.transpose(J[i,:,:])))) return target def plot(self): diff --git a/GPy/models/gradient_checker.py b/GPy/_models/gradient_checker.py similarity index 100% rename 
from GPy/models/gradient_checker.py rename to GPy/_models/gradient_checker.py diff --git a/GPy/models/mrd.py b/GPy/_models/mrd.py similarity index 99% rename from GPy/models/mrd.py rename to GPy/_models/mrd.py index 2aaa731c..b9c99a64 100644 --- a/GPy/models/mrd.py +++ b/GPy/_models/mrd.py @@ -9,8 +9,8 @@ from GPy.util.linalg import PCA import numpy import itertools import pylab -from GPy.kern.kern import kern -from GPy.models.bayesian_gplvm import BayesianGPLVM +from ..kern import kern +from bayesian_gplvm import BayesianGPLVM class MRD(Model): """ diff --git a/GPy/models/sparse_gp_classification.py b/GPy/_models/sparse_gp_classification.py similarity index 100% rename from GPy/models/sparse_gp_classification.py rename to GPy/_models/sparse_gp_classification.py diff --git a/GPy/models/sparse_gp_multioutput_regression.py b/GPy/_models/sparse_gp_multioutput_regression.py similarity index 100% rename from GPy/models/sparse_gp_multioutput_regression.py rename to GPy/_models/sparse_gp_multioutput_regression.py diff --git a/GPy/models/sparse_gp_regression.py b/GPy/_models/sparse_gp_regression.py similarity index 100% rename from GPy/models/sparse_gp_regression.py rename to GPy/_models/sparse_gp_regression.py diff --git a/GPy/models/sparse_gplvm.py b/GPy/_models/sparse_gplvm.py similarity index 96% rename from GPy/models/sparse_gplvm.py rename to GPy/_models/sparse_gplvm.py index 6e7e40b1..ab616d5a 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/_models/sparse_gplvm.py @@ -5,8 +5,8 @@ import numpy as np import pylab as pb import sys, pdb -from GPy.models.sparse_gp_regression import SparseGPRegression -from GPy.models.gplvm import GPLVM +from sparse_gp_regression import SparseGPRegression +from gplvm import GPLVM # from .. 
import kern # from ..core import model # from ..util.linalg import pdinv, PCA diff --git a/GPy/models/svigp_regression.py b/GPy/_models/svigp_regression.py similarity index 100% rename from GPy/models/svigp_regression.py rename to GPy/_models/svigp_regression.py diff --git a/GPy/models/warped_gp.py b/GPy/_models/warped_gp.py similarity index 100% rename from GPy/models/warped_gp.py rename to GPy/_models/warped_gp.py diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 0da93c7c..7b2c89b9 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -36,7 +36,6 @@ class Mapping(Parameterized): def df_dtheta(self, dL_df, X): """The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters. - :param dL_df: gradient of the objective with respect to the function. :type dL_df: ndarray (num_data x output_dim) :param X: input locations where the function is evaluated. @@ -44,14 +43,13 @@ class Mapping(Parameterized): :returns: Matrix containing gradients with respect to parameters of each output for each input data. :rtype: ndarray (num_params length) """ - raise NotImplementedError def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']): """ Plot the mapping. - + Plots the mapping associated with the model. - In one dimension, the function is plotted. 
- In two dimsensions, a contour-plot shows the function @@ -110,7 +108,7 @@ class Mapping(Parameterized): for d in range(y.shape[1]): ax.plot(Xnew, f[:, d], edgecol=linecol) - elif self.X.shape[1] == 2: + elif self.X.shape[1] == 2: resolution = resolution or 50 Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) @@ -126,7 +124,11 @@ class Mapping(Parameterized): from GPy.core.model import Model class Mapping_check_model(Model): - """This is a dummy model class used as a base class for checking that the gradients of a given mapping are implemented correctly. It enables checkgradient() to be called independently on each mapping.""" + """ + This is a dummy model class used as a base class for checking that the + gradients of a given mapping are implemented correctly. It enables + checkgradient() to be called independently on each mapping. + """ def __init__(self, mapping=None, dL_df=None, X=None): num_samples = 20 if mapping==None: @@ -135,14 +137,14 @@ class Mapping_check_model(Model): X = np.random.randn(num_samples, mapping.input_dim) if dL_df==None: dL_df = np.ones((num_samples, mapping.output_dim)) - + self.mapping=mapping self.X = X self.dL_df = dL_df self.num_params = self.mapping.num_params Model.__init__(self) - + def _get_params(self): return self.mapping._get_params() @@ -157,7 +159,7 @@ class Mapping_check_model(Model): def _log_likelihood_gradients(self): raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class." - + class Mapping_check_df_dtheta(Mapping_check_model): """This class allows gradient checks for the gradient of a mapping with respect to parameters. 
""" def __init__(self, mapping=None, dL_df=None, X=None): @@ -175,13 +177,13 @@ class Mapping_check_df_dX(Mapping_check_model): if dL_df==None: dL_df = np.ones((self.X.shape[0],self.mapping.output_dim)) self.num_params = self.X.shape[0]*self.mapping.input_dim - + def _log_likelihood_gradients(self): return self.mapping.df_dX(self.dL_df, self.X).flatten() def _get_param_names(self): return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])] - + def _get_params(self): return self.X.flatten() diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 666209f9..cdd69ab5 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -6,8 +6,8 @@ from matplotlib import pyplot as plt, cm import GPy from GPy.core.transformations import logexp -from GPy.models.bayesian_gplvm import BayesianGPLVM from GPy.likelihoods.gaussian import Gaussian +from GPy.models import BayesianGPLVM default_seed = np.random.seed(123344) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 44f4ae3f..05eaa028 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -5,6 +5,7 @@ import numpy as np from kern import kern import parts + def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False): """ Construct an RBF kernel @@ -149,33 +150,6 @@ def white(input_dim,variance=1.): part = parts.white.White(input_dim,variance) return kern(input_dim, [part]) -def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None): - """Covariance function for first order differential equation driven by an exponentiated quadratic covariance. - - This outputs of this kernel have the form - .. 
math:: - \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t) - - where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance. - - :param output_dim: number of outputs driven by latent function. - :type output_dim: int - :param W: sensitivities of each output to the latent driving function. - :type W: ndarray (output_dim x rank). - :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance. - :type rank: int - :param decay: decay rates for the first order system. - :type decay: array of length output_dim. - :param delay: delay between latent force and output response. - :type delay: array of length output_dim. - :param kappa: diagonal term that allows each latent output to have an independent component to the response. - :type kappa: array of length output_dim. - - .. Note: see first order differential equation examples in GPy.examples.regression for some usage. - """ - part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay) - return kern(2, [part]) - def exponential(input_dim,variance=1., lengthscale=None, ARD=False): """ @@ -292,8 +266,8 @@ except ImportError: if sympy_available: from parts.sympykern import spkern from sympy.parsing.sympy_parser import parse_expr - from GPy.util.symbolic import sinc - + from GPy.util import symbolic + def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.): """ Radial Basis Function covariance. 
@@ -313,9 +287,19 @@ if sympy_available: f = variance*sp.exp(-dist/(2*lengthscale**2)) return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')]) - def eq_sympy(input_dim, output_dim, ARD=False, variance=1., lengthscale=1.): + def eq_sympy(input_dim, output_dim, ARD=False): """ - Exponentiated quadratic with multiple outputs. + Latent force model covariance, exponentiated quadratic with multiple outputs. Derived from a diffusion equation with the initial spatial condition laid down by a Gaussian process with lengthscale given by shared_lengthscale. + + See IEEE Trans Pattern Anal Mach Intell. 2013 Nov;35(11):2693-705. doi: 10.1109/TPAMI.2013.86. Linear latent force models using Gaussian processes. Alvarez MA, Luengo D, Lawrence ND. + + :param input_dim: Dimensionality of the kernel + :type input_dim: int + :param output_dim: number of outputs in the covariance function. + :type output_dim: int + :param ARD: whether or not to use ARD (default False). + :type ARD: bool + """ real_input_dim = input_dim if output_dim>1: @@ -326,7 +310,7 @@ if sympy_available: if ARD: lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)] shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)] - dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)]) + dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i**2 + lengthscale%i_j**2)' % (i, i, i) for i in range(real_input_dim)]) dist = parse_expr(dist_string) f = variance*sp.exp(-dist/2.) 
else: @@ -337,26 +321,25 @@ if sympy_available: f = scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2))) return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')]) - def sinc(input_dim, ARD=False, variance=1., lengthscale=1.): + def ode1_eq(output_dim=1): """ - TODO: Not clear why this isn't working, suggests argument of sinc is not a number. - sinc covariance funciton + Latent force model covariance, first order differential + equation driven by exponentiated quadratic. + + See N. D. Lawrence, G. Sanguinetti and M. Rattray. (2007) + 'Modelling transcriptional regulation using Gaussian + processes' in B. Schoelkopf, J. C. Platt and T. Hofmann (eds) + Advances in Neural Information Processing Systems, MIT Press, + Cambridge, MA, pp 785--792. + + :param output_dim: number of outputs in the covariance function. + :type output_dim: int """ - X = sp.symbols('x_:' + str(input_dim)) - Z = sp.symbols('z_:' + str(input_dim)) - variance = sp.var('variance',positive=True) - if ARD: - lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)] - dist_string = ' + '.join(['(x_%i-z_%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) - dist = parse_expr(dist_string) - f = variance*sinc(sp.pi*sp.sqrt(dist)) - else: - lengthscale = sp.var('lengthscale',positive=True) - dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(input_dim)]) - dist = parse_expr(dist_string) - f = variance*sinc(sp.pi*sp.sqrt(dist)/lengthscale) - - return kern(input_dim, [spkern(input_dim, f, name='sinc')]) + input_dim = 2 + x_0, z_0, decay_i, decay_j, scale_i, scale_j, lengthscale = sp.symbols('x_0, z_0, decay_i, decay_j, scale_i, scale_j, lengthscale') + f = scale_i*scale_j*(symbolic.h(x_0, z_0, decay_i, decay_j, lengthscale) + + symbolic.h(z_0, x_0, decay_j, decay_i, lengthscale)) + return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='ode1_eq')]) def 
sympykern(input_dim, k=None, output_dim=1, name=None, param=None): """ @@ -600,3 +583,20 @@ def ODE_1(input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthsc """ part = parts.ODE_1.ODE_1(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY) return kern(input_dim, [part]) + +def ODE_UY(input_dim=2, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None): + """ + kernel resulting from a first order ODE with OU driving GP + :param input_dim: the number of input dimension, has to be equal to one + :type input_dim: int + :param input_lengthU: the number of input U length + :param varianceU: variance of the driving GP + :type varianceU: float + :param varianceY: 'variance' of the transfer function + :type varianceY: float + :param lengthscaleY: 'lengthscale' of the transfer function + :type lengthscaleY: float + :rtype: kernel object + """ + part = parts.ODE_UY.ODE_UY(input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY) + return kern(input_dim, [part]) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 619d1687..46bb01c8 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -412,6 +412,9 @@ class kern(Parameterized): [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] return self._transform_gradients(target) + def dpsi0_dZ(self, dL_dpsi0, Z, mu, S): + return np.zeros_like(Z) + def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] @@ -456,73 +459,123 @@ class kern(Parameterized): from parts.linear import Linear from parts.fixed import Fixed - for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self.parts, self.param_slices), 2): + for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self.parts, self.input_slices), 2): # white 
doesn;t combine with anything if isinstance(p1, White) or isinstance(p2, White): pass # rbf X bias elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): - target += 2 * p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :]) + target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :]) elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): - tmp1 = p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :]) - renorm = p1.variance*np.exp() target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :]) # linear X bias - elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear): + elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (Linear, RBF, RBFInv)): tmp = np.zeros((mu.shape[0], Z.shape[0])) p2.psi1(Z, mu, S, tmp) target += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) - elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear): + elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (Linear, RBF, RBFInv)): tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) target += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) # rbf X any - elif isinstance(p1, (RBF, RBFInv)): - pass - elif isinstance(p2, (RBF, RBFInv)): - raise NotImplementedError # TODO + elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)): + if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)): + p1t = p1; p1 = p2; p2 = p1t; del p1t + N, M = mu.shape[0], Z.shape[0]; NM=N*M + psi11 = np.zeros((N, M)) + psi12 = np.zeros((NM, M)) + p1.psi1(Z, mu, S, psi11) + Mu, Sigma = p1._crossterm_mu_S(Z, mu, S) + Mu, Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim) + + p2.psi1(Z, Mu, Sigma, psi12) + eK2 = psi12.reshape(N, M, M) + crossterms = eK2 * (psi11[:, :, None] + psi11[:, None, :]) + target += crossterms + #import ipdb;ipdb.set_trace() else: raise NotImplementedError, "psi2 cannot be computed for this kernel" - return target + return target def dpsi2_dtheta(self, dL_dpsi2, 
Z, mu, S): target = np.zeros(self.num_params) [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] + from parts.white import White + from parts.rbf import RBF + from parts.rbf_inv import RBFInv + from parts.bias import Bias + from parts.linear import Linear + from parts.fixed import Fixed + # compute the "cross" terms # TODO: better looping, input_slices for i1, i2 in itertools.combinations(range(len(self.parts)), 2): p1, p2 = self.parts[i1], self.parts[i2] -# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] - ps1, ps2 = self.param_slices[i1], self.param_slices[i2] - - # white doesn;t combine with anything - if p1.name == 'white' or p2.name == 'white': + #ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] + ps1, ps2 = self.param_slices[i1], self.param_slices[i2] + if isinstance(p1, White) or isinstance(p2, White): pass # rbf X bias - elif p1.name == 'bias' and p2.name == 'rbf': + elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1]) - elif p2.name == 'bias' and p1.name == 'rbf': + elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1]) p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2]) # linear X bias - elif p1.name == 'bias' and p2.name == 'linear': + elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear): p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1]) psi1 = np.zeros((mu.shape[0], Z.shape[0])) p2.psi1(Z, mu, S, psi1) p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1]) - elif p2.name == 'bias' and p1.name == 'linear': + elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear): p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance 
* 2., Z, mu, S, target[ps1]) psi1 = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, psi1) p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2]) # rbf X any - - elif p1.name == 'linear' and p2.name == 'rbf': - raise NotImplementedError # TODO - elif p2.name == 'linear' and p1.name == 'rbf': - raise NotImplementedError # TODO + elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)): + if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)): + # turn around to have rbf in front + p1, p2 = self.parts[i2], self.parts[i1] + ps1, ps2 = self.param_slices[i2], self.param_slices[i1] + + N, M = mu.shape[0], Z.shape[0]; NM=N*M + + psi11 = np.zeros((N, M)) + p1.psi1(Z, mu, S, psi11) + + Mu, Sigma = p1._crossterm_mu_S(Z, mu, S) + Mu, Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim) + + tmp1 = np.zeros_like(target[ps1]) + tmp2 = np.zeros_like(target[ps2]) +# for n in range(N): +# for m in range(M): +# for m_prime in range(M): +# p1.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*psi12_t.reshape(N,M,M)[n:n+1,m:m+1,m_prime:m_prime+1])[0], Z[m:m+1], mu[n:n+1], S[n:n+1], tmp2)#Z[m_prime:m_prime+1], mu[n:n+1], S[n:n+1], tmp2) +# p1.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*psi12_t.reshape(N,M,M)[n:n+1,m_prime:m_prime+1,m:m+1])[0], Z[m_prime:m_prime+1], mu[n:n+1], S[n:n+1], tmp2) +# Mu, Sigma= Mu.reshape(N,M,self.input_dim), Sigma.reshape(N,M,self.input_dim) +# p2.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*(psi11[n:n+1,m_prime:m_prime+1]))[0], Z[m:m+1], Mu[n:n+1,m], Sigma[n:n+1,m], target[ps2]) +# p2.dpsi1_dtheta((dL_dpsi2[n:n+1,m:m+1,m_prime:m_prime+1]*(psi11[n:n+1,m:m+1]))[0], Z[m_prime:m_prime+1], Mu[n:n+1, m_prime], Sigma[n:n+1, m_prime], target[ps2])#Z[m_prime:m_prime+1], Mu[n+m:(n+m)+1], Sigma[n+m:(n+m)+1], target[ps2]) + + if isinstance(p1, RBF) and isinstance(p2, RBF): + psi12 = np.zeros((N, M)) + p2.psi1(Z, mu, S, psi12) + Mu2, Sigma2 = p2._crossterm_mu_S(Z, mu, S) + Mu2, 
Sigma2 = Mu2.reshape(NM,self.input_dim), Sigma2.reshape(NM,self.input_dim) + p1.dpsi1_dtheta((dL_dpsi2*(psi12[:,:,None] + psi12[:,None,:])).reshape(NM,M), Z, Mu2, Sigma2, tmp1) + pass + + if isinstance(p1, RBF) and isinstance(p2, Linear): + #import ipdb;ipdb.set_trace() + pass + + p2.dpsi1_dtheta((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, tmp2) + + target[ps1] += tmp1 + target[ps2] += tmp2 else: raise NotImplementedError, "psi2 cannot be computed for this kernel" @@ -532,61 +585,102 @@ class kern(Parameterized): target = np.zeros_like(Z) [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + from parts.white import White + from parts.rbf import RBF + from parts.rbf_inv import RBFInv + from parts.bias import Bias + from parts.linear import Linear + from parts.fixed import Fixed + # compute the "cross" terms - # TODO: we need input_slices here. + # TODO: better looping, input_slices for p1, p2 in itertools.combinations(self.parts, 2): - # white doesn;t combine with anything - if p1.name == 'white' or p2.name == 'white': + if isinstance(p1, White) or isinstance(p2, White): pass # rbf X bias - elif p1.name == 'bias' and p2.name == 'rbf': - p2.dpsi1_dX(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target) - elif p2.name == 'bias' and p1.name == 'rbf': - p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) + elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): + p2.dpsi1_dZ(dL_dpsi2.sum(1) * p1.variance, Z, mu, S, target) + elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): + p1.dpsi1_dZ(dL_dpsi2.sum(1) * p2.variance, Z, mu, S, target) # linear X bias - elif p1.name == 'bias' and p2.name == 'linear': - p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target) - elif p2.name == 'bias' and p1.name == 'linear': - p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target) - # rbf X linear - elif p1.name == 'linear' and 
p2.name == 'rbf': - raise NotImplementedError # TODO - elif p2.name == 'linear' and p1.name == 'rbf': - raise NotImplementedError # TODO + elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear): + p2.dpsi1_dZ(dL_dpsi2.sum(1) * p1.variance, Z, mu, S, target) + elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear): + p1.dpsi1_dZ(dL_dpsi2.sum(1) * p2.variance, Z, mu, S, target) + # rbf X any + elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)): + if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)): + p1t = p1; p1 = p2; p2 = p1t; del p1t + N, M = mu.shape[0], Z.shape[0]; NM=N*M + psi11 = np.zeros((N, M)) + psi12 = np.zeros((NM, M)) + #psi12_t = np.zeros((N,M)) + + p1.psi1(Z, mu, S, psi11) + Mu, Sigma = p1._crossterm_mu_S(Z, mu, S) + Mu, Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim) + + p2.psi1(Z, Mu, Sigma, psi12) + tmp1 = np.zeros_like(target) + p1.dpsi1_dZ((dL_dpsi2*psi12.reshape(N,M,M)).sum(1), Z, mu, S, tmp1) + p1.dpsi1_dZ((dL_dpsi2*psi12.reshape(N,M,M)).sum(2), Z, mu, S, tmp1) + target += tmp1 + + #p2.dpsi1_dtheta((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, target) + p2.dpsi1_dZ((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, target) else: raise NotImplementedError, "psi2 cannot be computed for this kernel" - - return target * 2. + return target * 2 def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] + from parts.white import White + from parts.rbf import RBF + from parts.rbf_inv import RBFInv + from parts.bias import Bias + from parts.linear import Linear + from parts.fixed import Fixed + # compute the "cross" terms - # TODO: we need input_slices here. 
+ # TODO: better looping, input_slices for p1, p2 in itertools.combinations(self.parts, 2): - # white doesn;t combine with anything - if p1.name == 'white' or p2.name == 'white': + if isinstance(p1, White) or isinstance(p2, White): pass # rbf X bias - elif p1.name == 'bias' and p2.name == 'rbf': - p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S) - elif p2.name == 'bias' and p1.name == 'rbf': - p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) + elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): + p2.dpsi1_dmuS(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target_mu, target_S) + elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): + p1.dpsi1_dmuS(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target_mu, target_S) # linear X bias - elif p1.name == 'bias' and p2.name == 'linear': - p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S) - elif p2.name == 'bias' and p1.name == 'linear': - p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S) - # rbf X linear - elif p1.name == 'linear' and p2.name == 'rbf': - raise NotImplementedError # TODO - elif p2.name == 'linear' and p1.name == 'rbf': - raise NotImplementedError # TODO + elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, Linear): + p2.dpsi1_dmuS(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target_mu, target_S) + elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, Linear): + p1.dpsi1_dmuS(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target_mu, target_S) + # rbf X any + elif False:#isinstance(p1, (RBF, RBFInv)) or isinstance(p2, (RBF, RBFInv)): + if isinstance(p2, (RBF, RBFInv)) and not isinstance(p1, (RBF, RBFInv)): + p1t = p1; p1 = p2; p2 = p1t; del p1t + N, M = mu.shape[0], Z.shape[0]; NM=N*M + psi11 = np.zeros((N, M)) + psi12 = np.zeros((NM, M)) + #psi12_t = np.zeros((N,M)) + + p1.psi1(Z, mu, S, psi11) + Mu, Sigma = p1._crossterm_mu_S(Z, mu, S) + Mu, 
Sigma = Mu.reshape(NM,self.input_dim), Sigma.reshape(NM,self.input_dim) + + p2.psi1(Z, Mu, Sigma, psi12) + p1.dpsi1_dmuS((dL_dpsi2*psi12.reshape(N,M,M)).sum(1), Z, mu, S, target_mu, target_S) + p1.dpsi1_dmuS((dL_dpsi2*psi12.reshape(N,M,M)).sum(2), Z, mu, S, target_mu, target_S) + + #p2.dpsi1_dtheta((dL_dpsi2*(psi11[:,:,None] + psi11[:,None,:])).reshape(NM,M), Z, Mu, Sigma, target) + p2.dpsi1_dmuS((dL_dpsi2*(psi11[:,:,None])).sum(1)*2, Z, Mu.reshape(N,M,self.input_dim).sum(1), Sigma.reshape(N,M,self.input_dim).sum(1), target_mu, target_S) else: raise NotImplementedError, "psi2 cannot be computed for this kernel" - return target_mu, target_S + def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): if which_parts == 'all': which_parts = [True] * self.num_parts @@ -653,7 +747,7 @@ class Kern_check_model(Model): if kernel==None: kernel = GPy.kern.rbf(1) if X==None: - X = np.random.randn(num_samples, kernel.input_dim) + X = np.random.normal(size=(num_samples, kernel.input_dim)) if dL_dK==None: if X2==None: dL_dK = np.ones((X.shape[0], X.shape[0])) @@ -750,7 +844,7 @@ class Kern_check_dKdiag_dX(Kern_check_model): def _set_params(self, x): self.X=x.reshape(self.X.shape) -def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): +def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False, X_positive=False): """This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set. :param kern: the kernel to be tested. 
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


from kernpart import Kernpart
import numpy as np


def index_to_slices(index):
    """
    Group a vector of integer labels into contiguous runs, one list of slices per label.

    :param index: non-negative integer labels (anything np.asarray accepts; the values are cast to int64 and flattened)
    :returns: a list ``ret`` of length ``max(index) + 1``; ``ret[k]`` holds, in order of appearance, one ``slice(start, stop)`` for every contiguous run of the label ``k``. Labels that never occur get an empty list, and an empty ``index`` gives ``[]``.
    :raises ValueError: if any label is negative

    e.g.
    >>> index_to_slices(np.asarray([0,0,0,1,1,1,2,2,2]))
    [[slice(0, 3, None)], [slice(3, 6, None)], [slice(6, 9, None)]]

    or, a more complicated example
    >>> index_to_slices(np.asarray([0,0,1,1,0,2,2,2,1,1]))
    [[slice(0, 2, None), slice(4, 5, None)], [slice(2, 4, None), slice(8, 10, None)], [slice(5, 8, None)]]
    """
    ind = np.asarray(index, dtype=np.int64).ravel()
    if ind.size == 0:
        return []
    if ind.min() < 0:
        # a negative label would silently index ret from the end
        raise ValueError("index must contain non-negative integers")

    # construct the return structure
    ret = [[] for _ in range(ind.max() + 1)]

    # a run starts at position 0 and wherever a label differs from its predecessor
    changes = np.flatnonzero(np.diff(ind)) + 1
    starts = np.concatenate(([0], changes))
    stops = np.concatenate((changes, [ind.size]))

    for label, start, stop in zip(ind[starts], starts, stops):
        ret[label].append(slice(int(start), int(stop)))
    return ret


def _ode_uy_blocks(slices, slices2):
    """Yield (i, j, rows, cols) for every pair of runs: rows carry output label i, cols carry label j."""
    for i, runs1 in enumerate(slices):
        for j, runs2 in enumerate(slices2):
            for rows in runs1:
                for cols in runs2:
                    if i > 1 or j > 1:
                        # only label 0 (U) and label 1 (Y) are defined, as in Kdiag
                        raise ValueError("invalid input/output index")
                    yield i, j, rows, cols


def _ode_uy_terms(d, lu, ly):
    """
    Evaluate the building blocks (k1, k2, k3) of the Y covariance at the non-negative lags d.

    varianceU*varianceY*(k1+k2+k3) is the y-y covariance; the cross-covariances
    use k3 on one side of zero lag and k1+k2 on the other (see ODE_UY.K).
    k2 divides by (ly-lu)**2, so the terms are undefined when lu == ly.
    """
    decay_u = np.exp(-lu * d)
    decay_y = np.exp(-ly * d)
    k1 = decay_y * (2. * lu + ly) / (lu + ly) ** 2
    k2 = (decay_u * (ly - 2. * lu + lu * ly * d - lu ** 2 * d) + decay_y * (2. * lu - ly)) / (ly - lu) ** 2
    k3 = decay_u * ((1. + lu * d) / (lu + ly) + lu / (lu + ly) ** 2)
    return k1, k2, k3


def _ode_uy_term_grads(d, lu, ly):
    """Return ((dk1, dk2, dk3)/dlu, (dk1, dk2, dk3)/dly) for the terms of _ode_uy_terms at lags d."""
    decay_u = np.exp(-lu * d)
    decay_y = np.exp(-ly * d)
    s = lu + ly
    q = ly - lu
    a = ly - 2. * lu + lu * ly * d - lu ** 2 * d  # factor of exp(-lu*d) in k2
    b = 2. * lu + ly + lu ** 2 * d + lu * ly * d  # k3 == exp(-lu*d) * b / s**2

    dk1_dlu = decay_y * 2. * (-lu) / s ** 3
    dk2_dlu = (decay_u * d * (-a) / q ** 2
               + decay_u * (-2. + ly * d - 2. * d * lu) / q ** 2
               + decay_u * a * 2. / q ** 3
               + decay_y * 2. / q ** 2
               + decay_y * 2. * (2. * lu - ly) / q ** 3)
    dk3_dlu = decay_u / s ** 2 * (b * (-d - 2. / s) + 2. + 2. * lu * d + ly * d)

    dk1_dly = decay_y / s ** 2 * (-d * (2. * lu + ly) + 1. - 2. * (2. * lu + ly) / s)
    dk2_dly = (decay_u / q ** 2 * (1. + lu * d - 2. * a / q)
               + decay_y / q ** 2 * (-d * (2. * lu - ly) - 1. - 2. * (2. * lu - ly) / q))
    dk3_dly = decay_u * (-3. * lu - ly - d * lu ** 2 - lu * ly * d) / s ** 3
    return (dk1_dlu, dk2_dlu, dk3_dlu), (dk1_dly, dk2_dly, dk3_dly)


class ODE_UY(Kernpart):
    """
    Kernel resulting from a first order ODE with OU driving GP.

    The inputs have two columns: the first is the (one dimensional) input, the
    last is an integer output index, 0 for the driving process U and 1 for the
    output Y.  NOTE(review): the text above says OU, but the u-u block computed
    in K has the Matern-3/2 form -- confirm which is intended.

    :param input_dim: the number of input columns, has to be equal to two (input + output index)
    :type input_dim: int
    :param varianceU: variance of the driving GP
    :type varianceU: float
    :param varianceY: 'variance' of the transfer function
    :type varianceY: float
    :param lengthscaleU: lengthscale of the driving GP (used as sqrt(3)/lengthscaleU)
    :type lengthscaleU: float
    :param lengthscaleY: 'lengthscale' of the transfer function (used as 1/lengthscaleY)
    :type lengthscaleY: float
    :rtype: kernel object

    """

    def __init__(self, input_dim=2, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
        assert input_dim == 2, "Only defined for input_dim = 2 (one input column plus the output index)"
        self.input_dim = input_dim
        self.num_params = 4
        self.name = 'ODE_UY'

        if lengthscaleU is not None:
            lengthscaleU = np.asarray(lengthscaleU)
            assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
        else:
            lengthscaleU = np.ones(1)
        if lengthscaleY is not None:
            lengthscaleY = np.asarray(lengthscaleY)
            assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
        else:
            lengthscaleY = np.ones(1)
        self._set_params(np.hstack((varianceU, varianceY, lengthscaleU, lengthscaleY)))

    def _get_params(self):
        """return the value of the parameters."""
        return np.hstack((self.varianceU, self.varianceY, self.lengthscaleU, self.lengthscaleY))

    def _set_params(self, x):
        """set the value of the parameters, ordered as in _get_param_names."""
        assert x.size == self.num_params

        self.varianceU = x[0]
        self.varianceY = x[1]
        self.lengthscaleU = x[2]
        self.lengthscaleY = x[3]

    def _get_param_names(self):
        """return parameter names."""
        return ['varianceU', 'varianceY', 'lengthscaleU', 'lengthscaleY']

    def K(self, X, X2, target):
        """
        Add the covariance matrix between X and X2 to target.

        :param X: inputs; the last column is the output index (0 for U, 1 for Y)
        :param X2: second set of inputs laid out like X, or None to use X
        :param target: array of shape (X.shape[0], X2.shape[0]) the covariance is accumulated into
        :raises ValueError: if an output index other than 0 or 1 is present
        """
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])

        rdist = X - X2.T  # signed lags
        ly = 1. / self.lengthscaleY
        lu = np.sqrt(3) / self.lengthscaleU
        Vu = self.varianceU
        Vy = self.varianceY

        for i, j, rows, cols in _ode_uy_blocks(slices, slices2):
            r = rdist[rows, cols]
            d = np.abs(r)
            if i == 0 and j == 0:
                block = Vu * (1. + lu * d) * np.exp(-lu * d)
            else:
                k1, k2, k3 = _ode_uy_terms(d, lu, ly)
                if i == 1 and j == 1:
                    block = Vu * Vy * (k1 + k2 + k3)
                elif i == 0:
                    # u-y: k3 for positive lags, k1+k2 otherwise
                    block = Vu * Vy * np.where(r > 0, k3, k1 + k2)
                else:
                    # y-u: mirror image of the u-y block
                    block = Vu * Vy * np.where(r > 0, k1 + k2, k3)
            # accumulate like Kdiag/dK_dtheta so other contributions already in target survive
            target[rows, cols] += block

    def Kdiag(self, X, target):
        """
        Add the diagonal of the covariance matrix associated to X to target.

        :raises ValueError: if an output index other than 0 or 1 is present
        """
        ly = 1. / self.lengthscaleY
        lu = np.sqrt(3) / self.lengthscaleU

        # zero-lag value of the y-y covariance
        k1, k2, k3 = _ode_uy_terms(0., lu, ly)
        variance_y = self.varianceU * self.varianceY * (k1 + k2 + k3)

        for i, runs in enumerate(index_to_slices(X[:, -1])):
            for run in runs:
                if i == 0:
                    target[run] += self.varianceU
                elif i == 1:
                    target[run] += variance_y
                else:
                    raise ValueError("invalid input/output index")

    def dK_dtheta(self, dL_dK, X, X2, target):
        """
        Add the derivative of the objective with respect to the parameters to target.

        :param dL_dK: derivative of the objective with respect to K, same shape as K
        :param X: inputs; the last column is the output index (0 for U, 1 for Y)
        :param X2: second set of inputs laid out like X, or None to use X
        :param target: length 4 array ordered as in _get_param_names, accumulated into
        :raises ValueError: if an output index other than 0 or 1 is present
        """
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])

        rdist = X - X2.T
        ly = 1. / self.lengthscaleY
        lu = np.sqrt(3) / self.lengthscaleU
        Vu = self.varianceU
        Vy = self.varianceY

        # gradient with respect to (varianceU, varianceY, lu, ly)
        grad = np.zeros(4)
        for i, j, rows, cols in _ode_uy_blocks(slices, slices2):
            r = rdist[rows, cols]
            d = np.abs(r)
            dL = dL_dK[rows, cols]

            if i == 0 and j == 0:
                # K = Vu*(1+lu*d)*exp(-lu*d) does not depend on varianceY or ly
                decay = np.exp(-lu * d)
                grad[0] += np.sum(dL * (1. + lu * d) * decay)
                grad[2] += np.sum(dL * (-Vu * lu * d ** 2 * decay))
                continue

            k1, k2, k3 = _ode_uy_terms(d, lu, ly)
            (a1, a2, a3), (b1, b2, b3) = _ode_uy_term_grads(d, lu, ly)
            if i == 1 and j == 1:
                k = k1 + k2 + k3
                dk_dlu = a1 + a2 + a3
                dk_dly = b1 + b2 + b3
            else:
                # same branch selection as in K
                if i == 0:
                    use_k3 = r > 0
                else:
                    use_k3 = np.logical_not(r > 0)
                k = np.where(use_k3, k3, k1 + k2)
                dk_dlu = np.where(use_k3, a3, a1 + a2)
                dk_dly = np.where(use_k3, b3, b1 + b2)

            grad[0] += np.sum(dL * Vy * k)
            grad[1] += np.sum(dL * Vu * k)
            grad[2] += np.sum(dL * Vu * Vy * dk_dlu)
            grad[3] += np.sum(dL * Vu * Vy * dk_dly)

        target[0] += grad[0]
        target[1] += grad[1]
        # chain rule: lu = sqrt(3)/lengthscaleU and ly = 1/lengthscaleY
        target[2] += grad[2] * (-np.sqrt(3) * self.lengthscaleU ** (-2))
        target[3] += grad[3] * (-self.lengthscaleY ** (-2))

    # NOTE(review): dKdiag_dtheta, dK_dX and dKdiag_dX are not implemented for
    # this kernel.  The disabled drafts that follow reference self.lengthscale
    # and self.variance, which nothing in this class sets, so they cannot
    # simply be re-enabled.

    # def dKdiag_dtheta(self, dL_dKdiag, X, target):
    #     """derivative of the diagonal of the covariance matrix with respect to the parameters."""
    #     # NB: derivative of diagonal elements wrt lengthscale is 0
    #     target[0] += np.sum(dL_dKdiag)

    # def dK_dX(self, dL_dK, X, X2, target):
    #     """derivative of the covariance matrix with respect to X."""
    #     if X2 is None: X2 = X
    #     dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
    #     ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale **
2 / np.where(dist != 0., dist, np.inf) + # dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2)) + # target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) + + # def dKdiag_dX(self, dL_dKdiag, X, target): + # pass diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py index 0a758f1e..d8e7f8e6 100644 --- a/GPy/kern/parts/__init__.py +++ b/GPy/kern/parts/__init__.py @@ -14,6 +14,7 @@ import Matern32 import Matern52 import mlp import ODE_1 +import ODE_UY import periodic_exponential import periodic_Matern32 import periodic_Matern52 @@ -26,4 +27,5 @@ import rbf import rbf_inv import spline import symmetric +import sympy_helpers import white diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index 585d687f..dbc689d5 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -186,7 +186,7 @@ class RBF(Kernpart): self._psi_computations(Z, mu, S) target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance) d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale) - dpsi1_dlength = d_length * dL_dpsi1[:, :, None] + dpsi1_dlength = d_length * np.atleast_3d(dL_dpsi1) if not self.ARD: target[1] += dpsi1_dlength.sum() else: @@ -208,12 +208,19 @@ class RBF(Kernpart): self._psi_computations(Z, mu, S) target += self._psi2 + def _crossterm_mu_S(self, Z, mu, S): + # compute the crossterm expectation for K as the other kernel: + Sigma = 1./self.lengthscale2[None,None,:] + 1./S[:,None,:] # is independent across M, + Sigma_tilde = (self.lengthscale2[None, :] + S) + M = (S*mu/Sigma_tilde)[:, None, :] + (self.lengthscale2[None,:]*Z)[None, :, :]/Sigma_tilde[:, None, :] + # make sure return is [N x M x Q] + return M, Sigma.repeat(Z.shape[0],1) + def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target): """Shape N,num_inducing,num_inducing,Ntheta""" self._psi_computations(Z, mu, S) d_var = 2.*self._psi2 / self.variance d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * 
self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom) - target[0] += np.sum(dL_dpsi2 * d_var) dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None] if not self.ARD: @@ -296,8 +303,8 @@ class RBF(Kernpart): psi2 = np.empty((N, num_inducing, num_inducing)) psi2_Zdist_sq = self._psi2_Zdist_sq - _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim) - half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim) + _psi2_denom = self._psi2_denom.squeeze().reshape(-1, input_dim) + half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(-1, input_dim) variance_sq = float(np.square(self.variance)) if self.ARD: lengthscale2 = self.lengthscale2 diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp index e4df4d80..9f30eea9 100644 --- a/GPy/kern/parts/sympy_helpers.cpp +++ b/GPy/kern/parts/sympy_helpers.cpp @@ -1,7 +1,9 @@ +#include "Python.h" #include #include #include - +#include +#include double DiracDelta(double x){ // TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills @@ -14,6 +16,7 @@ double DiracDelta(double x,int foo){ }; double sinc(double x){ + // compute the sinc function if (x==0) return 1.0; else @@ -21,28 +24,39 @@ double sinc(double x){ } double sinc_grad(double x){ + // compute the gradient of the sinc function. if (x==0) return 0.0; else return (x*cos(x) - sin(x))/(x*x); } - double erfcx(double x){ + // Based on code by Soren Hauberg 2010 for Octave. + // compute the scaled complex error function. 
+ //return erfc(x)*exp(x*x); double xneg=-sqrt(log(DBL_MAX/2)); double xmax = 1/(sqrt(M_PI)*DBL_MIN); xmax = DBL_MAXxmax) return 0.0; else @@ -50,12 +64,115 @@ double erfcx(double x){ } double ln_diff_erf(double x0, double x1){ - if (x0==x1) - return INFINITY; - else if(x0<0 && x1>0 || x0>0 && x1<0) + // stably compute the log of difference between two erfs. + if (x1>x0){ + PyErr_SetString(PyExc_RuntimeError,"second argument must be smaller than or equal to first in ln_diff_erf"); + throw 1; + } + if (x0==x1){ + PyErr_WarnEx(PyExc_RuntimeWarning,"divide by zero encountered in log", 1); + return -INFINITY; + } + else if(x0<0 && x1>0 || x0>0 && x1<0) //x0 and x1 have opposite signs return log(erf(x0)-erf(x1)); - else if(x1>0) - return log(erfcx(x1)-erfcx(x0)*exp(x1*x1)- x0*x0)-x1*x1; - else + else if(x0>0) //x0 positive, x1 non-negative + return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1; + else //x0 and x1 non-positive return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0; } + +double h(double t, double tprime, double d_i, double d_j, double l){ + // Compute the h function for the sim covariance. + double half_l_di = 0.5*l*d_i; + double arg_1 = half_l_di + tprime/l; + double arg_2 = half_l_di - (t-tprime)/l; + double ln_part_1 = ln_diff_erf(arg_1, arg_2); + arg_2 = half_l_di - t/l; + double sign_val = 1.0; + if(t/l==0) + sign_val = 0.0; + else if (t/l < 0) + sign_val = -1.0; + arg_2 = half_l_di - t/l; + double ln_part_2 = ln_diff_erf(half_l_di, arg_2); + // if either ln_part_1 or ln_part_2 are -inf, don't bother computing rest of that term. 
+ double part_1 = 0.0; + if(isfinite(ln_part_1)) + part_1 = sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j)); + double part_2 = 0.0; + if(isfinite(ln_part_2)) + part_2 = sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j)); + return part_1 - part_2; +} + + +double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){ + double diff_t = (t-tprime); + double l2 = l*l; + double hv = h(t, tprime, d_i, d_j, l); + double half_l_di = 0.5*l*d_i; + double arg_1 = half_l_di + tprime/l; + double arg_2 = half_l_di - (t-tprime)/l; + double ln_part_1 = ln_diff_erf(arg_1, arg_2); + arg_1 = half_l_di; + arg_2 = half_l_di - t/l; + double sign_val = 1.0; + if(t/l==0) + sign_val = 0.0; + else if (t/l < 0) + sign_val = -1.0; + double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l); + double base = (0.5*d_i*l2*(d_i+d_j)-1)*hv; + if(isfinite(ln_part_1)) + base -= diff_t*sign_val*exp(half_l_di*half_l_di + -d_i*diff_t + +ln_part_1); + if(isfinite(ln_part_2)) + base += t*sign_val*exp(half_l_di*half_l_di + -d_i*t-d_j*tprime + +ln_part_2); + base += l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2) + +exp(-tprime*tprime/l2-d_i*t) + +exp(-t*t/l2-d_j*tprime) + -exp(-(d_i*t + d_j*tprime))); + return base/(d_i+d_j); + +} + +double dh_dd_j(double t, double tprime, double d_i, double d_j, double l){ + double half_l_di = 0.5*l*d_i; + double hv = h(t, tprime, d_i, d_j, l); + double sign_val = 1.0; + if(t/l==0) + sign_val = 0.0; + else if (t/l < 0) + sign_val = -1.0; + double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l); + double base = -hv; + if(isfinite(ln_part_2)) + base += tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2); + return base/(d_i+d_j); +} + +double dh_dl(double t, double tprime, double d_i, double d_j, double l){ + // compute gradient of h function with respect to lengthscale for sim covariance + // TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow 
... perhaps needs rewrite of sympykern. + double half_l_di = 0.5*l*d_i; + double arg_1 = half_l_di + tprime/l; + double arg_2 = half_l_di - (t-tprime)/l; + double ln_part_1 = ln_diff_erf(arg_1, arg_2); + arg_2 = half_l_di - t/l; + double ln_part_2 = ln_diff_erf(half_l_di, arg_2); + double diff_t = t - tprime; + double l2 = l*l; + double hv = h(t, tprime, d_i, d_j, l); + return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime))); +} + +double dh_dt(double t, double tprime, double d_i, double d_j, double l){ + return 0.0; +} + +double dh_dtprime(double t, double tprime, double d_i, double d_j, double l){ + return 0.0; +} diff --git a/GPy/kern/parts/sympy_helpers.h b/GPy/kern/parts/sympy_helpers.h index 56220167..5e58d5d2 100644 --- a/GPy/kern/parts/sympy_helpers.h +++ b/GPy/kern/parts/sympy_helpers.h @@ -7,3 +7,10 @@ double sinc_grad(double x); double erfcx(double x); double ln_diff_erf(double x0, double x1); + +double h(double t, double tprime, double d_i, double d_j, double l); +double dh_dl(double t, double tprime, double d_i, double d_j, double l); +double dh_dd_i(double t, double tprime, double d_i, double d_j, double l); +double dh_dd_j(double t, double tprime, double d_i, double d_j, double l); +double dh_dt(double t, double tprime, double d_i, double d_j, double l); +double dh_dtprime(double t, double tprime, double d_i, double d_j, double l); diff --git a/GPy/kern/parts/sympy_helpers.py b/GPy/kern/parts/sympy_helpers.py new file mode 100644 index 00000000..125dac58 --- /dev/null +++ b/GPy/kern/parts/sympy_helpers.py @@ -0,0 +1,71 @@ +# Code for testing functions written in sympy_helpers.cpp +from scipy import weave +import tempfile +import os +import numpy as np +current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) +extra_compile_args = [] + +weave_kwargs = { + 'support_code': "", + 
'include_dirs':[tempfile.gettempdir(), current_dir], + 'headers':['"parts/sympy_helpers.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], + 'extra_compile_args':extra_compile_args, + 'extra_link_args':['-lgomp'], + 'verbose':True} + +def erfcx(x): + code = """ + // Code for computing scaled complementary erf + int i; + int dim; + int elements = Ntarget[0]; + for (dim=1; dim