diff --git a/GPy/__init__.py b/GPy/__init__.py new file mode 100644 index 00000000..718b763b --- /dev/null +++ b/GPy/__init__.py @@ -0,0 +1,6 @@ +import kern +import models +import inference +import util +import examples +from core import priors diff --git a/GPy/kern/Brownian.py b/GPy/kern/Brownian.py new file mode 100644 index 00000000..3e1d8ee5 --- /dev/null +++ b/GPy/kern/Brownian.py @@ -0,0 +1,56 @@ +from kernpart import kernpart +import numpy as np + +def theta(x): + """Heavisdie step function""" + return np.where(x>=0.,1.,0.) + +class Brownian(kernpart): + """ + Brownian Motion kernel. + + :param D: the number of input dimensions + :type D: int + :param variance: + :type variance: float + """ + def __init__(self,D,variance=1.): + self.D = D + assert self.D==1, "Brownian motion in 1D only" + self.Nparam = 1. + self.name = 'Brownian' + self.set_param(np.array([variance]).flatten()) + + def get_param(self): + return self.variance + + def set_param(self,x): + assert x.shape==(1,) + self.variance = x + + def get_param_names(self): + return ['variance'] + + def K(self,X,X2,target): + target += self.variance*np.fmin(X,X2.T) + + def Kdiag(self,X,target): + target += self.variance*X.flatten() + + def dK_dtheta(self,X,X2,target): + target += np.fmin(X,X2.T) + + def dKdiag_dtheta(self,X,target): + target += X.flatten() + + def dK_dX(self,X,X2,target): + target += self.variance + target -= self.variance*theta(X-X2.T) + if X.shape==X2.shape: + if np.all(X==X2): + np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0]) + + + def dKdiag_dX(self,X,target): + target += self.variance + diff --git a/GPy/kern/DelayedDecorator.py b/GPy/kern/DelayedDecorator.py new file mode 100644 index 00000000..bcd53744 --- /dev/null +++ b/GPy/kern/DelayedDecorator.py @@ -0,0 +1,86 @@ +from new import instancemethod +#this is here so that we can have nice syntatic sugar when taking gradients +# See the transform_gradients decorator in kern + +class DelayedDecorator(object): + """Wrapper that delays decorating a function until it is invoked. + + This class allows a decorator to be used with both ordinary functions and + methods of classes. It wraps the function passed to it with the decorator + passed to it, but with some special handling: + + - If the wrapped function is an ordinary function, it will be decorated + the first time it is called. + + - If the wrapped function is a method of a class, it will be decorated + separately the first time it is called on each instance of the class. + It will also be decorated separately the first time it is called as + an unbound method of the class itself (though this use case should + be rare). + """ + + def __init__(self, deco, func): + # The base decorated function (which may be modified, see below) + self._func = func + # The decorator that will be applied + self._deco = deco + # Variable to monitor calling as an ordinary function + self.__decofunc = None + # Variable to monitor calling as an unbound method + self.__clsfunc = None + + def _decorated(self, cls=None, instance=None): + """Return the decorated function. + + This method is for internal use only; it can be implemented by + subclasses to modify the actual decorated function before it is + returned. The ``cls`` and ``instance`` parameters are supplied so + this method can tell how it was invoked. If it is not overridden, + the base function stored when this class was instantiated will + be decorated by the decorator passed when this class was instantiated, + and then returned. + + Note that factoring out this method, in addition to allowing + subclasses to modify the decorated function, ensures that the + right thing is done automatically when the decorated function + itself is a higher-order function (e.g., a generator function). + Since this method is called every time the decorated function + is accessed, a new instance of whatever it returns will be + created (e.g., a new generator will be realized), which is + exactly the expected semantics. + """ + return self._deco(self._func) + + def __call__(self, *args, **kwargs): + """Direct function call syntax support. + + This makes an instance of this class work just like the underlying + decorated function when called directly as an ordinary function. + An internal reference to the decorated function is stored so that + future direct calls will get the stored function. + """ + if not self.__decofunc: + self.__decofunc = self._decorated() + return self.__decofunc(*args, **kwargs) + + def __get__(self, instance, cls): + """Descriptor protocol support. + + This makes an instance of this class function correctly when it + is used to decorate a method on a user-defined class. If called + as a bound method, we store the decorated function in the instance + dictionary, so we will not be called again for that instance. If + called as an unbound method, we store a reference to the decorated + function internally and use it on future unbound method calls. + """ + if instance: + deco = instancemethod(self._decorated(cls, instance), instance, cls) + # This prevents us from being called again for this instance + setattr(instance, self._func.__name__, deco) + elif cls: + if not self.__clsfunc: + self.__clsfunc = instancemethod(self._decorated(cls), None, cls) + deco = self.__clsfunc + else: + raise ValueError("Must supply instance or class to descriptor.") + return deco diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py new file mode 100644 index 00000000..da4e8a6d --- /dev/null +++ b/GPy/kern/Matern32.py @@ -0,0 +1,107 @@ +from kernpart import kernpart +import numpy as np +import hashlib +from ..util.linalg import pdinv,mdot +from scipy import integrate + +class Matern32(kernpart): + """ + Matern 3/2 kernel: + + .. math:: + + k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + + :param D: the number of input dimensions + :type D: int + :param variance: the variance :math:`\sigma^2` + :type variance: float + :param lengthscale: the lengthscales :math:`\ell_i` + :type lengthscale: np.ndarray of size (D,) + :rtype: kernel object + + """ + + def __init__(self,D,variance=1.,lengthscales=None): + self.D = D + if lengthscales is not None: + assert lengthscales.shape==(self.D,) + else: + lengthscales = np.ones(self.D) + self.Nparam = self.D + 1 + self.name = 'Mat32' + self.set_param(np.hstack((variance,lengthscales))) + + def get_param(self): + """return the value of the parameters.""" + return np.hstack((self.variance,self.lengthscales)) + def set_param(self,x): + """set the value of the parameters.""" + assert x.size==(self.D+1) + self.variance = x[0] + self.lengthscales = x[1:] + def get_param_names(self): + """return parameter names.""" + if self.D==1: + return ['variance','lengthscale'] + else: + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] + + def K(self,X,X2,target): + """Compute the covariance matrix between X and X2.""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target) + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + np.add(target,self.variance,target) + + def Gram_matrix(self,F,F1,F2,lower,upper): + """ + Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1. + + :param F: vector of functions + :type F: np.array + :param F1: vector of derivatives of F + :type F1: np.array + :param F2: vector of second derivatives of F + :type F2: np.array + :param lower,upper: boundaries of the input domain + :type lower,upper: floats + """ + assert self.D == 1 + def L(x,i): + return(3./self.lengthscales**2*F[i](x) + 2*np.sqrt(3)/self.lengthscales*F1[i](x) + F2[i](x)) + n = F.shape[0] + G = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] + Flower = np.array([f(lower) for f in F])[:,None] + F1lower = np.array([f(lower) for f in F1])[:,None] + #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" + #return(G) + return(self.lengthscales**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscales**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) + + def dK_dtheta(self,X,X2,target): + """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist) + invdist = 1./np.where(dist!=0.,dist,np.inf) + dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 + dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + np.add(target[:,:,0],dvar, target[:,:,0]) + np.add(target[:,:,1:],dl, target[:,:,1:]) + def dKdiag_dtheta(self,X,target): + """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)""" + np.add(target[:,0],1.,target[:,0]) + def dK_dX(self,X,X2,target): + """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*).""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] + ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) + target += - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2)) + def dKdiag_dX(self,X,target): + pass + diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py new file mode 100644 index 00000000..836fd793 --- /dev/null +++ b/GPy/kern/Matern52.py @@ -0,0 +1,112 @@ +from kernpart import kernpart +import numpy as np +import hashlib +from scipy import integrate + +class Matern52(kernpart): + """ + Matern 5/2 kernel: + + .. math:: + + k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + + :param D: the number of input dimensions + :type D: int + :param variance: the variance :math:`\sigma^2` + :type variance: float + :param lengthscale: the lengthscales :math:`\ell_i` + :type lengthscale: np.ndarray of size (D,) + :rtype: kernel object + + """ + def __init__(self,D,variance=1.,lengthscales=None): + self.D = D + if lengthscales is not None: + assert lengthscales.shape==(self.D,) + else: + lengthscales = np.ones(self.D) + self.Nparam = self.D + 1 + self.name = 'Mat52' + self.set_param(np.hstack((variance,lengthscales))) + self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S + + def get_param(self): + """return the value of the parameters.""" + return np.hstack((self.variance,self.lengthscales)) + def set_param(self,x): + """set the value of the parameters.""" + assert x.size==(self.D+1) + self.variance = x[0] + self.lengthscales = x[1:] + self.lengthscales2 = np.square(self.lengthscales) + self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S + def get_param_names(self): + """return parameter names.""" + if self.D==1: + return ['variance','lengthscale'] + else: + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] + def K(self,X,X2,target): + """Compute the covariance matrix between X and X2.""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target) + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + np.add(target,self.variance,target) + + def Gram_matrix(self,F,F1,F2,F3,lower,upper): + """ + Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1. + + :param F: vector of functions + :type F: np.array + :param F1: vector of derivatives of F + :type F1: np.array + :param F2: vector of second derivatives of F + :type F2: np.array + :param F3: vector of third derivatives of F + :type F3: np.array + :param lower,upper: boundaries of the input domain + :type lower,upper: floats + """ + assert self.D == 1 + def L(x,i): + return(5*np.sqrt(5)/self.lengthscales**3*F[i](x) + 15./self.lengthscales**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscales*F2[i](x) + F3[i](x)) + n = F.shape[0] + G = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] + G_coef = 3.*self.lengthscales**5/(400*np.sqrt(5)) + Flower = np.array([f(lower) for f in F])[:,None] + F1lower = np.array([f(lower) for f in F1])[:,None] + F2lower = np.array([f(lower) for f in F2])[:,None] + orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscales**4/200*np.dot(F2lower,F2lower.T) + orig2 = 3./5*self.lengthscales**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) + return(1./self.variance* (G_coef*G + orig + orig2)) + + def dK_dtheta(self,X,X2,target): + """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + invdist = 1./np.where(dist!=0.,dist,np.inf) + dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 + dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist) + dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + np.add(target[:,:,0],dvar, target[:,:,0]) + np.add(target[:,:,1:],dl, target[:,:,1:]) + def dKdiag_dtheta(self,X,target): + """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)""" + np.add(target[:,0],1.,target[:,0]) + def dK_dX(self,X,X2,target): + """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*).""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] + ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) + target += - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) + def dKdiag_dX(self,X,target): + pass + + diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py new file mode 100644 index 00000000..35f70a35 --- /dev/null +++ b/GPy/kern/__init__.py @@ -0,0 +1,2 @@ +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD +from kern import kern diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py new file mode 100644 index 00000000..55c5cff6 --- /dev/null +++ b/GPy/kern/bias.py @@ -0,0 +1,47 @@ +from kernpart import kernpart +import numpy as np +import hashlib + +class bias(kernpart): + def __init__(self,D,variance=1.): + """ + Arguments + ---------- + D: int - the number of input dimensions + variance: float + """ + self.D = D + self.Nparam = 1 + self.name = 'bias' + self.set_param(np.array([variance]).flatten()) + + def get_param(self): + return self.variance + + def set_param(self,x): + assert x.shape==(1,) + self.variance = x + + def get_param_names(self): + return ['variance'] + + def K(self,X,X2,target): + if X2 is None: X2 = X + np.add(self.variance, target,target) + + def Kdiag(self,X,target): + np.add(target,self.variance,target) + + def dK_dtheta(self,X,X2,target): + """Return shape is NxMx(Ntheta)""" + if X2 is None: X2 = X + np.add(target[:,:,0],1., target[:,:,0]) + + def dKdiag_dtheta(self,X,target): + np.add(target[:,0],1.,target[:,0]) + + def dK_dX(self, X, X2, target): + pass + + def dKdiag_dX(self,X,target): + pass diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py new file mode 100644 index 00000000..f3ffdba2 --- /dev/null +++ b/GPy/kern/constructors.py @@ -0,0 +1,168 @@ +import numpy as np +from kern import kern + +from rbf import rbf as rbfpart +from rbf_ARD import rbf_ARD as rbf_ARD_part +from white import white as whitepart +from linear import linear as linearpart +from linear_ARD import linear_ARD as linear_ARD_part +from exponential import exponential as exponentialpart +from Matern32 import Matern32 as Matern32part +from Matern52 import Matern52 as Matern52part +from bias import bias as biaspart +from finite_dimensional import finite_dimensional as finite_dimensionalpart +from spline import spline as splinepart +from Brownian import Brownian as Brownianpart + +#TODO these s=constructors are not as clean as we'd like. Tidy the code up +#using meta-classes to make the objects construct properly wthout them. + +def rbf(D,variance=1., lengthscale=1.): + """ + Construct an RBF kernel + + :param D: dimensionality of the kernel, obligatory + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + """ + part = rbfpart(D,variance,lengthscale) + return kern(D, [part]) + +def rbf_ARD(D,variance=1., lengthscales=None): + """ + Construct an RBF kernel with Automatic Relevance Determination (ARD) + + :param D: dimensionality of the kernel, obligatory + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscales: the lengthscales of the kernel + :type lengthscales: None|np.ndarray + """ + part = rbf_ARD_part(D,variance,lengthscales) + return kern(D, [part]) + +def linear(D,lengthscales=None): + """ + Construct a linear kernel. + + Arguments + --------- + D (int), obligatory + lengthscales (np.ndarray) + """ + part = linearpart(D,lengthscales) + return kern(D, [part]) + +def linear_ARD(D,lengthscales=None): + """ + Construct a linear ARD kernel. + + Arguments + --------- + D (int), obligatory + lengthscales (np.ndarray) + """ + part = linear_ARD_part(D,lengthscales) + return kern(D, [part]) + +def white(D,variance=1.): + """ + Construct a white kernel. + + Arguments + --------- + D (int), obligatory + variance (float) + """ + part = whitepart(D,variance) + return kern(D, [part]) + +def exponential(D,variance=1., lengthscales=None): + """ + Construct a exponential kernel. + + Arguments + --------- + D (int), obligatory + variance (float) + lengthscales (np.ndarray) + """ + part = exponentialpart(D,variance, lengthscales) + return kern(D, [part]) + +def Matern32(D,variance=1., lengthscales=None): + """ + Construct a Matern 3/2 kernel. + + Arguments + --------- + D (int), obligatory + variance (float) + lengthscales (np.ndarray) + """ + part = Matern32part(D,variance, lengthscales) + return kern(D, [part]) + +def Matern52(D,variance=1., lengthscales=None): + """ + Construct a Matern 5/2 kernel. + + Arguments + --------- + D (int), obligatory + variance (float) + lengthscales (np.ndarray) + """ + part = Matern52part(D,variance, lengthscales) + return kern(D, [part]) + +def bias(D,variance=1.): + """ + Construct a bias kernel. + + Arguments + --------- + D (int), obligatory + variance (float) + """ + part = biaspart(D,variance) + return kern(D, [part]) + +def finite_dimensional(D,F,G,variances=1.,weights=None): + """ + Construct a finite dimensional kernel. + D: int - the number of input dimensions + F: np.array of functions with shape (n,) - the n basis functions + G: np.array with shape (n,n) - the Gram matrix associated to F + variances : np.ndarray with shape (n,) + """ + part = finite_dimensionalpart(D,F,G,variances,weights) + return kern(D, [part]) + +def spline(D,variance=1.): + """ + Construct a spline kernel. + + :param D: Dimensionality of the kernel + :type D: int + :param variance: the variance of the kernel + :type variance: float + """ + part = splinepart(D,variance) + return kern(D, [part]) + +def Brownian(D,variance=1.): + """ + Construct a Brownian motion kernel. + + :param D: Dimensionality of the kernel + :type D: int + :param variance: the variance of the kernel + :type variance: float + """ + part = Brownianpart(D,variance) + return kern(D, [part]) diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py new file mode 100644 index 00000000..f1faf51f --- /dev/null +++ b/GPy/kern/exponential.py @@ -0,0 +1,104 @@ +from kernpart import kernpart +import numpy as np +import hashlib +from scipy import integrate + +class exponential(kernpart): + """ + Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2) + + .. math:: + + k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + + :param D: the number of input dimensions + :type D: int + :param variance: the variance :math:`\sigma^2` + :type variance: float + :param lengthscale: the lengthscales :math:`\ell_i` + :type lengthscale: np.ndarray of size (D,) + :rtype: kernel object + + """ + def __init__(self,D,variance=1.,lengthscales=None): + self.D = D + if lengthscales is not None: + assert lengthscales.shape==(self.D,) + else: + lengthscales = np.ones(self.D) + self.Nparam = self.D + 1 + self.name = 'exp' + self.set_param(np.hstack((variance,lengthscales))) + + def get_param(self): + """return the value of the parameters.""" + return np.hstack((self.variance,self.lengthscales)) + def set_param(self,x): + """set the value of the parameters.""" + assert x.size==(self.D+1) + self.variance = x[0] + self.lengthscales = x[1:] + def get_param_names(self): + """return parameter names.""" + if self.D==1: + return ['variance','lengthscale'] + else: + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] + def K(self,X,X2,target): + """Compute the covariance matrix between X and X2.""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + np.add(self.variance*np.exp(-dist), target,target) + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + np.add(target,self.variance,target) + + def Gram_matrix(self,F,F1,lower,upper): + """ + Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1. + + :param F: vector of functions + :type F: np.array + :param F1: vector of derivatives of F + :type F1: np.array + :param lower,upper: boundaries of the input domain + :type lower,upper: floats + """ + assert self.D == 1 + def L(x,i): + return(1./self.lengthscales*F[i](x) + F1[i](x)) + n = F.shape[0] + G = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] + Flower = np.array([f(lower) for f in F])[:,None] + return(self.lengthscales/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T)) + + def dK_dtheta(self,X,X2,target): + """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) + invdist = 1./np.where(dist!=0.,dist,np.inf) + dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 + dvar = np.exp(-dist) + dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,np.newaxis] + np.add(target[:,:,0],dvar, target[:,:,0]) + np.add(target[:,:,1:],dl, target[:,:,1:]) + def dKdiag_dtheta(self,X,target): + """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)""" + np.add(target[:,0],1.,target[:,0]) + def dK_dX(self,X,X2,target): + """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*).""" + if X2 is None: X2 = X + dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] + ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) + target += - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2)) + def dKdiag_dX(self,X,target): + pass + + + + + + diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/finite_dimensional.py new file mode 100644 index 00000000..bb9d0a90 --- /dev/null +++ b/GPy/kern/finite_dimensional.py @@ -0,0 +1,70 @@ +from kernpart import kernpart +import numpy as np +from ..util.linalg import pdinv,mdot + +class finite_dimensional(kernpart): + def __init__(self, D, F, G, variance=1., weights=None): + """ + Argumnents + ---------- + D: int - the number of input dimensions + F: np.array of functions with shape (n,) - the n basis functions + G: np.array with shape (n,n) - the Gram matrix associated to F + weights : np.ndarray with shape (n,) + """ + self.D = D + self.F = F + self.G = G + self.G_1 ,tmp = pdinv(G) + self.n = F.shape[0] + if weights is not None: + assert weights.shape==(self.n,) + else: + weights = np.ones(self.n) + self.Nparam = self.n + 1 + self.name = 'finite_dim' + self.set_param(np.hstack((variance,weights))) + + def get_param(self): + return np.hstack((self.variance,self.weights)) + def set_param(self,x): + assert x.size == (self.Nparam) + self.variance = x[0] + self.weights = x[1:] + def get_param_names(self): + if self.n==1: + return ['variance','weight'] + else: + return ['variance']+['weight_%i'%i for i in range(self.weights.size)] + + def K(self,X,X2,target): + if X2 is None: X2 = X + FX = np.column_stack([f(X) for f in self.F]) + FX2 = np.column_stack([f(X2) for f in self.F]) + product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T) + np.add(product,target,target) + def Kdiag(self,X,target): + product = np.diag(self.K(X,X2)) + np.add(target,product,target) + def dK_dtheta(self,X,X2,target): + """Return shape is NxMx(Ntheta)""" + if X2 is None: X2 = X + FX = np.column_stack([f(X) for f in self.F]) + FX2 = np.column_stack([f(X2) for f in self.F]) + DER = np.zeros((self.n,self.n,self.n)) + for i in range(self.n): + DER[i,i,i] = np.sqrt(self.weights[i]) + dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T) + dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T) + np.add(target[:,:,0],np.transpose(dv,(0,2,1)), target[:,:,0]) + np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:]) + def dKdiag_dtheta(self,X,target): + np.add(target[:,0],1.,target[:,0]) + + + + + + + + diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py new file mode 100644 index 00000000..a15ee872 --- /dev/null +++ b/GPy/kern/kern.py @@ -0,0 +1,308 @@ +import numpy as np +from ..core.parameterised import parameterised +from DelayedDecorator import DelayedDecorator +from functools import partial +from kernpart import kernpart + +class transform_gradients_(object): + """ + A decorator for gradient transformation. Accounts for constraining and tying. + + NB: this uses the Delayed_decorator class so allow it to decorate bound methods + """ + def __init__(self,f): + self.f = f + def __call__(self,*args,**kwargs): + kern = args[0] + x = kern.get_param() + g = self.f(*args,**kwargs) + + #first roll the axes of gradients. This is because we always want to acces the first axis, + #else slicing becomes more difficult + g = np.rollaxis(g,-1) + + ##TODO: the shape of the gradients is slightly problematic + + if len(g.shape)==2: + x_reshape = (-1,1) + elif len(g.shape)==3: + x_reshape = (-1,1,1) + else: + raise NotImplementedError + + #do the simple transforms first + np.multiply(g[kern.constrained_positive_indices], x[kern.constrained_positive_indices].reshape(x_reshape),g[kern.constrained_positive_indices]) + np.multiply(g[kern.constrained_negative_indices], x[kern.constrained_negative_indices].reshape(x_reshape),g[kern.constrained_negative_indices]) + [np.multiply(g[i], ((x[i]-l)*(h-x[i])/(h-l)).reshape(x_reshape), g[i]) for i,l,h in zip(kern.constrained_bounded_indices, kern.constrained_bounded_lowers, kern.constrained_bounded_uppers)] + + #work out which gradients need to be added (tieing) + [np.sum(g[i],g[i[0]], axis=0) for i in kern.tied_indices] + + #work out which gradients are simply cut out (due to fixing) + if len(kern.tied_indices) or len(kern.constrained_fixed_indices): + to_remove = np.hstack((kern.constrained_fixed_indices+[t[1:] for t in kern.tied_indices])) + g = np.delete(g,to_remove,axis=0) + + return np.swapaxes(np.rollaxis(g,0,-1),-1,-2) # undo the rolling of the axes + +transform_gradients = partial(DelayedDecorator,transform_gradients_) +#transform_param = partial(DelayedDecorator,_transform_param) TODO + + + + +class kern(parameterised): + def __init__(self,D,parts=[], input_slices=None): + """ + This kernel does 'compound' structures. + + The compund structure enables many features of GPy, including + - Hierarchical models + - Correleated output models + - multi-view learning + + Hadamard product and outer-product kernels will require a new class TODO. + + :param D: The dimensioality of the kernel's input space + :type D: int + :param parts: the 'parts' (PD functions) of the kernel + :type parts: list of kernpart objects + :param input_slices: the slices on the inputs which apply to each kernel + :type input_slices: list of slice objects, or list of bools + + """ + self.parts = parts + self.Nparts = len(parts) + self.Nparam = sum([p.Nparam for p in self.parts]) + + self.D = D + + #deal with input_slices + if input_slices is None: + self.input_slices = [slice(None) for p in self.parts] + else: + assert len(input_slices)==len(self.parts) + self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] + + for p in self.parts: + assert isinstance(p,kernpart), "bad kernel part" + + + self.compute_param_slices() + + parameterised.__init__(self) + + #TODO: Xslices can be passed so that we can compute hierarchical and strutured kernels. these also need implementing for the psi statistics (straightforward) + + def compute_param_slices(self): + """create a set of slices that can index the parameters of each part""" + self.param_slices = [] + count = 0 + for p in self.parts: + self.param_slices.append(slice(count,count+p.Nparam)) + count += p.Nparam + + def _process_slices(self,slices1=None,slices2=None): + """ + Format the slices so that they can easily be used. + Both slices can be any of three things: + - If None, the new points covary through every kernel part (default) + - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part + - If a list of booleans, specifying which kernel parts are active + + if the second arg is False, return only slices1 + + returns actual lists of slice objects + """ + if slices1 is None: + slices1 = [slice(None)]*self.Nparts + elif all([type(s_i) is bool for s_i in slices1]): + slices1 = [slice(None) if s_i else slice(0) for s_i in slices1] + else: + assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects" + if slices2 is None: + slices2 = [slice(None)]*self.Nparts + elif slices2 is False: + return slices1 + elif all([type(s_i) is bool for s_i in slices2]): + slices2 = [slice(None) if s_i else slice(0) for s_i in slices2] + else: + assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects" + return slices1, slices2 + + def __add__(self,other): + assert self.D == other.D + newkern = kern(self.D,self.parts+other.parts, self.input_slices + other.input_slices) + #transfer constraints: + newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices)) + newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices)) + newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices] + newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers + newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers + newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices] + newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values + newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] + return newkern + def add(self,other): + """ + Add another kernel to this one. Both kernels are defined on the same _space_ + :param other: the other kernel to be added + :type other: GPy.kern + """ + return self + other + + def add_orthogonal(self,other): + """ + Add another kernel to this one. Both kernels are defined on separate spaces + :param other: the other kernel to be added + :type other: GPy.kern + """ + #deal with input slices + D = self.D + other.D + self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices] + other_input_indices = [sl.indices(other.D) for sl in other.input_slices] + other_input_slices = [slice(i[0]+self.D,i[1]+self.D,i[2]) for i in other_input_indices] + + newkern = kern(D,self.parts+other.parts, self_input_slices + other_input_slices) + + #transfer constraints: + newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices)) + newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices)) + newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices] + newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers + newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers + newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices] + newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values + newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] + return newkern + + + def get_param(self): + return np.hstack([p.get_param() for p in self.parts]) + + def set_param(self,x): + [p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)] + + def get_param_names(self): + return sum([[k.name+'_'+str(i)+'_'+n for n in k.get_param_names()] for i,k in enumerate(self.parts)],[]) + + def K(self,X,X2=None,slices1=None,slices2=None): + assert X.shape[1]==self.D + slices1, slices2 = self._process_slices(slices1,slices2) + if X2 is None: + X2 = X + target = np.zeros((X.shape[0],X2.shape[0])) + [p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] + return target + + #@transform_gradients + def dK_dtheta(self,X,X2=None,slices1=None,slices2=None): + """Return shape is NxMx(Ntheta)""" + assert X.shape[1]==self.D + slices1, slices2 = self._process_slices(slices1,slices2) + if X2 is None: + X2 = X + target = np.zeros((X.shape[0],X2.shape[0],self.Nparam)) + [p.dK_dtheta(X[s1,i_s],X2[s2,i_s],target[s1,s2,ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] + return target + + def dK_dX(self,X,X2=None,slices1=None,slices2=None): + if X2 is None: + X2 = X + slices1, slices2 = self._process_slices(slices1,slices2) + target = np.zeros((X2.shape[0],X.shape[0],X.shape[1])) + [p.dK_dX(X[s1],X2[s2],target[s2,s1,:]) for p,ps,s1,s2 in zip(self.parts, self.param_slices,slices1,slices2)] + return target + + def Kdiag(self,X,slices=None): + assert X.shape[1]==self.D + slices = self._process_slices(slices,False) + target = np.zeros(X.shape[0]) + [p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)] + return target + + #@transform_gradients + def dKdiag_dtheta(self,X,slices=None): + assert X.shape[1]==self.D + slices = self._process_slices(slices,False) + target = np.zeros((X.shape[0],self.Nparam)) + [p.dKdiag_dtheta(X[s,i_s],target[s,ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)] + return target + + def dKdiag_dX(self, X, slices=None): + assert X.shape[1]==self.D + slices = self._process_slices(slices,False) + target = np.zeros((X.shape[0],X.shape[0],X.shape[1])) + [p.dKdiag_dX(X[s,i_s],target[s,:,:]) for p,i_s,s in zip(self.parts,self.input_slices,slices)] + return target + + def psi0(self,Z,mu,S,slices_mu=None,slices_Z=None): + target = np.zeros(mu.shape[0]) + [p.psi0(Z,mu,S,target) for p in self.parts] + return target + + #@transform_gradients + def dpsi0_dtheta(self,Z,mu,S): + target = np.zeros((mu.shape[0],self.Nparam)) + [p.dpsi0_dtheta(Z,mu,S,target[s]) for p,s in zip(self.parts, self.param_slices)] + return target + + def dpsi0_dmuS(self,Z,mu,S): + target_mu,target_S = np.zeros_like(mu),np.zeros_like(S) + [p.dpsi0_dmuS(Z,mu,S,target_mu,target_S) for p in self.parts] + return target_mu,target_S + + def psi1(self,Z,mu,S): + """Think N,M,Q """ + target = np.zeros((mu.shape[0],Z.shape[0])) + [p.psi1(Z,mu,S,target=target) for p in self.parts] + return target + + #@transform_gradients + def dpsi1_dtheta(self,Z,mu,S): + """N,M,(Ntheta)""" + target = np.zeros((mu.shape[0],Z.shape[0],self.Nparam)) + [p.dpsi1_dtheta(Z,mu,S,target[:,:,s]) for p,s in zip(self.parts, self.param_slices)] + return target + + def dpsi1_dZ(self,Z,mu,S): + """N,M,Q""" + target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[1])) + [p.dpsi1_dZ(Z,mu,S,target) for p in self.parts] + return target + + def dpsi1_dmuS(self,Z,mu,S): + """return shapes are N,M,Q""" + target_mu, target_S = np.zeros((2,mu.shape[0],Z.shape[0],Z.shape[1])) + [p.dpsi1_dmuS(Z,mu,S,target_mu=target_mu,target_S = target_S) for p in self.parts] + return target_mu, target_S + + def psi2(self,Z,mu,S): + """ + :Z: np.ndarray of inducing inputs (M x Q) + : mu, S: np.ndarrays of means and variacnes (each N x Q) + :returns psi2: np.ndarray (N,M,M,Q) """ + target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0])) + [p.psi2(Z,mu,S,target=target) for p in self.parts] + return target + + #@transform_gradients + def dpsi2_dtheta(self,Z,mu,S): + """Returns shape (N,M,M,Ntheta)""" + target = np.zeros((Z.shape[0],Z.shape[0],self.Nparam)) + [p.dpsi2_dtheta(Z,mu,S,target[:,:,s]) for p,s in zip(self.parts, self.param_slices)] + return target + + def dpsi2_dZ(self,Z,mu,S): + """N,M,M,Q""" + target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0],Z.shape[1])) + [p.dpsi2_dZ(Z,mu,S,target) for p in self.parts] + return target + + def dpsi2_dmuS(self,Z,mu,S): + """return shapes are N,M,M,Q""" + target_mu, target_S = np.zeros((2,mu.shape[0],Z.shape[0],Z.shape[0],Z.shape[1])) + [p.dpsi2_dmuS(Z,mu,S,target_mu=target_mu,target_S = target_S) for p in self.parts] + + #TODO: there are some extra terms to compute here! + return target_mu, target_S diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py new file mode 100644 index 00000000..25eda4d5 --- /dev/null +++ b/GPy/kern/kernpart.py @@ -0,0 +1,53 @@ +class kernpart(object): + def __init__(self,D): + """ + The base class for a kernpart: a positive definite function which forms part of a kernel + + :param D: the number of input dimensions to the function + :type D: int + """ + self.D = D + self.Nparam = 1. + self.name = 'white' + self.set_param(np.array([variance]).flatten()) + + def get_param(self): + raise NotImplementedError + def set_param(self,x): + raise NotImplementedError + def get_param_names(self): + raise NotImplementedError + def K(self,X,X2,target): + raise NotImplementedError + def Kdiag(self,X,target): + raise NotImplementedError + def dK_dtheta(self,X,X2,target): + raise NotImplementedError + def dKdiag_dtheta(self,X,target): + raise NotImplementedError + def psi0(self,Z,mu,S,target): + raise NotImplementedError + def dpsi0_dtheta(self,Z,mu,S,target): + raise NotImplementedError + def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def psi1(self,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dtheta(self,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dZ(self,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def psi2(self,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dZ(self,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dtheta(self,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def dK_dX(self,X,X2,target): + raise NotImplementedError + + diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py new file mode 100644 index 00000000..17215c75 --- /dev/null +++ b/GPy/kern/linear.py @@ -0,0 +1,103 @@ +from kernpart import kernpart +import numpy as np +class linear(kernpart): + """ + Linear kernel + + :param D: the number of input dimensions + :type D: int + :param variance: variance + :type variance: None|float + """ + + def __init__(self, D, variance=None): + self.D = D + if variance is None: + variance = 1.0 + self.Nparam = 1 + self.name = 'linear' + self.set_param(variance) + + def get_param(self): + return self.variance + + def set_param(self,x): + self.variance = x + + def get_param_names(self): + return ['variance'] + + def K(self,X,X2,target): + target += self.variance * np.dot(X, X2.T) + + def Kdiag(self,X,target): + np.add(target,np.sum(self.variance*np.square(X),-1),target) + + def dK_dtheta(self,X,X2,target): + """ + Computes the derivatives wrt theta + Return shape is NxMx(Ntheta) + + """ + + if X2 is None: X2 = X + product = np.dot(X, X2.T)[:,:, None]#X[:,None,:]*X2[None,:,:] + target += product + + def dK_dX(self,X,X2,target): + if X2 is None: X2 = X + np.add(target,X2[:,None,:]*self.variance,target) + + # def psi0(self,Z,mu,S,target): + # expected = np.square(mu) + S + # np.add(target,np.sum(self.variance*expected),target) + + # def dpsi0_dtheta(self,Z,mu,S,target): + # expected = np.square(mu) + S + # return -2.*np.sum(expected,0) + + # def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + # np.add(target_mu,2*mu*self.variances,target_mu) + # np.add(target_S,self.variances,target_S) + + # def dpsi0_dZ(self,Z,mu,S,target): + # pass + + # def psi1(self,Z,mu,S,target): + # """the variance, it does nothing""" + # self.K(mu,Z,target) + + # def dpsi1_dtheta(self,Z,mu,S,target): + # """the variance, it does nothing""" + # self.dK_dtheta(mu,Z,target) + + # def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): + # """Do nothing for S, it does not affect psi1""" + # np.add(target_mu,Z/self.variances2,target_mu) + + # def dpsi1_dZ(self,Z,mu,S,target): + # self.dK_dX(mu,Z,target) + + # def psi2(self,Z,mu,S,target): + # """Think N,M,M,Q """ + # mu2_S = np.square(mu)+SN,Q, + # ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q + # psi2 = ZZ*np.square(self.variances)*mu2_S + # np.add(target, psi2.sum(-1),target) M,M + + # def dpsi2_dtheta(self,Z,mu,S,target): + # mu2_S = np.square(mu)+SN,Q, + # ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q + # target += 2.*ZZ*mu2_S*self.variances + + # def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + # """Think N,M,M,Q """ + # mu2_S = np.sum(np.square(mu)+S,0)Q, + # ZZ = Z[:,None,:]*Z[None,:,:] M,M,Q + # tmp = ZZ*np.square(self.variances) M,M,Q + # np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) N,M,M,Q + # np.add(target_S, tmp, target_S) N,M,M,Q + + # def dpsi2_dZ(self,Z,mu,S,target): + # mu2_S = np.sum(np.square(mu)+S,0)Q, + # target += Z[:,None,:]*np.square(self.variances)*mu2_S diff --git a/GPy/kern/linear_ARD.py b/GPy/kern/linear_ARD.py new file mode 100644 index 00000000..3c7482d3 --- /dev/null +++ b/GPy/kern/linear_ARD.py @@ -0,0 +1,114 @@ +from kernpart import kernpart +import numpy as np + +class linear_ARD(kernpart): + """ + Linear ARD kernel + + :param D: the number of input dimensions + :type D: int + :param variances: ARD variances + :type variances: None|np.ndarray + """ + + def __init__(self,D,variances=None): + self.D = D + if variances is not None: + assert variances.shape==(self.D,) + else: + variances = np.ones(self.D) + self.Nparam = self.D + self.name = 'linear' + self.set_param(variances) + + def get_param(self): + return self.variances + + def set_param(self,x): + assert x.size==(self.Nparam) + self.variances = x + + def get_param_names(self): + if self.D==1: + return ['variance'] + else: + return ['variance_%i'%i for i in range(self.variances.size)] + + def K(self,X,X2,target): + XX = X*np.sqrt(self.variances) + XX2 = X2*np.sqrt(self.variances) + target += np.dot(XX, XX2.T) + + def Kdiag(self,X,target): + np.add(target,np.sum(self.variances*np.square(X),-1),target) + + def dK_dtheta(self,X,X2,target): + """ + Computes the derivatives wrt theta + Return shape is NxMx(Ntheta) + + """ + + if X2 is None: X2 = X + product = X[:,None,:]*X2[None,:,:] + target += product + + def dK_dX(self,X,X2,target): + if X2 is None: X2 = X + #product = X[:,None,:]*X2[None,:,:] + #scaled_product = product/self.variances2 + np.add(target,X2[:,None,:]*self.variances,target) + + def psi0(self,Z,mu,S,target): + expected = np.square(mu) + S + np.add(target,np.sum(self.variances*expected),target) + + def dpsi0_dtheta(self,Z,mu,S,target): + expected = np.square(mu) + S + return -2.*np.sum(expected,0) + + def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + np.add(target_mu,2*mu*self.variances,target_mu) + np.add(target_S,self.variances,target_S) + + def dpsi0_dZ(self,Z,mu,S,target): + pass + + def psi1(self,Z,mu,S,target): + """the variance, it does nothing""" + self.K(mu,Z,target) + + def dpsi1_dtheta(self,Z,mu,S,target): + """the variance, it does nothing""" + self.dK_dtheta(mu,Z,target) + + def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): + """Do nothing for S, it does not affect psi1""" + np.add(target_mu,Z/self.variances2,target_mu) + + def dpsi1_dZ(self,Z,mu,S,target): + self.dK_dX(mu,Z,target) + + def psi2(self,Z,mu,S,target): + """Think N,M,M,Q """ + mu2_S = np.square(mu)+S# N,Q, + ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + psi2 = ZZ*np.square(self.variances)*mu2_S + np.add(target, psi2.sum(-1),target) # M,M + + def dpsi2_dtheta(self,Z,mu,S,target): + mu2_S = np.square(mu)+S# N,Q, + ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + target += 2.*ZZ*mu2_S*self.variances + + def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + """Think N,M,M,Q """ + mu2_S = np.sum(np.square(mu)+S,0)# Q, + ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q + tmp = ZZ*np.square(self.variances) # M,M,Q + np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) #N,M,M,Q + np.add(target_S, tmp, target_S) #N,M,M,Q + + def dpsi2_dZ(self,Z,mu,S,target): + mu2_S = np.sum(np.square(mu)+S,0)# Q, + target += Z[:,None,:]*np.square(self.variances)*mu2_S diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py new file mode 100644 index 00000000..dd498d6f --- /dev/null +++ b/GPy/kern/rbf.py @@ -0,0 +1,155 @@ +from kernpart import kernpart +import numpy as np +import hashlib + +class rbf(kernpart): + """ + Radial Basis Function kernel, aka squared-exponential or Gaussian kernel. + + :param D: the number of input dimensions + :type D: int + :param variance: the variance of the kernel + :type variance: float + :param lengthscale: the lengthscale of the kernel + :type lengthscale: float + + .. Note: for rbf with different lengthscales on each dimension, see rbf_ARD + """ + + def __init__(self,D,variance=1.,lengthscale=1.): + self.D = D + self.Nparam = 2 + self.name = 'rbf' + self.set_param(np.hstack((variance,lengthscale))) + + #initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3,1)) + self._X, self._X2, self._params = np.empty(shape=(3,1)) + + def get_param(self): + return np.hstack((self.variance,self.lengthscale)) + + def set_param(self,x): + self.variance, self.lengthscale = x + self.lengthscale2 = np.square(self.lengthscale) + #reset cached results + self._X, self._X2, self._params = np.empty(shape=(3,1)) + self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S + + def get_param_names(self): + return ['variance','lengthscale'] + + def K(self,X,X2,target): + self._K_computations(X,X2) + np.add(self.variance*self._K_dvar, target,target) + + def Kdiag(self,X,target): + np.add(target,self.variance,target) + + def dK_dtheta(self,X,X2,target): + """Return shape is NxMx(Ntheta)""" + self._K_computations(X,X2) + target[:,:,0] += self._K_dvar + target[:,:,1] += self._K_dvar*self.variance*self._K_dist2/self.lengthscale + + def dKdiag_dtheta(self,X,target): + np.add(target[:,0],1.,target[:,0]) + + def dK_dX(self,X,X2,target): + self._K_computations(X,X2) + _K_dist = X[:,None,:]-X2[None,:,:] + target += np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2)) + + def dKdiag_dX(self,X,target): + pass + + def _K_computations(self,X,X2): + if not (np.all(X==self._X) and np.all(X2==self._X2)): + self._X = X + self._X2 = X2 + if X2 is None: X2 = X + XXT = np.dot(X,X2.T) + if X is X2: + self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2 + else: + self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2 + self._K_exponent = -0.5*self._K_dist2 + self._K_dvar = np.exp(-0.5*self._K_dist2) + +if __name__=='__main__': + #run some simple tests on the kernel (TODO:move these to unititest) + #TODO: these are broken in this new structure! + N = 10 + M = 5 + Q = 3 + + Z = np.random.randn(M,Q) + mu = np.random.randn(N,Q) + S = np.random.rand(N,Q) + + var = 2.5 + lengthscales = np.ones(Q)*0.7 + + k = rbf(Q,var,lengthscales) + + from checkgrad import checkgrad + + def k_theta_test(param,k): + k.set_param(param) + K = k.K(Z) + dK_dtheta = k.dK_dtheta(Z) + f = np.sum(K) + df = dK_dtheta.sum(0).sum(0) + return f,np.array(df) + print "dk_dtheta_test" + checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,)) + + + def psi1_mu_test(mu,k): + mu = mu.reshape(N,Q) + f = np.sum(k.psi1(Z,mu,S)) + df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1) + return f,df.flatten() + print "psi1_mu_test" + checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,)) + + def psi1_S_test(S,k): + S = S.reshape(N,Q) + f = np.sum(k.psi1(Z,mu,S)) + df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1) + return f,df.flatten() + print "psi1_S_test" + checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,)) + + def psi1_theta_test(theta,k): + k.set_param(theta) + f = np.sum(k.psi1(Z,mu,S)) + df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)]) + return f,df + print "psi1_theta_test" + checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,)) + + + def psi2_mu_test(mu,k): + mu = mu.reshape(N,Q) + f = np.sum(k.psi2(Z,mu,S)) + df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1) + return f,df.flatten() + print "psi2_mu_test" + checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,)) + + def psi2_S_test(S,k): + S = S.reshape(N,Q) + f = np.sum(k.psi2(Z,mu,S)) + df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1) + return f,df.flatten() + print "psi2_S_test" + checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,)) + + def psi2_theta_test(theta,k): + k.set_param(theta) + f = np.sum(k.psi2(Z,mu,S)) + df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)]) + return f,df + print "psi2_theta_test" + checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,)) diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py new file mode 100644 index 00000000..8e29b23a --- /dev/null +++ b/GPy/kern/rbf_ARD.py @@ -0,0 +1,234 @@ +from kernpart import kernpart +import numpy as np +import hashlib + +class rbf_ARD(kernpart): + def __init__(self,D,variance=1.,lengthscales=None): + """ + Arguments + ---------- + D: int - the number of input dimensions + variance: float + lengthscales : np.ndarray of shape (D,) + """ + self.D = D + if lengthscales is not None: + assert lengthscales.shape==(self.D,) + else: + lengthscales = np.ones(self.D) + self.Nparam = self.D + 1 + self.name = 'rbf_ARD' + self.set_param(np.hstack((variance,lengthscales))) + + #initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3,1)) + self._X, self._X2, self._params = np.empty(shape=(3,1)) + + def get_param(self): + return np.hstack((self.variance,self.lengthscales)) + def set_param(self,x): + assert x.size==(self.D+1) + self.variance = x[0] + self.lengthscales = x[1:] + self.lengthscales2 = np.square(self.lengthscales) + #reset cached results + self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S + def get_param_names(self): + if self.D==1: + return ['variance','lengthscale'] + else: + return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] + def K(self,X,X2,target): + self._K_computations(X,X2) + np.add(self.variance*self._K_dvar, target,target) + def Kdiag(self,X,target): + np.add(target,self.variance,target) + def dK_dtheta(self,X,X2,target): + """Return shape is NxMx(Ntheta)""" + self._K_computations(X,X2) + dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscales + np.add(target[:,:,0],self._K_dvar, target[:,:,0]) + np.add(target[:,:,1:],dl, target[:,:,1:]) + def dKdiag_dtheta(self,X,target): + np.add(target[:,0],1.,target[:,0]) + def dK_dX(self,X,X2,target): + self._K_computations(X,X2) + dZ = self.variance*self._K_dvar[:,:,None]*self._K_dist/self.lengthscales2 + np.add(target,-dZ.transpose(1,0,2),target) + def psi0(self,Z,mu,S,target): + np.add(target, self.variance, target) + def dpsi0_dtheta(self,Z,mu,S,target): + target[:,0] += 1. + def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + pass + def psi1(self,Z,mu,S,target): + """Think N,M,Q """ + self._psi_computations(Z,mu,S) + np.add(target, self._psi1,target) + + def dpsi1_dtheta(self,Z,mu,S,target): + """N,Q,(Ntheta)""" + self._psi_computations(Z,mu,S) + denom_deriv = S[:,None,:]/(self.lengthscales**3+self.lengthscales*S[:,None,:]) + d_length = self._psi1[:,:,None]*(self.lengthscales*np.square(self._psi1_dist/(self.lengthscales2+S[:,None,:])) + denom_deriv) + target[:,:,0] += self._psi1/self.variance + target[:,:,1:] += d_length + def dpsi1_dZ(self,Z,mu,S,target): + self._psi_computations(Z,mu,S) + np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target) + + def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): + """return shapes are N,M,Q""" + self._psi_computations(Z,mu,S) + tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom + np.add(target_mu,tmp*self._psi1_dist,target_mu) + np.add(target_S, 0.5*tmp*(self._psi1_dist_sq-1), target_S) + + def psi2(self,Z,mu,S,target): + """shape N,M,M""" + self._psi_computations(Z,mu,S) + np.add(target, self._psi2,target) + + def dpsi2_dtheta(self,Z,mu,S,target): + """Shape N,M,M,Ntheta""" + self._psi_computations(Z,mu,S) + d_var = np.sum(2.*self._psi2/self.variance,0) + d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom) + d_length = d_length.sum(0) + target[:,:,0] += d_var + target[:,:,1:] += d_length + + def dpsi2_dZ(self,Z,mu,S,target): + """Returns shape N,M,M,Q""" + self._psi_computations(Z,mu,S) + dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) + target += dZ + + def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + """Think N,M,M,Q """ + self._psi_computations(Z,mu,S) + tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom + np.add(target_mu, -tmp*(2.*self._psi2_mudist),target_mu) #N,M,M,Q + np.add(target_S, tmp*(2.*self._psi2_mudist_sq-1), target_S) #N,M,M,Q + + def _K_computations(self,X,X2): + if not (np.all(X==self._X) and np.all(X2==self._X2)): + self._X = X + self._X2 = X2 + if X2 is None: X2 = X + self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy + self._params = np.empty(shape=(1,0))#ensure the next section gets called + if not np.all(self._params == self.get_param()): + self._params == self.get_param() + self._K_dist2 = np.square(self._K_dist/self.lengthscales) + self._K_exponent = -0.5*self._K_dist2.sum(-1) + self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1)) + + def _psi_computations(self,Z,mu,S): + #here are the "statistics" for psi1 and psi2 + if not np.all(Z==self._Z): + #Z has changed, compute Z specific stuff + self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q + self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q + self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscales2 # M,M,Q + self._Z = Z + + if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)): + #something's changed. recompute EVERYTHING + + #psi1 + self._psi1_denom = S[:,None,:]/self.lengthscales2 + 1. + self._psi1_dist = Z[None,:,:]-mu[:,None,:] + self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscales2/self._psi1_denom + self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1) + self._psi1 = self.variance*np.exp(self._psi1_exponent) + + #psi2 + self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscales2+1. # N,M,M,Q + self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q + self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscales2*self._psi2_denom) + self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M + self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M + + self._Z, self._mu, self._S = Z, mu,S + + +if __name__=='__main__': + #run some simple tests on the kernel (TODO:move these to unititest) + #TODO: these are broken in this new structure! + N = 10 + M = 5 + Q = 3 + + Z = np.random.randn(M,Q) + mu = np.random.randn(N,Q) + S = np.random.rand(N,Q) + + var = 2.5 + lengthscales = np.ones(Q)*0.7 + + k = rbf(Q,var,lengthscales) + + from checkgrad import checkgrad + + def k_theta_test(param,k): + k.set_param(param) + K = k.K(Z) + dK_dtheta = k.dK_dtheta(Z) + f = np.sum(K) + df = dK_dtheta.sum(0).sum(0) + return f,np.array(df) + print "dk_dtheta_test" + checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,)) + + + def psi1_mu_test(mu,k): + mu = mu.reshape(N,Q) + f = np.sum(k.psi1(Z,mu,S)) + df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1) + return f,df.flatten() + print "psi1_mu_test" + checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,)) + + def psi1_S_test(S,k): + S = S.reshape(N,Q) + f = np.sum(k.psi1(Z,mu,S)) + df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1) + return f,df.flatten() + print "psi1_S_test" + checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,)) + + def psi1_theta_test(theta,k): + k.set_param(theta) + f = np.sum(k.psi1(Z,mu,S)) + df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)]) + return f,df + print "psi1_theta_test" + checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,)) + + + def psi2_mu_test(mu,k): + mu = mu.reshape(N,Q) + f = np.sum(k.psi2(Z,mu,S)) + df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1) + return f,df.flatten() + print "psi2_mu_test" + checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,)) + + def psi2_S_test(S,k): + S = S.reshape(N,Q) + f = np.sum(k.psi2(Z,mu,S)) + df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1) + return f,df.flatten() + print "psi2_S_test" + checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,)) + + def psi2_theta_test(theta,k): + k.set_param(theta) + f = np.sum(k.psi2(Z,mu,S)) + df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)]) + return f,df + print "psi2_theta_test" + checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,)) + + diff --git a/GPy/kern/spline.py b/GPy/kern/spline.py new file mode 100644 index 00000000..dc68f340 --- /dev/null +++ b/GPy/kern/spline.py @@ -0,0 +1,54 @@ +from kernpart import kernpart +import numpy as np +import hashlib +def theta(x): + """Heavisdie step function""" + return np.where(x>=0.,1.,0.) + +class spline(kernpart): + """ + Spline kernel + + :param D: the number of input dimensions (fixed to 1 right now TODO) + :type D: int + :param variance: the variance of the kernel + :type variance: float + + """ + + def __init__(self,D,variance=1.,lengthscale=1.): + self.D = D + assert self.D==1 + self.Nparam = 1 + self.name = 'spline' + self.set_param(np.squeeze(variance)) + + def get_param(self): + return self.variance + + def set_param(self,x): + self.variance = x + + def get_param_names(self): + return ['variance'] + + def K(self,X,X2,target): + assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise" + assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise" + t = X + s = X2.T + s_t = s-t # broadcasted subtraction + target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.) + + def Kdiag(self,X,target): + target += self.variance*X.flatten()**3/3. + + def dK_dtheta(self,X,X2,target): + target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6. + + def dKdiag_dtheta(self,X,target): + target += X.flatten()**3/3. + + def dKdiag_dX(self,X,target): + target += self.variance*X**2 + diff --git a/GPy/kern/white.py b/GPy/kern/white.py new file mode 100644 index 00000000..147c2fd1 --- /dev/null +++ b/GPy/kern/white.py @@ -0,0 +1,82 @@ +from kernpart import kernpart +import numpy as np +class white(kernpart): + """ + White noise kernel. + + :param D: the number of input dimensions + :type D: int + :param variance: + :type variance: float + """ + def __init__(self,D,variance=1.): + self.D = D + self.Nparam = 1. + self.name = 'white' + self.set_param(np.array([variance]).flatten()) + + def get_param(self): + return self.variance + + def set_param(self,x): + assert x.shape==(1,) + self.variance = x + + def get_param_names(self): + return ['variance'] + + def K(self,X,X2,target): + if X.shape==X2.shape: + if np.all(X==X2): + np.add(target,np.eye(X.shape[0])*self.variance,target) + + def Kdiag(self,X,target): + target += self.variance + + def dK_dtheta(self,X,X2,target): + if X.shape==X2.shape: + if np.all(X==X2): + np.add(target[:,:,0],np.eye(X.shape[0]),target[:,:,0]) + + def dKdiag_dtheta(self,X,target): + np.add(target[:,0],1.,target[:,0]) + + def dK_dX(self,X,X2,target): + pass + + def dKdiag_dX(self,X,target): + pass + + def psi0(self,Z,mu,S,target): + target += self.variance + + def dpsi0_dtheta(self,Z,mu,S,target): + target += 1. + + def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + pass + + def psi1(self,Z,mu,S,target): + pass + + def dpsi1_dtheta(self,Z,mu,S,target): + pass + + def dpsi1_dZ(self,Z,mu,S,target): + pass + + def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S): + pass + + def psi2(self,Z,mu,S,target): + pass + + def dpsi2_dZ(self,Z,mu,S,target): + pass + + def dpsi2_dtheta(self,Z,mu,S,target): + pass + + def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + pass +