This commit is contained in:
Nicolo Fusi 2012-11-29 16:31:48 +00:00
parent e7a9a6a2fa
commit 63321e8409
18 changed files with 1861 additions and 0 deletions

6
GPy/__init__.py Normal file
View file

@@ -0,0 +1,6 @@
import kern
import models
import inference
import util
import examples
from core import priors

56
GPy/kern/Brownian.py Normal file
View file

@@ -0,0 +1,56 @@
from kernpart import kernpart
import numpy as np
def theta(x):
    """Heaviside step function (element-wise): 1.0 where x >= 0, else 0.0."""
    return (np.asarray(x) >= 0.).astype(np.float64)
class Brownian(kernpart):
    """
    Brownian Motion kernel: k(x, y) = variance * min(x, y). 1-D inputs only.

    :param D: the number of input dimensions (must be 1)
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    def __init__(self,D,variance=1.):
        self.D = D
        assert self.D==1, "Brownian motion in 1D only"
        # Nparam must be an int: kern.compute_param_slices builds
        # slice(count, count + Nparam) from it, and a float-valued slice
        # breaks parameter indexing. The original had the float 1. here.
        self.Nparam = 1
        self.name = 'Brownian'
        self.set_param(np.array([variance]).flatten())
    def get_param(self):
        """Return the parameter vector (the variance, shape (1,))."""
        return self.variance
    def set_param(self,x):
        """Set the parameters from a length-1 array."""
        assert x.shape==(1,)
        self.variance = x
    def get_param_names(self):
        return ['variance']
    def K(self,X,X2,target):
        """Add variance * min(x, x') for each pair to target (in place)."""
        if X2 is None: X2 = X  # X2=None means K(X, X), as in the other kernparts
        target += self.variance*np.fmin(X,X2.T)
    def Kdiag(self,X,target):
        """Diagonal of K: variance * x (added to target in place)."""
        target += self.variance*X.flatten()
    def dK_dtheta(self,X,X2,target):
        """Gradient of K with respect to the variance: min(x, x')."""
        if X2 is None: X2 = X
        target += np.fmin(X,X2.T)
    def dKdiag_dtheta(self,X,target):
        """Gradient of Kdiag with respect to the variance: x."""
        target += X.flatten()
    def dK_dX(self,X,X2,target):
        """Gradient of K with respect to X: variance * (1 - theta(x - x'))."""
        if X2 is None: X2 = X
        target += self.variance
        target -= self.variance*theta(X-X2.T)
        if X.shape==X2.shape:
            if np.all(X==X2):
                # NOTE(review): inside this branch X2.flatten()-X.flatten()
                # is identically zero, so this add is a no-op; the intended
                # diagonal correction may be missing -- confirm upstream.
                np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0])
    def dKdiag_dX(self,X,target):
        target += self.variance

View file

@@ -0,0 +1,86 @@
from new import instancemethod
#this is here so that we can have nice syntatic sugar when taking gradients
# See the transform_gradients decorator in kern
class DelayedDecorator(object):
    """Wrapper that delays decorating a function until it is invoked.

    This class allows a decorator to be used with both ordinary functions and
    methods of classes. It wraps the function passed to it with the decorator
    passed to it, but with some special handling:

    - If the wrapped function is an ordinary function, it will be decorated
      the first time it is called.
    - If the wrapped function is a method of a class, it will be decorated
      separately the first time it is called on each instance of the class.
      It will also be decorated separately the first time it is called as
      an unbound method of the class itself (though this use case should
      be rare).
    """
    def __init__(self, deco, func):
        # The base decorated function (which may be modified, see below)
        self._func = func
        # The decorator that will be applied
        self._deco = deco
        # Cache for calls made as an ordinary function (see __call__)
        self.__decofunc = None
        # Cache for calls made as an unbound method (see __get__)
        self.__clsfunc = None
    def _decorated(self, cls=None, instance=None):
        """Return the decorated function.

        This method is for internal use only; it can be implemented by
        subclasses to modify the actual decorated function before it is
        returned. The ``cls`` and ``instance`` parameters are supplied so
        this method can tell how it was invoked. If it is not overridden,
        the base function stored when this class was instantiated will
        be decorated by the decorator passed when this class was instantiated,
        and then returned.

        Note that factoring out this method, in addition to allowing
        subclasses to modify the decorated function, ensures that the
        right thing is done automatically when the decorated function
        itself is a higher-order function (e.g., a generator function).
        Since this method is called every time the decorated function
        is accessed, a new instance of whatever it returns will be
        created (e.g., a new generator will be realized), which is
        exactly the expected semantics.
        """
        return self._deco(self._func)
    def __call__(self, *args, **kwargs):
        """Direct function call syntax support.

        This makes an instance of this class work just like the underlying
        decorated function when called directly as an ordinary function.
        An internal reference to the decorated function is stored so that
        future direct calls will get the stored function.
        """
        if not self.__decofunc:
            self.__decofunc = self._decorated()
        return self.__decofunc(*args, **kwargs)
    def __get__(self, instance, cls):
        """Descriptor protocol support.

        This makes an instance of this class function correctly when it
        is used to decorate a method on a user-defined class. If called
        as a bound method, we store the decorated function in the instance
        dictionary, so we will not be called again for that instance. If
        called as an unbound method, we store a reference to the decorated
        function internally and use it on future unbound method calls.
        """
        # NOTE(review): this is a truthiness test, so a falsy-but-valid
        # instance (e.g. one whose __len__ returns 0) would fall through to
        # the class branch; 'instance is not None' looks intended -- confirm.
        if instance:
            deco = instancemethod(self._decorated(cls, instance), instance, cls)
            # This prevents us from being called again for this instance
            setattr(instance, self._func.__name__, deco)
        elif cls:
            if not self.__clsfunc:
                self.__clsfunc = instancemethod(self._decorated(cls), None, cls)
            deco = self.__clsfunc
        else:
            raise ValueError("Must supply instance or class to descriptor.")
        return deco

107
GPy/kern/Matern32.py Normal file
View file

@ -0,0 +1,107 @@
from kernpart import kernpart
import numpy as np
import hashlib
from ..util.linalg import pdinv,mdot
from scipy import integrate
class Matern32(kernpart):
    """
    Matern 3/2 kernel:

    .. math::

       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscales: the lengthscales :math:`\ell_i` (defaults to ones)
    :type lengthscales: np.ndarray of size (D,)
    :rtype: kernel object
    """
    def __init__(self,D,variance=1.,lengthscales=None):
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        self.Nparam = self.D + 1  # one variance + D lengthscales
        self.name = 'Mat32'
        self.set_param(np.hstack((variance,lengthscales)))
    def get_param(self):
        """return the value of the parameters."""
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        """set the value of the parameters."""
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
    def get_param_names(self):
        """return parameter names."""
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2 (added to target in place)."""
        if X2 is None: X2 = X
        # dist[i,j] is the lengthscale-scaled Euclidean distance r between X[i] and X2[j]
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target)
    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X (k(x,x) = variance)."""
        np.add(target,self.variance,target)
    def Gram_matrix(self,F,F1,F2,lower,upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.D == 1
        # L applies the Matern-3/2 differential operator to the i-th function at x
        def L(x,i):
            return(3./self.lengthscales**2*F[i](x) + 2*np.sqrt(3)/self.lengthscales*F1[i](x) + F2[i](x))
        n = F.shape[0]
        G = np.zeros((n,n))
        # G is symmetric, so only the upper triangle is integrated
        for i in range(n):
            for j in range(i,n):
                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
        Flower = np.array([f(lower) for f in F])[:,None]
        F1lower = np.array([f(lower) for f in F1])[:,None]
        #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
        #return(G)
        return(self.lengthscales**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscales**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
    def dK_dtheta(self,X,X2,target):
        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist)
        # guard against division by zero at coincident points: 1/inf -> 0,
        # so the lengthscale gradient vanishes on the diagonal as it should
        invdist = 1./np.where(dist!=0.,dist,np.inf)
        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
        dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
        np.add(target[:,:,0],dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
        # diag(K) = variance, so only the variance column gets a (unit) gradient
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
        target += - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
    def dKdiag_dX(self,X,target):
        # diag(K) is constant in X (stationary kernel), so the gradient is zero
        pass

112
GPy/kern/Matern52.py Normal file
View file

@ -0,0 +1,112 @@
from kernpart import kernpart
import numpy as np
import hashlib
from scipy import integrate
class Matern52(kernpart):
    """
    Matern 5/2 kernel:

    .. math::

       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscales: the lengthscales :math:`\ell_i` (defaults to ones)
    :type lengthscales: np.ndarray of size (D,)
    :rtype: kernel object
    """
    def __init__(self,D,variance=1.,lengthscales=None):
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        self.Nparam = self.D + 1  # one variance + D lengthscales
        self.name = 'Mat52'
        self.set_param(np.hstack((variance,lengthscales)))
        # placeholder caches; np.empty((3,1)) unpacks into three (1,) arrays
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
    def get_param(self):
        """return the value of the parameters."""
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        """set the value of the parameters."""
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
        self.lengthscales2 = np.square(self.lengthscales)
        # invalidate the caches whenever the parameters change
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
    def get_param_names(self):
        """return parameter names."""
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2 (added to target in place)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X (k(x,x) = variance)."""
        np.add(target,self.variance,target)
    def Gram_matrix(self,F,F1,F2,F3,lower,upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param F3: vector of third derivatives of F
        :type F3: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.D == 1
        # L applies the Matern-5/2 differential operator to the i-th function at x
        def L(x,i):
            return(5*np.sqrt(5)/self.lengthscales**3*F[i](x) + 15./self.lengthscales**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscales*F2[i](x) + F3[i](x))
        n = F.shape[0]
        G = np.zeros((n,n))
        # G is symmetric, so only the upper triangle is integrated
        for i in range(n):
            for j in range(i,n):
                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
        G_coef = 3.*self.lengthscales**5/(400*np.sqrt(5))
        Flower = np.array([f(lower) for f in F])[:,None]
        F1lower = np.array([f(lower) for f in F1])[:,None]
        F2lower = np.array([f(lower) for f in F2])[:,None]
        # boundary terms of the RKHS inner product
        orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscales**4/200*np.dot(F2lower,F2lower.T)
        orig2 = 3./5*self.lengthscales**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
        return(1./self.variance* (G_coef*G + orig + orig2))
    def dK_dtheta(self,X,X2,target):
        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        # guard against division by zero at coincident points (1/inf -> 0)
        invdist = 1./np.where(dist!=0.,dist,np.inf)
        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
        dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
        dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
        np.add(target[:,:,0],dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
        # diag(K) = variance, so only the variance column gets a (unit) gradient
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
        target += - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
    def dKdiag_dX(self,X,target):
        # diag(K) is constant in X (stationary kernel), so the gradient is zero
        pass

2
GPy/kern/__init__.py Normal file
View file

@ -0,0 +1,2 @@
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD
from kern import kern

47
GPy/kern/bias.py Normal file
View file

@ -0,0 +1,47 @@
from kernpart import kernpart
import numpy as np
import hashlib
class bias(kernpart):
    """
    A constant ('bias') covariance part: k(x, x') = variance for every pair.

    :param D: the number of input dimensions
    :type D: int
    :param variance: the constant value added to every covariance entry
    :type variance: float
    """
    def __init__(self, D, variance=1.):
        self.D = D
        self.Nparam = 1
        self.name = 'bias'
        self.set_param(np.array([variance]).flatten())
    def get_param(self):
        """Return the parameter vector (the variance, shape (1,))."""
        return self.variance
    def set_param(self, x):
        """Set the parameters from a length-1 array."""
        assert x.shape == (1,)
        self.variance = x
    def get_param_names(self):
        return ['variance']
    def K(self, X, X2, target):
        if X2 is None:
            X2 = X
        target += self.variance
    def Kdiag(self, X, target):
        target += self.variance
    def dK_dtheta(self, X, X2, target):
        """Return shape is NxMx(Ntheta)"""
        if X2 is None:
            X2 = X
        # dK/dvariance is 1 everywhere; the single parameter sits in slot 0
        target[:, :, 0] += 1.
    def dKdiag_dtheta(self, X, target):
        target[:, 0] += 1.
    def dK_dX(self, X, X2, target):
        # constant in X: zero gradient
        pass
    def dKdiag_dX(self, X, target):
        # constant in X: zero gradient
        pass

168
GPy/kern/constructors.py Normal file
View file

@ -0,0 +1,168 @@
import numpy as np
from kern import kern
from rbf import rbf as rbfpart
from rbf_ARD import rbf_ARD as rbf_ARD_part
from white import white as whitepart
from linear import linear as linearpart
from linear_ARD import linear_ARD as linear_ARD_part
from exponential import exponential as exponentialpart
from Matern32 import Matern32 as Matern32part
from Matern52 import Matern52 as Matern52part
from bias import bias as biaspart
from finite_dimensional import finite_dimensional as finite_dimensionalpart
from spline import spline as splinepart
from Brownian import Brownian as Brownianpart
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
#using meta-classes to make the objects construct properly wthout them.
def rbf(D,variance=1., lengthscale=1.):
    """
    Construct an RBF kernel

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscale: the lengthscale of the kernel
    :type lengthscale: float
    """
    part = rbfpart(D,variance,lengthscale)
    return kern(D, [part])
def rbf_ARD(D,variance=1., lengthscales=None):
    """
    Construct an RBF kernel with Automatic Relevance Determination (ARD)

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel (one per dimension)
    :type lengthscales: None|np.ndarray
    """
    part = rbf_ARD_part(D,variance,lengthscales)
    return kern(D, [part])
def linear(D,lengthscales=None):
    """
    Construct a linear kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = linearpart(D,lengthscales)
    return kern(D, [part])
def linear_ARD(D,lengthscales=None):
    """
    Construct a linear ARD kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param lengthscales: the lengthscales of the kernel (one per dimension)
    :type lengthscales: None|np.ndarray
    """
    part = linear_ARD_part(D,lengthscales)
    return kern(D, [part])
def white(D,variance=1.):
    """
    Construct a white kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    part = whitepart(D,variance)
    return kern(D, [part])
def exponential(D,variance=1., lengthscales=None):
    """
    Construct an exponential kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = exponentialpart(D,variance, lengthscales)
    return kern(D, [part])
def Matern32(D,variance=1., lengthscales=None):
    """
    Construct a Matern 3/2 kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = Matern32part(D,variance, lengthscales)
    return kern(D, [part])
def Matern52(D,variance=1., lengthscales=None):
    """
    Construct a Matern 5/2 kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = Matern52part(D,variance, lengthscales)
    return kern(D, [part])
def bias(D,variance=1.):
    """
    Construct a bias kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    part = biaspart(D,variance)
    return kern(D, [part])
def finite_dimensional(D,F,G,variances=1.,weights=None):
    """
    Construct a finite dimensional kernel.

    :param D: the number of input dimensions
    :type D: int
    :param F: np.array of functions with shape (n,) - the n basis functions
    :param G: np.array with shape (n,n) - the Gram matrix associated to F
    :param variances: np.ndarray with shape (n,)
    :param weights: optional per-basis weights, np.ndarray with shape (n,)
    """
    part = finite_dimensionalpart(D,F,G,variances,weights)
    return kern(D, [part])
def spline(D,variance=1.):
    """
    Construct a spline kernel.

    :param D: Dimensionality of the kernel
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    part = splinepart(D,variance)
    return kern(D, [part])
def Brownian(D,variance=1.):
    """
    Construct a Brownian motion kernel.

    :param D: Dimensionality of the kernel
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    part = Brownianpart(D,variance)
    return kern(D, [part])

104
GPy/kern/exponential.py Normal file
View file

@ -0,0 +1,104 @@
from kernpart import kernpart
import numpy as np
import hashlib
from scipy import integrate
class exponential(kernpart):
    """
    Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)

    .. math::

       k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscales: the lengthscales :math:`\ell_i` (defaults to ones)
    :type lengthscales: np.ndarray of size (D,)
    :rtype: kernel object
    """
    def __init__(self,D,variance=1.,lengthscales=None):
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        self.Nparam = self.D + 1  # one variance + D lengthscales
        self.name = 'exp'
        self.set_param(np.hstack((variance,lengthscales)))
    def get_param(self):
        """return the value of the parameters."""
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        """set the value of the parameters."""
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
    def get_param_names(self):
        """return parameter names."""
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2 (added to target in place)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        np.add(self.variance*np.exp(-dist), target,target)
    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X (k(x,x) = variance)."""
        np.add(target,self.variance,target)
    def Gram_matrix(self,F,F1,lower,upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.D == 1
        # L applies the exponential-kernel differential operator to F[i] at x
        def L(x,i):
            return(1./self.lengthscales*F[i](x) + F1[i](x))
        n = F.shape[0]
        G = np.zeros((n,n))
        # G is symmetric, so only the upper triangle is integrated
        for i in range(n):
            for j in range(i,n):
                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
        Flower = np.array([f(lower) for f in F])[:,None]
        return(self.lengthscales/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))
    def dK_dtheta(self,X,X2,target):
        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        # guard against division by zero at coincident points (1/inf -> 0)
        invdist = 1./np.where(dist!=0.,dist,np.inf)
        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
        dvar = np.exp(-dist)
        dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,np.newaxis]
        np.add(target[:,:,0],dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
        # diag(K) = variance, so only the variance column gets a (unit) gradient
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
        target += - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
    def dKdiag_dX(self,X,target):
        # diag(K) is constant in X (stationary kernel), so the gradient is zero
        pass

View file

@ -0,0 +1,70 @@
from kernpart import kernpart
import numpy as np
from ..util.linalg import pdinv,mdot
class finite_dimensional(kernpart):
    """
    A finite-dimensional kernel built from an explicit set of basis functions:
    k(x, x') = variance * phi(x)^T W^{1/2} G^{-1} W^{1/2} phi(x'),
    where phi stacks the basis functions F and G is their Gram matrix.
    """
    def __init__(self, D, F, G, variance=1., weights=None):
        """
        :param D: the number of input dimensions
        :type D: int
        :param F: np.array of functions with shape (n,) - the n basis functions
        :param G: np.array with shape (n,n) - the Gram matrix associated to F
        :param variance: overall scale of the kernel
        :type variance: float
        :param weights: np.ndarray with shape (n,) - per-basis weights (default: ones)
        """
        self.D = D
        self.F = F
        self.G = G
        self.G_1 ,tmp = pdinv(G)
        self.n = F.shape[0]
        if weights is not None:
            assert weights.shape==(self.n,)
        else:
            weights = np.ones(self.n)
        self.Nparam = self.n + 1  # one variance + n weights
        self.name = 'finite_dim'
        self.set_param(np.hstack((variance,weights)))
    def get_param(self):
        """Return the parameter vector (variance followed by the weights)."""
        return np.hstack((self.variance,self.weights))
    def set_param(self,x):
        """Set (variance, weights) from a flat vector of length Nparam."""
        assert x.size == (self.Nparam)
        self.variance = x[0]
        self.weights = x[1:]
    def get_param_names(self):
        if self.n==1:
            return ['variance','weight']
        else:
            return ['variance']+['weight_%i'%i for i in range(self.weights.size)]
    def K(self,X,X2,target):
        """Add the covariance between X and X2 to target (in place)."""
        if X2 is None: X2 = X
        FX = np.column_stack([f(X) for f in self.F])
        FX2 = np.column_stack([f(X2) for f in self.F])
        product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
        np.add(product,target,target)
    def Kdiag(self,X,target):
        """Add the diagonal of K(X, X) to target (in place)."""
        # The original referenced an undefined name X2 and treated self.K as
        # returning a matrix; self.K actually fills a buffer in place, so
        # build K(X, X) explicitly and take its diagonal.
        K = np.zeros((X.shape[0], X.shape[0]))
        self.K(X, None, K)
        np.add(target, np.diag(K), target)
    def dK_dtheta(self,X,X2,target):
        """Gradient wrt (variance, weights); return shape is NxMx(Ntheta)"""
        if X2 is None: X2 = X
        FX = np.column_stack([f(X) for f in self.F])
        FX2 = np.column_stack([f(X2) for f in self.F])
        DER = np.zeros((self.n,self.n,self.n))
        for i in range(self.n):
            DER[i,i,i] = np.sqrt(self.weights[i])
        # NOTE(review): only one of the two sqrt(W) factors is differentiated
        # here and the 1/(2*sqrt(w)) chain-rule factor is absent -- verify the
        # weight gradient against a numerical check.
        dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
        dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
        # dv is the plain NxM matrix dK/dvariance; the original transposed it
        # with 3-D axes (0,2,1), which raises ValueError on a 2-D array.
        np.add(target[:,:,0],dv, target[:,:,0])
        np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        # NOTE(review): unlike the stationary kernels, diag(K) here is not
        # simply the variance, so a unit gradient in the variance column
        # looks like a placeholder -- confirm.
        np.add(target[:,0],1.,target[:,0])

308
GPy/kern/kern.py Normal file
View file

@ -0,0 +1,308 @@
import numpy as np
from ..core.parameterised import parameterised
from DelayedDecorator import DelayedDecorator
from functools import partial
from kernpart import kernpart
class transform_gradients_(object):
    """
    A decorator for gradient transformation. Accounts for constraining and tying.

    NB: this uses the DelayedDecorator class to allow it to decorate bound methods
    """
    def __init__(self,f):
        # f: the gradient-computing method being wrapped
        self.f = f
    def __call__(self,*args,**kwargs):
        # args[0] is the kern instance the wrapped method is bound to
        kern = args[0]
        x = kern.get_param()
        g = self.f(*args,**kwargs)
        #first roll the axes of gradients. This is because we always want to access the first axis,
        #else slicing becomes more difficult
        g = np.rollaxis(g,-1)
        ##TODO: the shape of the gradients is slightly problematic
        if len(g.shape)==2:
            x_reshape = (-1,1)
        elif len(g.shape)==3:
            x_reshape = (-1,1,1)
        else:
            raise NotImplementedError
        #do the simple transforms first: chain rule for the positive/negative
        #constraints multiplies each gradient by the constrained parameter value
        # NOTE(review): g[index_array] is a *copy* under advanced indexing, so
        # passing it as np.multiply's out argument writes into a temporary and
        # the scaled values may never land back in g -- verify.
        np.multiply(g[kern.constrained_positive_indices], x[kern.constrained_positive_indices].reshape(x_reshape),g[kern.constrained_positive_indices])
        np.multiply(g[kern.constrained_negative_indices], x[kern.constrained_negative_indices].reshape(x_reshape),g[kern.constrained_negative_indices])
        [np.multiply(g[i], ((x[i]-l)*(h-x[i])/(h-l)).reshape(x_reshape), g[i]) for i,l,h in zip(kern.constrained_bounded_indices, kern.constrained_bounded_lowers, kern.constrained_bounded_uppers)]
        #work out which gradients need to be added (tieing)
        # NOTE(review): np.sum(g[i], g[i[0]], axis=0) passes g[i[0]] where the
        # positional 'axis' argument goes and then repeats axis=0, which raises
        # TypeError; np.add.reduce(g[i], axis=0, out=g[i[0]]) looks intended.
        [np.sum(g[i],g[i[0]], axis=0) for i in kern.tied_indices]
        #work out which gradients are simply cut out (due to fixing)
        if len(kern.tied_indices) or len(kern.constrained_fixed_indices):
            to_remove = np.hstack((kern.constrained_fixed_indices+[t[1:] for t in kern.tied_indices]))
            g = np.delete(g,to_remove,axis=0)
        return np.swapaxes(np.rollaxis(g,0,-1),-1,-2) # undo the rolling of the axes
# Bind the gradient transform through DelayedDecorator so it can wrap bound methods.
transform_gradients = partial(DelayedDecorator,transform_gradients_)
#transform_param = partial(DelayedDecorator,_transform_param) TODO
class kern(parameterised):
def __init__(self,D,parts=[], input_slices=None):
"""
This kernel does 'compound' structures.
The compund structure enables many features of GPy, including
- Hierarchical models
- Correleated output models
- multi-view learning
Hadamard product and outer-product kernels will require a new class TODO.
:param D: The dimensioality of the kernel's input space
:type D: int
:param parts: the 'parts' (PD functions) of the kernel
:type parts: list of kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel
:type input_slices: list of slice objects, or list of bools
"""
self.parts = parts
self.Nparts = len(parts)
self.Nparam = sum([p.Nparam for p in self.parts])
self.D = D
#deal with input_slices
if input_slices is None:
self.input_slices = [slice(None) for p in self.parts]
else:
assert len(input_slices)==len(self.parts)
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self.parts:
assert isinstance(p,kernpart), "bad kernel part"
self.compute_param_slices()
parameterised.__init__(self)
#TODO: Xslices can be passed so that we can compute hierarchical and strutured kernels. these also need implementing for the psi statistics (straightforward)
def compute_param_slices(self):
"""create a set of slices that can index the parameters of each part"""
self.param_slices = []
count = 0
for p in self.parts:
self.param_slices.append(slice(count,count+p.Nparam))
count += p.Nparam
def _process_slices(self,slices1=None,slices2=None):
"""
Format the slices so that they can easily be used.
Both slices can be any of three things:
- If None, the new points covary through every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
if the second arg is False, return only slices1
returns actual lists of slice objects
"""
if slices1 is None:
slices1 = [slice(None)]*self.Nparts
elif all([type(s_i) is bool for s_i in slices1]):
slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
else:
assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
if slices2 is None:
slices2 = [slice(None)]*self.Nparts
elif slices2 is False:
return slices1
elif all([type(s_i) is bool for s_i in slices2]):
slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
else:
assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
return slices1, slices2
def __add__(self,other):
assert self.D == other.D
newkern = kern(self.D,self.parts+other.parts, self.input_slices + other.input_slices)
#transfer constraints:
newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def add(self,other):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added
:type other: GPy.kern
"""
return self + other
def add_orthogonal(self,other):
"""
Add another kernel to this one. Both kernels are defined on separate spaces
:param other: the other kernel to be added
:type other: GPy.kern
"""
#deal with input slices
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0]+self.D,i[1]+self.D,i[2]) for i in other_input_indices]
newkern = kern(D,self.parts+other.parts, self_input_slices + other_input_slices)
#transfer constraints:
newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def get_param(self):
return np.hstack([p.get_param() for p in self.parts])
def set_param(self,x):
[p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)]
def get_param_names(self):
return sum([[k.name+'_'+str(i)+'_'+n for n in k.get_param_names()] for i,k in enumerate(self.parts)],[])
def K(self,X,X2=None,slices1=None,slices2=None):
assert X.shape[1]==self.D
slices1, slices2 = self._process_slices(slices1,slices2)
if X2 is None:
X2 = X
target = np.zeros((X.shape[0],X2.shape[0]))
[p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target
#@transform_gradients
def dK_dtheta(self, X, X2=None, slices1=None, slices2=None):
    """Derivative of K wrt all parameters; return shape is (N, M, Nparam)."""
    assert X.shape[1] == self.D
    slices1, slices2 = self._process_slices(slices1, slices2)
    if X2 is None:
        X2 = X
    target = np.zeros((X.shape[0], X2.shape[0], self.Nparam))
    # each part writes into its own parameter columns (param_slices) and
    # its own data sub-block (slices1/slices2)
    for part, i_s, ps, s1, s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2):
        part.dK_dtheta(X[s1, i_s], X2[s2, i_s], target[s1, s2, ps])
    return target
def dK_dX(self, X, X2=None, slices1=None, slices2=None):
    """Derivative of K wrt X; returned array has shape (M, N, Q).

    NOTE(review): unlike K/Kdiag this does not apply self.input_slices to
    the columns of X/X2 -- confirm whether that is intentional.
    """
    if X2 is None:
        X2 = X
    slices1, slices2 = self._process_slices(slices1, slices2)
    target = np.zeros((X2.shape[0], X.shape[0], X.shape[1]))
    # note the transposed (s2, s1) target block, matching the (M, N, Q) layout
    for part, s1, s2 in zip(self.parts, slices1, slices2):
        part.dK_dX(X[s1], X2[s2], target[s2, s1, :])
    return target
def Kdiag(self, X, slices=None):
    """Diagonal of the covariance matrix for X, shape (N,)."""
    assert X.shape[1] == self.D
    slices = self._process_slices(slices, False)
    target = np.zeros(X.shape[0])
    for part, i_s, s in zip(self.parts, self.input_slices, slices):
        part.Kdiag(X[s, i_s], target=target[s])
    return target
#@transform_gradients
def dKdiag_dtheta(self, X, slices=None):
    """Derivative of Kdiag wrt all parameters, shape (N, Nparam)."""
    assert X.shape[1] == self.D
    slices = self._process_slices(slices, False)
    target = np.zeros((X.shape[0], self.Nparam))
    for part, i_s, s, ps in zip(self.parts, self.input_slices, slices, self.param_slices):
        part.dKdiag_dtheta(X[s, i_s], target[s, ps])
    return target
def dKdiag_dX(self, X, slices=None):
    """Derivative of Kdiag wrt X.

    NOTE(review): the allocated shape is (N, N, Q); a diagonal derivative
    is usually (N, Q) -- confirm what downstream consumers expect.
    """
    assert X.shape[1] == self.D
    slices = self._process_slices(slices, False)
    target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
    for part, i_s, s in zip(self.parts, self.input_slices, slices):
        part.dKdiag_dX(X[s, i_s], target[s, :, :])
    return target
def psi0(self, Z, mu, S, slices_mu=None, slices_Z=None):
    """psi0 statistic accumulated over all parts, shape (N,)."""
    target = np.zeros(mu.shape[0])
    for part in self.parts:
        part.psi0(Z, mu, S, target)
    return target
#@transform_gradients
def dpsi0_dtheta(self, Z, mu, S):
    """Derivative of psi0 wrt all parameters, shape (N, Nparam).

    Each part accumulates into its own parameter columns. The original
    indexed target[s], which applied the parameter slice to the DATA axis
    (rows) of the (N, Nparam) array; siblings such as dpsi1_dtheta index
    the parameter axis, so we use target[:, s] here.
    """
    target = np.zeros((mu.shape[0], self.Nparam))
    for part, ps in zip(self.parts, self.param_slices):
        part.dpsi0_dtheta(Z, mu, S, target[:, ps])
    return target
def dpsi0_dmuS(self, Z, mu, S):
    """Derivatives of psi0 wrt mu and S; each is shaped like mu/S."""
    grad_mu = np.zeros_like(mu)
    grad_S = np.zeros_like(S)
    for part in self.parts:
        part.dpsi0_dmuS(Z, mu, S, grad_mu, grad_S)
    return grad_mu, grad_S
def psi1(self, Z, mu, S):
    """psi1 statistic accumulated over all parts, shape (N, M)."""
    target = np.zeros((mu.shape[0], Z.shape[0]))
    for part in self.parts:
        part.psi1(Z, mu, S, target=target)
    return target
#@transform_gradients
def dpsi1_dtheta(self, Z, mu, S):
    """Derivative of psi1 wrt all parameters, shape (N, M, Nparam)."""
    target = np.zeros((mu.shape[0], Z.shape[0], self.Nparam))
    # each part fills its own parameter columns along the last axis
    for part, ps in zip(self.parts, self.param_slices):
        part.dpsi1_dtheta(Z, mu, S, target[:, :, ps])
    return target
def dpsi1_dZ(self, Z, mu, S):
    """Derivative of psi1 wrt Z, shape (N, M, Q)."""
    target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[1]))
    for part in self.parts:
        part.dpsi1_dZ(Z, mu, S, target)
    return target
def dpsi1_dmuS(self, Z, mu, S):
    """Derivatives of psi1 wrt mu and S; each returned array is (N, M, Q)."""
    shape = (mu.shape[0], Z.shape[0], Z.shape[1])
    grad_mu = np.zeros(shape)
    grad_S = np.zeros(shape)
    for part in self.parts:
        part.dpsi1_dmuS(Z, mu, S, target_mu=grad_mu, target_S=grad_S)
    return grad_mu, grad_S
def psi2(self,Z,mu,S):
    """
    psi2 statistic accumulated over all parts.

    :param Z: np.ndarray of inducing inputs (M x Q)
    :param mu, S: np.ndarrays of means and variances (each N x Q)
    :returns psi2: np.ndarray (N, M, M)
    """
    target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
    [p.psi2(Z,mu,S,target=target) for p in self.parts]
    return target
#@transform_gradients
def dpsi2_dtheta(self,Z,mu,S):
    """Derivative of psi2 wrt the parameters; returned shape is (M, M, Nparam).

    Note: parts are expected to accumulate their contribution summed over
    the data axis N (the allocated target has no N axis).
    """
    target = np.zeros((Z.shape[0],Z.shape[0],self.Nparam))
    [p.dpsi2_dtheta(Z,mu,S,target[:,:,s]) for p,s in zip(self.parts, self.param_slices)]
    return target
def dpsi2_dZ(self, Z, mu, S):
    """Derivative of psi2 wrt Z, shape (N, M, M, Q)."""
    target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0], Z.shape[1]))
    for part in self.parts:
        part.dpsi2_dZ(Z, mu, S, target)
    return target
def dpsi2_dmuS(self, Z, mu, S):
    """Derivatives of psi2 wrt mu and S; each returned array is (N, M, M, Q)."""
    shape = (mu.shape[0], Z.shape[0], Z.shape[0], Z.shape[1])
    grad_mu = np.zeros(shape)
    grad_S = np.zeros(shape)
    for part in self.parts:
        part.dpsi2_dmuS(Z, mu, S, target_mu=grad_mu, target_S=grad_S)
    #TODO: there are some extra terms to compute here!
    return grad_mu, grad_S

53
GPy/kern/kernpart.py Normal file
View file

@ -0,0 +1,53 @@
class kernpart(object):
    """
    Abstract base class for a kernpart: a positive definite function which
    forms part of a kernel. Subclasses must set self.Nparam and self.name
    and implement the methods below.

    :param D: the number of input dimensions to the function
    :type D: int
    """
    def __init__(self,D):
        # The original __init__ referenced an undefined `variance`, set
        # name='white'/Nparam=1. (copy-paste from the white kernel) and
        # called set_param, which raises NotImplementedError here. The base
        # class only needs to record the input dimensionality; every
        # subclass sets its own Nparam/name and parameters.
        self.D = D
    def get_param(self):
        raise NotImplementedError
    def set_param(self,x):
        raise NotImplementedError
    def get_param_names(self):
        raise NotImplementedError
    def K(self,X,X2,target):
        raise NotImplementedError
    def Kdiag(self,X,target):
        raise NotImplementedError
    def dK_dtheta(self,X,X2,target):
        raise NotImplementedError
    def dKdiag_dtheta(self,X,target):
        raise NotImplementedError
    def psi0(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi0_dtheta(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        raise NotImplementedError
    def psi1(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi1_dtheta(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi1_dZ(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        raise NotImplementedError
    def psi2(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi2_dZ(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi2_dtheta(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        raise NotImplementedError
    def dK_dX(self,X,X2,target):
        raise NotImplementedError

103
GPy/kern/linear.py Normal file
View file

@ -0,0 +1,103 @@
from kernpart import kernpart
import numpy as np
class linear(kernpart):
    """
    Linear kernel with a single scale parameter: k(x, x') = variance * x^T x'.

    :param D: the number of input dimensions
    :type D: int
    :param variance: variance (scale) of the kernel; defaults to 1.0
    :type variance: None|float
    """
    def __init__(self, D, variance=None):
        self.D = D
        if variance is None:
            variance = 1.0
        self.Nparam = 1
        self.name = 'linear'
        self.set_param(variance)
    def get_param(self):
        # the single scalar parameter
        return self.variance
    def set_param(self,x):
        self.variance = x
    def get_param_names(self):
        return ['variance']
    def K(self,X,X2,target):
        # accumulate variance * X X2^T into target in place
        target += self.variance * np.dot(X, X2.T)
    def Kdiag(self,X,target):
        # diagonal: variance * sum_q X_nq^2
        np.add(target,np.sum(self.variance*np.square(X),-1),target)
    def dK_dtheta(self,X,X2,target):
        """
        Computes the derivatives wrt theta
        Return shape is NxMx(Ntheta)
        """
        # K is linear in the variance, so dK/dvariance = X X2^T
        if X2 is None: X2 = X
        product = np.dot(X, X2.T)[:,:, None]#X[:,None,:]*X2[None,:,:]
        target += product
    def dK_dX(self,X,X2,target):
        # dK/dX = variance * X2, broadcast over the first axis
        if X2 is None: X2 = X
        np.add(target,X2[:,None,:]*self.variance,target)
    # psi-statistics below are not yet implemented for this kernel; the
    # commented drafts were adapted from linear_ARD and still reference
    # self.variances (plural) -- they need reworking before being enabled.
    # def psi0(self,Z,mu,S,target):
    #     expected = np.square(mu) + S
    #     np.add(target,np.sum(self.variance*expected),target)
    # def dpsi0_dtheta(self,Z,mu,S,target):
    #     expected = np.square(mu) + S
    #     return -2.*np.sum(expected,0)
    # def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
    #     np.add(target_mu,2*mu*self.variances,target_mu)
    #     np.add(target_S,self.variances,target_S)
    # def dpsi0_dZ(self,Z,mu,S,target):
    #     pass
    # def psi1(self,Z,mu,S,target):
    #     """the variance, it does nothing"""
    #     self.K(mu,Z,target)
    # def dpsi1_dtheta(self,Z,mu,S,target):
    #     """the variance, it does nothing"""
    #     self.dK_dtheta(mu,Z,target)
    # def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
    #     """Do nothing for S, it does not affect psi1"""
    #     np.add(target_mu,Z/self.variances2,target_mu)
    # def dpsi1_dZ(self,Z,mu,S,target):
    #     self.dK_dX(mu,Z,target)
    # def psi2(self,Z,mu,S,target):
    #     """Think N,M,M,Q """
    #     mu2_S = np.square(mu)+S # N,Q
    #     ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
    #     psi2 = ZZ*np.square(self.variances)*mu2_S
    #     np.add(target, psi2.sum(-1),target) # M,M
    # def dpsi2_dtheta(self,Z,mu,S,target):
    #     mu2_S = np.square(mu)+S # N,Q
    #     ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
    #     target += 2.*ZZ*mu2_S*self.variances
    # def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
    #     """Think N,M,M,Q """
    #     mu2_S = np.sum(np.square(mu)+S,0) # Q,
    #     ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
    #     tmp = ZZ*np.square(self.variances) # M,M,Q
    #     np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) # N,M,M,Q
    #     np.add(target_S, tmp, target_S) # N,M,M,Q
    # def dpsi2_dZ(self,Z,mu,S,target):
    #     mu2_S = np.sum(np.square(mu)+S,0) # Q,
    #     target += Z[:,None,:]*np.square(self.variances)*mu2_S

114
GPy/kern/linear_ARD.py Normal file
View file

@ -0,0 +1,114 @@
from kernpart import kernpart
import numpy as np
class linear_ARD(kernpart):
    """
    Linear ARD kernel: k(x, x') = sum_q variances_q * x_q * x'_q.

    :param D: the number of input dimensions
    :type D: int
    :param variances: ARD variances, shape (D,); defaults to all ones
    :type variances: None|np.ndarray
    """
    def __init__(self,D,variances=None):
        self.D = D
        if variances is not None:
            assert variances.shape==(self.D,)
        else:
            variances = np.ones(self.D)
        self.Nparam = self.D
        self.name = 'linear'
        self.set_param(variances)
    def get_param(self):
        return self.variances
    def set_param(self,x):
        assert x.size==(self.Nparam)
        self.variances = x
    def get_param_names(self):
        if self.D==1:
            return ['variance']
        else:
            return ['variance_%i'%i for i in range(self.variances.size)]
    def K(self,X,X2,target):
        # scale each column by sqrt(variance) so the dot product applies it once per factor
        XX = X*np.sqrt(self.variances)
        XX2 = X2*np.sqrt(self.variances)
        target += np.dot(XX, XX2.T)
    def Kdiag(self,X,target):
        np.add(target,np.sum(self.variances*np.square(X),-1),target)
    def dK_dtheta(self,X,X2,target):
        """
        Computes the derivatives wrt theta
        Return shape is NxMx(Ntheta)
        """
        # dK/dvariances_q = X_nq * X2_mq
        if X2 is None: X2 = X
        product = X[:,None,:]*X2[None,:,:]
        target += product
    def dK_dX(self,X,X2,target):
        if X2 is None: X2 = X
        np.add(target,X2[:,None,:]*self.variances,target)
    def psi0(self,Z,mu,S,target):
        # psi0_n = E_q[k(x_n, x_n)] = sum_q variances_q * (mu_nq^2 + S_nq).
        # The original summed over ALL axes (scalar); keep the per-datum
        # vector so it matches the (N,) target allocated by the caller.
        expected = np.square(mu) + S
        np.add(target,np.sum(self.variances*expected,-1),target)
    def dpsi0_dtheta(self,Z,mu,S,target):
        # dpsi0_n/dvariances_q = mu_nq^2 + S_nq, accumulated into target.
        # (The original *returned* -2*sum(expected, 0), which the caller
        # discarded, and the factor -2 has no basis in psi0 above.)
        expected = np.square(mu) + S
        np.add(target,expected,target)
    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        np.add(target_mu,2*mu*self.variances,target_mu)
        np.add(target_S,self.variances,target_S)
    def dpsi0_dZ(self,Z,mu,S,target):
        pass
    def psi1(self,Z,mu,S,target):
        """psi1 = K(mu, Z); S does not enter for a kernel linear in x."""
        self.K(mu,Z,target)
    def dpsi1_dtheta(self,Z,mu,S,target):
        """Same as dK_dtheta evaluated at (mu, Z)."""
        self.dK_dtheta(mu,Z,target)
    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        """Do nothing for S, it does not affect psi1"""
        # dpsi1_nm/dmu_nq = variances_q * Z_mq. The original divided by the
        # undefined attribute self.variances2 (NameError at runtime).
        np.add(target_mu,Z*self.variances,target_mu)
    def dpsi1_dZ(self,Z,mu,S,target):
        self.dK_dX(mu,Z,target)
    def psi2(self,Z,mu,S,target):
        """Think N,M,M,Q """
        # NOTE(review): broadcasting ZZ (M,M,Q) against mu2_S (N,Q) only
        # works when N == M; this probably needs mu2_S[:,None,None,:] and a
        # 4-d target -- confirm before relying on psi2 for this kernel.
        mu2_S = np.square(mu)+S # N,Q
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        psi2 = ZZ*np.square(self.variances)*mu2_S
        np.add(target, psi2.sum(-1),target) # M,M
    def dpsi2_dtheta(self,Z,mu,S,target):
        mu2_S = np.square(mu)+S # N,Q
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        target += 2.*ZZ*mu2_S*self.variances
    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        """Think N,M,M,Q """
        mu2_S = np.sum(np.square(mu)+S,0) # Q,
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        tmp = ZZ*np.square(self.variances) # M,M,Q
        np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) # N,M,M,Q
        np.add(target_S, tmp, target_S) # N,M,M,Q
    def dpsi2_dZ(self,Z,mu,S,target):
        mu2_S = np.sum(np.square(mu)+S,0) # Q,
        target += Z[:,None,:]*np.square(self.variances)*mu2_S

155
GPy/kern/rbf.py Normal file
View file

@ -0,0 +1,155 @@
from kernpart import kernpart
import numpy as np
import hashlib
class rbf(kernpart):
    """
    Radial Basis Function kernel, aka squared-exponential or Gaussian kernel:
    k(x, x') = variance * exp(-0.5 * ||x - x'||^2 / lengthscale^2).

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscale: the lengthscale of the kernel
    :type lengthscale: float

    .. Note: for rbf with different lengthscales on each dimension, see rbf_ARD
    """
    def __init__(self,D,variance=1.,lengthscale=1.):
        self.D = D
        self.Nparam = 2
        self.name = 'rbf'
        self.set_param(np.hstack((variance,lengthscale)))
        #initialize cache sentinels (unlikely-to-match arrays)
        self._Z, self._mu, self._S = np.empty(shape=(3,1))
        self._X, self._X2, self._params = np.empty(shape=(3,1))
    def get_param(self):
        # flat parameter vector: [variance, lengthscale]
        return np.hstack((self.variance,self.lengthscale))
    def set_param(self,x):
        self.variance, self.lengthscale = x
        self.lengthscale2 = np.square(self.lengthscale)
        #reset cached results: clobbering _X/_X2 forces _K_computations to
        #recompute with the new parameters on the next call
        self._X, self._X2, self._params = np.empty(shape=(3,1))
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
    def get_param_names(self):
        return ['variance','lengthscale']
    def K(self,X,X2,target):
        # accumulate variance * exp(-0.5 * r^2 / l^2) into target in place
        self._K_computations(X,X2)
        np.add(self.variance*self._K_dvar, target,target)
    def Kdiag(self,X,target):
        # k(x, x) == variance for the RBF kernel
        np.add(target,self.variance,target)
    def dK_dtheta(self,X,X2,target):
        """Return shape is NxMx(Ntheta)"""
        self._K_computations(X,X2)
        # column 0: dK/dvariance = exp(...); column 1: dK/dlengthscale
        target[:,:,0] += self._K_dvar
        target[:,:,1] += self._K_dvar*self.variance*self._K_dist2/self.lengthscale
    def dKdiag_dtheta(self,X,target):
        # the diagonal depends only on the variance (unit gradient)
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        self._K_computations(X,X2)
        _K_dist = X[:,None,:]-X2[None,:,:]
        # transposed to (M, N, Q) to match the layout used by kern.dK_dX
        target += np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
    def dKdiag_dX(self,X,target):
        # Kdiag is constant in X: zero gradient contribution
        pass
    def _K_computations(self,X,X2):
        # cache the scaled squared distances, keyed on the inputs; set_param
        # invalidates the cache by resetting self._X/_X2
        if not (np.all(X==self._X) and np.all(X2==self._X2)):
            self._X = X
            self._X2 = X2
            if X2 is None: X2 = X
            XXT = np.dot(X,X2.T)
            if X is X2:
                # symmetric case: reuse the diagonal of X X^T
                self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
            else:
                self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2
            self._K_exponent = -0.5*self._K_dist2
            self._K_dvar = np.exp(-0.5*self._K_dist2)
if __name__=='__main__':
    #run some simple tests on the kernel (TODO:move these to unititest)
    #TODO: these are broken in this new structure!
    #Ad-hoc finite-difference gradient checks: each helper returns an
    #(objective, gradient) pair for checkgrad to compare.
    N = 10
    M = 5
    Q = 3
    Z = np.random.randn(M,Q)
    mu = np.random.randn(N,Q)
    S = np.random.rand(N,Q)
    var = 2.5
    lengthscales = np.ones(Q)*0.7
    k = rbf(Q,var,lengthscales)
    from checkgrad import checkgrad
    def k_theta_test(param,k):
        #gradient of sum(K) wrt the kernel parameters
        k.set_param(param)
        K = k.K(Z)
        dK_dtheta = k.dK_dtheta(Z)
        f = np.sum(K)
        df = dK_dtheta.sum(0).sum(0)
        return f,np.array(df)
    print "dk_dtheta_test"
    checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))
    def psi1_mu_test(mu,k):
        #gradient of sum(psi1) wrt mu
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
        return f,df.flatten()
    print "psi1_mu_test"
    checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))
    def psi1_S_test(S,k):
        #gradient of sum(psi1) wrt S
        S = S.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
        return f,df.flatten()
    print "psi1_S_test"
    checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))
    def psi1_theta_test(theta,k):
        k.set_param(theta)
        f = np.sum(k.psi1(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
        return f,df
    print "psi1_theta_test"
    checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))
    def psi2_mu_test(mu,k):
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_mu_test"
    checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))
    def psi2_S_test(S,k):
        S = S.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_S_test"
    checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))
    def psi2_theta_test(theta,k):
        k.set_param(theta)
        f = np.sum(k.psi2(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
        return f,df
    print "psi2_theta_test"
    checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))

234
GPy/kern/rbf_ARD.py Normal file
View file

@ -0,0 +1,234 @@
from kernpart import kernpart
import numpy as np
import hashlib
class rbf_ARD(kernpart):
    """
    Radial Basis Function kernel with Automatic Relevance Determination:
    one lengthscale per input dimension.

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: lengthscales, shape (D,); defaults to all ones
    :type lengthscales: None|np.ndarray
    """
    def __init__(self,D,variance=1.,lengthscales=None):
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        self.Nparam = self.D + 1
        self.name = 'rbf_ARD'
        self.set_param(np.hstack((variance,lengthscales)))
        #initialize cache sentinels
        self._Z, self._mu, self._S = np.empty(shape=(3,1))
        self._X, self._X2, self._params = np.empty(shape=(3,1))
    def get_param(self):
        # flat parameter vector: [variance, lengthscale_0 ... lengthscale_{D-1}]
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
        self.lengthscales2 = np.square(self.lengthscales)
        #reset cached results
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
    def get_param_names(self):
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
    def K(self,X,X2,target):
        self._K_computations(X,X2)
        np.add(self.variance*self._K_dvar, target,target)
    def Kdiag(self,X,target):
        # k(x, x) == variance
        np.add(target,self.variance,target)
    def dK_dtheta(self,X,X2,target):
        """Return shape is NxMx(Ntheta)"""
        self._K_computations(X,X2)
        dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscales
        np.add(target[:,:,0],self._K_dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        # the diagonal depends only on the variance
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        self._K_computations(X,X2)
        dZ = self.variance*self._K_dvar[:,:,None]*self._K_dist/self.lengthscales2
        np.add(target,-dZ.transpose(1,0,2),target)
    def psi0(self,Z,mu,S,target):
        np.add(target, self.variance, target)
    def dpsi0_dtheta(self,Z,mu,S,target):
        target[:,0] += 1.
    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        pass
    def psi1(self,Z,mu,S,target):
        """Think N,M,Q """
        self._psi_computations(Z,mu,S)
        np.add(target, self._psi1,target)
    def dpsi1_dtheta(self,Z,mu,S,target):
        """N,Q,(Ntheta)"""
        self._psi_computations(Z,mu,S)
        denom_deriv = S[:,None,:]/(self.lengthscales**3+self.lengthscales*S[:,None,:])
        d_length = self._psi1[:,:,None]*(self.lengthscales*np.square(self._psi1_dist/(self.lengthscales2+S[:,None,:])) + denom_deriv)
        target[:,:,0] += self._psi1/self.variance
        target[:,:,1:] += d_length
    def dpsi1_dZ(self,Z,mu,S,target):
        self._psi_computations(Z,mu,S)
        np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        """return shapes are N,M,Q"""
        self._psi_computations(Z,mu,S)
        tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom
        np.add(target_mu,tmp*self._psi1_dist,target_mu)
        np.add(target_S, 0.5*tmp*(self._psi1_dist_sq-1), target_S)
    def psi2(self,Z,mu,S,target):
        """shape N,M,M"""
        self._psi_computations(Z,mu,S)
        np.add(target, self._psi2,target)
    def dpsi2_dtheta(self,Z,mu,S,target):
        """Shape N,M,M,Ntheta"""
        self._psi_computations(Z,mu,S)
        d_var = np.sum(2.*self._psi2/self.variance,0)
        d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom)
        d_length = d_length.sum(0)
        target[:,:,0] += d_var
        target[:,:,1:] += d_length
    def dpsi2_dZ(self,Z,mu,S,target):
        """Returns shape N,M,M,Q"""
        self._psi_computations(Z,mu,S)
        dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
        target += dZ
    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        """Think N,M,M,Q """
        self._psi_computations(Z,mu,S)
        tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
        np.add(target_mu, -tmp*(2.*self._psi2_mudist),target_mu) #N,M,M,Q
        np.add(target_S, tmp*(2.*self._psi2_mudist_sq-1), target_S) #N,M,M,Q
    def _K_computations(self,X,X2):
        # distance cache keyed on the inputs; parameter-dependent terms are
        # cached separately, keyed on the current parameter vector
        if not (np.all(X==self._X) and np.all(X2==self._X2)):
            self._X = X
            self._X2 = X2
            if X2 is None: X2 = X
            self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
            self._params = np.empty(shape=(1,0))#ensure the next section gets called
        if not np.all(self._params == self.get_param()):
            # BUGFIX: was `self._params == self.get_param()` (a comparison
            # whose result was discarded), so the parameter cache was never
            # recorded and this branch re-ran on every call
            self._params = self.get_param()
            self._K_dist2 = np.square(self._K_dist/self.lengthscales)
            self._K_exponent = -0.5*self._K_dist2.sum(-1)
            self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1))
    def _psi_computations(self,Z,mu,S):
        #here are the "statistics" for psi1 and psi2
        if not np.all(Z==self._Z):
            #Z has changed, compute Z specific stuff.
            #BUGFIX: do NOT record self._Z here -- the original did, which
            #made the branch below think nothing had changed when only Z
            #moved, leaving _psi1/_psi2 stale. _Z is recorded below instead.
            self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
            self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
            self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscales2 # M,M,Q
        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
            #something's changed. recompute EVERYTHING
            #psi1
            self._psi1_denom = S[:,None,:]/self.lengthscales2 + 1.
            self._psi1_dist = Z[None,:,:]-mu[:,None,:]
            self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscales2/self._psi1_denom
            self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
            self._psi1 = self.variance*np.exp(self._psi1_exponent)
            #psi2
            self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscales2+1. # N,M,M,Q
            self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
            self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscales2*self._psi2_denom)
            self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
            self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
            self._Z, self._mu, self._S = Z, mu,S
if __name__=='__main__':
#run some simple tests on the kernel (TODO:move these to unititest)
#TODO: these are broken in this new structure!
N = 10
M = 5
Q = 3
Z = np.random.randn(M,Q)
mu = np.random.randn(N,Q)
S = np.random.rand(N,Q)
var = 2.5
lengthscales = np.ones(Q)*0.7
k = rbf(Q,var,lengthscales)
from checkgrad import checkgrad
def k_theta_test(param,k):
k.set_param(param)
K = k.K(Z)
dK_dtheta = k.dK_dtheta(Z)
f = np.sum(K)
df = dK_dtheta.sum(0).sum(0)
return f,np.array(df)
print "dk_dtheta_test"
checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))
def psi1_mu_test(mu,k):
mu = mu.reshape(N,Q)
f = np.sum(k.psi1(Z,mu,S))
df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
return f,df.flatten()
print "psi1_mu_test"
checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))
def psi1_S_test(S,k):
S = S.reshape(N,Q)
f = np.sum(k.psi1(Z,mu,S))
df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
return f,df.flatten()
print "psi1_S_test"
checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))
def psi1_theta_test(theta,k):
k.set_param(theta)
f = np.sum(k.psi1(Z,mu,S))
df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
return f,df
print "psi1_theta_test"
checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))
def psi2_mu_test(mu,k):
mu = mu.reshape(N,Q)
f = np.sum(k.psi2(Z,mu,S))
df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
return f,df.flatten()
print "psi2_mu_test"
checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))
def psi2_S_test(S,k):
S = S.reshape(N,Q)
f = np.sum(k.psi2(Z,mu,S))
df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
return f,df.flatten()
print "psi2_S_test"
checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))
def psi2_theta_test(theta,k):
k.set_param(theta)
f = np.sum(k.psi2(Z,mu,S))
df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
return f,df
print "psi2_theta_test"
checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))

54
GPy/kern/spline.py Normal file
View file

@ -0,0 +1,54 @@
from kernpart import kernpart
import numpy as np
import hashlib
def theta(x):
    """Heaviside step function: 1.0 where x >= 0, else 0.0."""
    return np.where(x>=0.,1.,0.)
class spline(kernpart):
    """
    Spline kernel on the positive domain.

    :param D: the number of input dimensions (fixed to 1 right now TODO)
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    def __init__(self,D,variance=1.,lengthscale=1.):
        # NOTE(review): lengthscale is accepted but never used -- confirm
        # whether it should be removed from the signature or wired into K
        self.D = D
        assert self.D==1
        self.Nparam = 1
        self.name = 'spline'
        self.set_param(np.squeeze(variance))
    def get_param(self):
        return self.variance
    def set_param(self,x):
        self.variance = x
    def get_param_names(self):
        return ['variance']
    def K(self,X,X2,target):
        assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        t = X
        s = X2.T
        s_t = s-t # broadcasted subtraction
        target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.)
    def Kdiag(self,X,target):
        target += self.variance*X.flatten()**3/3.
    def dK_dtheta(self,X,X2,target):
        # dK/dvariance = K/variance; recompute the shape terms here.
        # (The original referenced t, s and s_t, which were never defined in
        # this method's scope -> NameError at runtime.)
        t = X
        s = X2.T
        s_t = s-t # broadcasted subtraction
        target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
    def dKdiag_dtheta(self,X,target):
        target += X.flatten()**3/3.
    def dKdiag_dX(self,X,target):
        target += self.variance*X**2

82
GPy/kern/white.py Normal file
View file

@ -0,0 +1,82 @@
from kernpart import kernpart
import numpy as np
class white(kernpart):
    """
    White noise kernel: a diagonal contribution of `variance` when the two
    input sets are identical, zero otherwise.

    :param D: the number of input dimensions
    :type D: int
    :param variance: the noise variance
    :type variance: float
    """
    def __init__(self,D,variance=1.):
        self.D = D
        # Nparam must be an int (was the float 1.): kern sums the parts'
        # Nparam values directly into numpy array shapes
        self.Nparam = 1
        self.name = 'white'
        self.set_param(np.array([variance]).flatten())
    def get_param(self):
        return self.variance
    def set_param(self,x):
        assert x.shape==(1,)
        self.variance = x
    def get_param_names(self):
        return ['variance']
    def K(self,X,X2,target):
        # only contributes when X and X2 are the same set of points
        if X.shape==X2.shape and np.all(X==X2):
            np.add(target,np.eye(X.shape[0])*self.variance,target)
    def Kdiag(self,X,target):
        target += self.variance
    def dK_dtheta(self,X,X2,target):
        # dK/dvariance is the identity on matching inputs, zero otherwise
        if X.shape==X2.shape and np.all(X==X2):
            np.add(target[:,:,0],np.eye(X.shape[0]),target[:,:,0])
    def dKdiag_dtheta(self,X,target):
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        # no smooth dependence on X: zero gradient
        pass
    def dKdiag_dX(self,X,target):
        pass
    def psi0(self,Z,mu,S,target):
        target += self.variance
    def dpsi0_dtheta(self,Z,mu,S,target):
        target += 1.
    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        pass
    def psi1(self,Z,mu,S,target):
        # white noise makes no contribution to psi1 (no-op)
        pass
    def dpsi1_dtheta(self,Z,mu,S,target):
        pass
    def dpsi1_dZ(self,Z,mu,S,target):
        pass
    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        pass
    def psi2(self,Z,mu,S,target):
        # white noise makes no contribution to psi2 (no-op)
        pass
    def dpsi2_dZ(self,Z,mu,S,target):
        pass
    def dpsi2_dtheta(self,Z,mu,S,target):
        pass
    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        pass