mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-11 21:12:38 +02:00
kerns
This commit is contained in:
parent
e7a9a6a2fa
commit
63321e8409
18 changed files with 1861 additions and 0 deletions
6
GPy/__init__.py
Normal file
6
GPy/__init__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
import kern
|
||||
import models
|
||||
import inference
|
||||
import util
|
||||
import examples
|
||||
from core import priors
|
||||
56
GPy/kern/Brownian.py
Normal file
56
GPy/kern/Brownian.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
|
||||
def theta(x):
    """Heaviside step function.

    :param x: input scalar or array
    :returns: array (same shape as x) with 1.0 where x >= 0 and 0.0 elsewhere
    """
    return np.where(x >= 0., 1., 0.)
|
||||
|
||||
class Brownian(kernpart):
    """
    Brownian Motion kernel:

    .. math::

       k(x, x') = \sigma^2 \min(x, x')

    :param D: the number of input dimensions (must be 1)
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    """
    def __init__(self, D, variance=1.):
        self.D = D
        assert self.D == 1, "Brownian motion in 1D only"
        # Bug fix: Nparam was the float 1. — every other kernpart uses an
        # int, and Nparam is used to build parameter slices.
        self.Nparam = 1
        self.name = 'Brownian'
        self.set_param(np.array([variance]).flatten())

    def get_param(self):
        """Return the current parameter vector (the variance)."""
        return self.variance

    def set_param(self, x):
        """Set the parameters from a 1-element array."""
        assert x.shape == (1,)
        self.variance = x

    def get_param_names(self):
        """Return the parameter names."""
        return ['variance']

    def K(self, X, X2, target):
        """Add the covariance sigma^2 * min(x, x') into target in place."""
        target += self.variance*np.fmin(X, X2.T)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance, sigma^2 * x, into target."""
        target += self.variance*X.flatten()

    def dK_dtheta(self, X, X2, target):
        """Gradient of K with respect to the variance: min(x, x')."""
        target += np.fmin(X, X2.T)

    def dKdiag_dtheta(self, X, target):
        """Gradient of Kdiag with respect to the variance: x."""
        target += X.flatten()

    def dK_dX(self, X, X2, target):
        """Gradient of K with respect to X (shape NxMxD)."""
        # dK/dx = sigma^2 where x < x' and 0 where x > x'
        target += self.variance
        target -= self.variance*theta(X-X2.T)
        if X.shape == X2.shape:
            if np.all(X == X2):
                # NOTE(review): X2 - X is all zeros inside this branch, so
                # this term is a no-op; presumably it was meant to handle the
                # non-differentiable diagonal — confirm with a gradient check.
                np.add(target[:, :, 0], self.variance*np.diag(X2.flatten()-X.flatten()), target[:, :, 0])

    def dKdiag_dX(self, X, target):
        """Gradient of Kdiag with respect to X: sigma^2."""
        target += self.variance
|
||||
|
||||
86
GPy/kern/DelayedDecorator.py
Normal file
86
GPy/kern/DelayedDecorator.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
from new import instancemethod
|
||||
#this is here so that we can have nice syntatic sugar when taking gradients
|
||||
# See the transform_gradients decorator in kern
|
||||
|
||||
class DelayedDecorator(object):
    """Wrapper that delays decorating a function until it is invoked.

    This class allows a decorator to be used with both ordinary functions and
    methods of classes. It wraps the function passed to it with the decorator
    passed to it, but with some special handling:

    - If the wrapped function is an ordinary function, it will be decorated
      the first time it is called.

    - If the wrapped function is a method of a class, it will be decorated
      separately the first time it is called on each instance of the class.
      It will also be decorated separately the first time it is called as
      an unbound method of the class itself (though this use case should
      be rare).

    NOTE(review): relies on ``instancemethod`` from the Python 2-only ``new``
    module imported at file level — this class is Python 2 specific.
    """

    def __init__(self, deco, func):
        # The base decorated function (which may be modified, see below)
        self._func = func
        # The decorator that will be applied
        self._deco = deco
        # Variable to monitor calling as an ordinary function
        self.__decofunc = None
        # Variable to monitor calling as an unbound method
        self.__clsfunc = None

    def _decorated(self, cls=None, instance=None):
        """Return the decorated function.

        This method is for internal use only; it can be implemented by
        subclasses to modify the actual decorated function before it is
        returned. The ``cls`` and ``instance`` parameters are supplied so
        this method can tell how it was invoked. If it is not overridden,
        the base function stored when this class was instantiated will
        be decorated by the decorator passed when this class was instantiated,
        and then returned.

        Note that factoring out this method, in addition to allowing
        subclasses to modify the decorated function, ensures that the
        right thing is done automatically when the decorated function
        itself is a higher-order function (e.g., a generator function).
        Since this method is called every time the decorated function
        is accessed, a new instance of whatever it returns will be
        created (e.g., a new generator will be realized), which is
        exactly the expected semantics.
        """
        return self._deco(self._func)

    def __call__(self, *args, **kwargs):
        """Direct function call syntax support.

        This makes an instance of this class work just like the underlying
        decorated function when called directly as an ordinary function.
        An internal reference to the decorated function is stored so that
        future direct calls will get the stored function.
        """
        # Lazily decorate on first direct call, then reuse the result.
        if not self.__decofunc:
            self.__decofunc = self._decorated()
        return self.__decofunc(*args, **kwargs)

    def __get__(self, instance, cls):
        """Descriptor protocol support.

        This makes an instance of this class function correctly when it
        is used to decorate a method on a user-defined class. If called
        as a bound method, we store the decorated function in the instance
        dictionary, so we will not be called again for that instance. If
        called as an unbound method, we store a reference to the decorated
        function internally and use it on future unbound method calls.
        """
        # NOTE(review): truthiness test — a falsy-but-valid instance would be
        # routed to the unbound-method branch; presumably instances here are
        # always truthy. Confirm if kern objects ever define __len__/__bool__.
        if instance:
            deco = instancemethod(self._decorated(cls, instance), instance, cls)
            # This prevents us from being called again for this instance
            setattr(instance, self._func.__name__, deco)
        elif cls:
            if not self.__clsfunc:
                self.__clsfunc = instancemethod(self._decorated(cls), None, cls)
            deco = self.__clsfunc
        else:
            raise ValueError("Must supply instance or class to descriptor.")
        return deco
|
||||
107
GPy/kern/Matern32.py
Normal file
107
GPy/kern/Matern32.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
from ..util.linalg import pdinv,mdot
|
||||
from scipy import integrate
|
||||
|
||||
class Matern32(kernpart):
    """
    Matern 3/2 kernel:

    .. math::

       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscale: the lengthscales :math:`\ell_i`
    :type lengthscale: np.ndarray of size (D,)
    :rtype: kernel object

    """

    def __init__(self,D,variance=1.,lengthscales=None):
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        # one variance parameter plus one lengthscale per input dimension
        self.Nparam = self.D + 1
        self.name = 'Mat32'
        self.set_param(np.hstack((variance,lengthscales)))

    def get_param(self):
        """return the value of the parameters."""
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        """set the value of the parameters."""
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
    def get_param_names(self):
        """return parameter names."""
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]

    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2."""
        if X2 is None: X2 = X
        # dist[i,j] = lengthscale-scaled Euclidean distance r between X[i] and X2[j]
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        # accumulate into target in place (third argument of np.add is the output)
        np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target)
    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        # stationary kernel: k(0) = variance on the diagonal
        np.add(target,self.variance,target)

    def Gram_matrix(self,F,F1,F2,lower,upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.D == 1
        # L applies the differential operator associated with the Matern 3/2 RKHS
        def L(x,i):
            return(3./self.lengthscales**2*F[i](x) + 2*np.sqrt(3)/self.lengthscales*F1[i](x) + F2[i](x))
        n = F.shape[0]
        G = np.zeros((n,n))
        # G is symmetric, so only the upper triangle is integrated
        for i in range(n):
            for j in range(i,n):
                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
        Flower = np.array([f(lower) for f in F])[:,None]
        F1lower = np.array([f(lower) for f in F1])[:,None]
        #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
        #return(G)
        return(self.lengthscales**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscales**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))

    def dK_dtheta(self,X,X2,target):
        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist)
        # invdist guards the r = 0 entries: 1/inf = 0 there, so no NaNs appear
        invdist = 1./np.where(dist!=0.,dist,np.inf)
        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
        dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
        # column 0 is the variance gradient, columns 1: are the lengthscale gradients
        np.add(target[:,:,0],dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
        # Kdiag = variance, so only the variance column has nonzero gradient
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
        # np.where keeps the zero-distance entries finite (division yields 0)
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
        # NOTE(review): the (1,0,2) transpose swaps the N and M axes — confirm
        # against the caller whether the gradient is taken w.r.t. X or X2.
        target += - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
    def dKdiag_dX(self,X,target):
        # the diagonal is constant in X, so the gradient is zero
        pass
|
||||
|
||||
112
GPy/kern/Matern52.py
Normal file
112
GPy/kern/Matern52.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
from scipy import integrate
|
||||
|
||||
class Matern52(kernpart):
    """
    Matern 5/2 kernel:

    .. math::

       k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscale: the lengthscales :math:`\ell_i`
    :type lengthscale: np.ndarray of size (D,)
    :rtype: kernel object

    """
    def __init__(self,D,variance=1.,lengthscales=None):
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        # one variance parameter plus one lengthscale per input dimension
        self.Nparam = self.D + 1
        self.name = 'Mat52'
        self.set_param(np.hstack((variance,lengthscales)))
        # np.empty((3,1)) unpacks into three fresh 1-element arrays;
        # presumably these act as never-matching sentinels for cached
        # Z, mu, S values — confirm against the code that compares them.
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S

    def get_param(self):
        """return the value of the parameters."""
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        """set the value of the parameters."""
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
        self.lengthscales2 = np.square(self.lengthscales)
        # reset the cache sentinels: parameters changed, so cached values are stale
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
    def get_param_names(self):
        """return parameter names."""
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2."""
        if X2 is None: X2 = X
        # dist[i,j] = lengthscale-scaled Euclidean distance r between X[i] and X2[j]
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        # accumulate into target in place (third argument of np.add is the output)
        np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        # stationary kernel: k(0) = variance on the diagonal
        np.add(target,self.variance,target)

    def Gram_matrix(self,F,F1,F2,F3,lower,upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param F2: vector of second derivatives of F
        :type F2: np.array
        :param F3: vector of third derivatives of F
        :type F3: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.D == 1
        # L applies the differential operator associated with the Matern 5/2 RKHS
        def L(x,i):
            return(5*np.sqrt(5)/self.lengthscales**3*F[i](x) + 15./self.lengthscales**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscales*F2[i](x) + F3[i](x))
        n = F.shape[0]
        G = np.zeros((n,n))
        # G is symmetric, so only the upper triangle is integrated
        for i in range(n):
            for j in range(i,n):
                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
        G_coef = 3.*self.lengthscales**5/(400*np.sqrt(5))
        Flower = np.array([f(lower) for f in F])[:,None]
        F1lower = np.array([f(lower) for f in F1])[:,None]
        F2lower = np.array([f(lower) for f in F2])[:,None]
        orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscales**4/200*np.dot(F2lower,F2lower.T)
        orig2 = 3./5*self.lengthscales**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
        return(1./self.variance* (G_coef*G + orig + orig2))

    def dK_dtheta(self,X,X2,target):
        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        # invdist guards the r = 0 entries: 1/inf = 0 there, so no NaNs appear
        invdist = 1./np.where(dist!=0.,dist,np.inf)
        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
        dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
        dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
        # column 0 is the variance gradient, columns 1: are the lengthscale gradients
        np.add(target[:,:,0],dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
        # Kdiag = variance, so only the variance column has nonzero gradient
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
        # np.where keeps the zero-distance entries finite (division yields 0)
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
        # NOTE(review): the (1,0,2) transpose swaps the N and M axes — confirm
        # against the caller whether the gradient is taken w.r.t. X or X2.
        target += - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
    def dKdiag_dX(self,X,target):
        # the diagonal is constant in X, so the gradient is zero
        pass
|
||||
|
||||
|
||||
2
GPy/kern/__init__.py
Normal file
2
GPy/kern/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD
|
||||
from kern import kern
|
||||
47
GPy/kern/bias.py
Normal file
47
GPy/kern/bias.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
|
||||
class bias(kernpart):
    """
    Constant (bias) kernel:

    .. math::

       k(x, x') = \sigma^2

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    """
    def __init__(self,D,variance=1.):
        self.D = D
        self.Nparam = 1
        self.name = 'bias'
        self.set_param(np.array([variance]).flatten())

    def get_param(self):
        """Return the current parameter vector (the variance)."""
        return self.variance

    def set_param(self,x):
        """Set the parameters from a 1-element array."""
        assert x.shape==(1,)
        self.variance = x

    def get_param_names(self):
        """Return the parameter names."""
        return ['variance']

    def K(self,X,X2,target):
        """Add the constant covariance into target in place (X2 is unused)."""
        np.add(self.variance, target,target)

    def Kdiag(self,X,target):
        """Add the constant variance to the diagonal target in place."""
        np.add(target,self.variance,target)

    def dK_dtheta(self,X,X2,target):
        """Gradient of K w.r.t. the variance. Return shape is NxMx(Ntheta)."""
        np.add(target[:,:,0],1., target[:,:,0])

    def dKdiag_dtheta(self,X,target):
        """Gradient of Kdiag w.r.t. the variance (shape NxNparam)."""
        np.add(target[:,0],1.,target[:,0])

    def dK_dX(self, X, X2, target):
        """The constant kernel has zero gradient with respect to X."""
        pass

    def dKdiag_dX(self,X,target):
        """The constant diagonal has zero gradient with respect to X."""
        pass
|
||||
168
GPy/kern/constructors.py
Normal file
168
GPy/kern/constructors.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
import numpy as np
|
||||
from kern import kern
|
||||
|
||||
from rbf import rbf as rbfpart
|
||||
from rbf_ARD import rbf_ARD as rbf_ARD_part
|
||||
from white import white as whitepart
|
||||
from linear import linear as linearpart
|
||||
from linear_ARD import linear_ARD as linear_ARD_part
|
||||
from exponential import exponential as exponentialpart
|
||||
from Matern32 import Matern32 as Matern32part
|
||||
from Matern52 import Matern52 as Matern52part
|
||||
from bias import bias as biaspart
|
||||
from finite_dimensional import finite_dimensional as finite_dimensionalpart
|
||||
from spline import spline as splinepart
|
||||
from Brownian import Brownian as Brownianpart
|
||||
|
||||
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
|
||||
#using meta-classes to make the objects construct properly wthout them.
|
||||
|
||||
def rbf(D,variance=1., lengthscale=1.):
    """
    Construct an RBF kernel

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscale: the lengthscale of the kernel
    :type lengthscale: float
    """
    return kern(D, [rbfpart(D, variance, lengthscale)])
|
||||
|
||||
def rbf_ARD(D,variance=1., lengthscales=None):
    """
    Construct an RBF kernel with Automatic Relevance Determination (ARD)

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    return kern(D, [rbf_ARD_part(D, variance, lengthscales)])
|
||||
|
||||
def linear(D,lengthscales=None):
    """
    Construct a linear kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = linearpart(D,lengthscales)
    return kern(D, [part])
|
||||
|
||||
def linear_ARD(D,lengthscales=None):
    """
    Construct a linear ARD kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = linear_ARD_part(D,lengthscales)
    return kern(D, [part])
|
||||
|
||||
def white(D,variance=1.):
    """
    Construct a white-noise kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    part = whitepart(D,variance)
    return kern(D, [part])
|
||||
|
||||
def exponential(D,variance=1., lengthscales=None):
    """
    Construct an exponential kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = exponentialpart(D,variance, lengthscales)
    return kern(D, [part])
|
||||
|
||||
def Matern32(D,variance=1., lengthscales=None):
    """
    Construct a Matern 3/2 kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = Matern32part(D,variance, lengthscales)
    return kern(D, [part])
|
||||
|
||||
def Matern52(D,variance=1., lengthscales=None):
    """
    Construct a Matern 5/2 kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscales: the lengthscales of the kernel
    :type lengthscales: None|np.ndarray
    """
    part = Matern52part(D,variance, lengthscales)
    return kern(D, [part])
|
||||
|
||||
def bias(D,variance=1.):
    """
    Construct a bias (constant) kernel.

    :param D: dimensionality of the kernel, obligatory
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    part = biaspart(D,variance)
    return kern(D, [part])
|
||||
|
||||
def finite_dimensional(D,F,G,variances=1.,weights=None):
    """
    Construct a finite dimensional kernel.

    :param D: the number of input dimensions
    :type D: int
    :param F: the n basis functions
    :type F: np.array of functions with shape (n,)
    :param G: the Gram matrix associated to F
    :type G: np.array with shape (n,n)
    :param variances: overall variance of the kernel
        (NOTE(review): passed through as the scalar ``variance`` of the
        underlying part — confirm whether a per-basis array was intended)
    :param weights: weight of each basis function
    :type weights: np.ndarray with shape (n,)
    """
    part = finite_dimensionalpart(D,F,G,variances,weights)
    return kern(D, [part])
|
||||
|
||||
def spline(D,variance=1.):
    """
    Construct a spline kernel.

    :param D: Dimensionality of the kernel
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    return kern(D, [splinepart(D, variance)])
|
||||
|
||||
def Brownian(D,variance=1.):
    """
    Construct a Brownian motion kernel.

    :param D: Dimensionality of the kernel
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """
    return kern(D, [Brownianpart(D, variance)])
|
||||
104
GPy/kern/exponential.py
Normal file
104
GPy/kern/exponential.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
from scipy import integrate
|
||||
|
||||
class exponential(kernpart):
    """
    Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)

    .. math::

       k(r) = \sigma^2 \exp(- r) \qquad \qquad \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} }

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param lengthscale: the lengthscales :math:`\ell_i`
    :type lengthscale: np.ndarray of size (D,)
    :rtype: kernel object

    """
    def __init__(self,D,variance=1.,lengthscales=None):
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        # one variance parameter plus one lengthscale per input dimension
        self.Nparam = self.D + 1
        self.name = 'exp'
        self.set_param(np.hstack((variance,lengthscales)))

    def get_param(self):
        """return the value of the parameters."""
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        """set the value of the parameters."""
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
    def get_param_names(self):
        """return parameter names."""
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2."""
        if X2 is None: X2 = X
        # dist[i,j] = lengthscale-scaled Euclidean distance r between X[i] and X2[j]
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        # accumulate into target in place (third argument of np.add is the output)
        np.add(self.variance*np.exp(-dist), target,target)
    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        # stationary kernel: k(0) = variance on the diagonal
        np.add(target,self.variance,target)

    def Gram_matrix(self,F,F1,lower,upper):
        """
        Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1.

        :param F: vector of functions
        :type F: np.array
        :param F1: vector of derivatives of F
        :type F1: np.array
        :param lower,upper: boundaries of the input domain
        :type lower,upper: floats
        """
        assert self.D == 1
        # L applies the differential operator associated with the OU-process RKHS
        def L(x,i):
            return(1./self.lengthscales*F[i](x) + F1[i](x))
        n = F.shape[0]
        G = np.zeros((n,n))
        # G is symmetric, so only the upper triangle is integrated
        for i in range(n):
            for j in range(i,n):
                G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
        Flower = np.array([f(lower) for f in F])[:,None]
        return(self.lengthscales/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))

    def dK_dtheta(self,X,X2,target):
        """derivative of the cross-covariance matrix with respect to the parameters (shape is NxMxNparam)"""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
        # invdist guards the r = 0 entries: 1/inf = 0 there, so no NaNs appear
        invdist = 1./np.where(dist!=0.,dist,np.inf)
        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
        dvar = np.exp(-dist)
        dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,np.newaxis]
        # column 0 is the variance gradient, columns 1: are the lengthscale gradients
        np.add(target[:,:,0],dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        """derivative of the diagonal of the covariance matrix with respect to the parameters (shape is NxNparam)"""
        # Kdiag = variance, so only the variance column has nonzero gradient
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        """derivative of the covariance matrix with respect to X (*! shape is NxMxD !*)."""
        if X2 is None: X2 = X
        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
        # np.where keeps the zero-distance entries finite (division yields 0)
        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
        # NOTE(review): the (1,0,2) transpose swaps the N and M axes — confirm
        # against the caller whether the gradient is taken w.r.t. X or X2.
        target += - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
    def dKdiag_dX(self,X,target):
        # the diagonal is constant in X, so the gradient is zero
        pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
70
GPy/kern/finite_dimensional.py
Normal file
70
GPy/kern/finite_dimensional.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
from ..util.linalg import pdinv,mdot
|
||||
|
||||
class finite_dimensional(kernpart):
    """
    Finite dimensional kernel built from a fixed set of basis functions.

    :param D: the number of input dimensions
    :type D: int
    :param F: the n basis functions
    :type F: np.array of functions with shape (n,)
    :param G: the Gram matrix associated to F
    :type G: np.array with shape (n,n)
    :param variance: overall variance of the kernel
    :type variance: float
    :param weights: weight of each basis function
    :type weights: np.ndarray with shape (n,)
    """
    def __init__(self, D, F, G, variance=1., weights=None):
        self.D = D
        self.F = F
        self.G = G
        # precompute the inverse of the Gram matrix (pdinv also returns the
        # Cholesky factor, which is not needed here)
        self.G_1 ,tmp = pdinv(G)
        self.n = F.shape[0]
        if weights is not None:
            assert weights.shape==(self.n,)
        else:
            weights = np.ones(self.n)
        # one variance parameter plus one weight per basis function
        self.Nparam = self.n + 1
        self.name = 'finite_dim'
        self.set_param(np.hstack((variance,weights)))

    def get_param(self):
        """Return the current parameter vector (variance, then weights)."""
        return np.hstack((self.variance,self.weights))

    def set_param(self,x):
        """Set the parameters from an array of size Nparam."""
        assert x.size == (self.Nparam)
        self.variance = x[0]
        self.weights = x[1:]

    def get_param_names(self):
        """Return the parameter names."""
        if self.n==1:
            return ['variance','weight']
        else:
            return ['variance']+['weight_%i'%i for i in range(self.weights.size)]

    def K(self,X,X2,target):
        """Add the covariance between X and X2 into target in place."""
        if X2 is None: X2 = X
        # evaluate each basis function on the inputs: columns are basis functions
        FX = np.column_stack([f(X) for f in self.F])
        FX2 = np.column_stack([f(X2) for f in self.F])
        product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
        np.add(product,target,target)

    def Kdiag(self,X,target):
        """Add the diagonal of the covariance of X with itself into target."""
        # Bug fix: the original referenced an undefined name X2 and used the
        # (None) return value of K. Accumulate K(X, X) into a buffer and take
        # its diagonal instead.
        K_XX = np.zeros((X.shape[0], X.shape[0]))
        self.K(X, X, K_XX)
        np.add(target, np.diag(K_XX), target)

    def dK_dtheta(self,X,X2,target):
        """Gradient of K w.r.t. the parameters. Return shape is NxMx(Ntheta)."""
        if X2 is None: X2 = X
        FX = np.column_stack([f(X) for f in self.F])
        FX2 = np.column_stack([f(X2) for f in self.F])
        # DER[i] is the derivative of diag(sqrt(weights)) w.r.t. weight i
        DER = np.zeros((self.n,self.n,self.n))
        for i in range(self.n):
            DER[i,i,i] = np.sqrt(self.weights[i])
        dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
        dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
        # NOTE(review): dv is 2-D here, so np.transpose(dv,(0,2,1)) would
        # raise; these transposes look suspect — confirm the intended axis
        # layout against the gradient checker before relying on this method.
        np.add(target[:,:,0],np.transpose(dv,(0,2,1)), target[:,:,0])
        np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:])

    def dKdiag_dtheta(self,X,target):
        """Gradient of Kdiag w.r.t. the parameters (shape NxNparam)."""
        # NOTE(review): Kdiag depends on variance and weights, yet only a
        # constant 1 is added to the variance column — presumably a
        # placeholder; confirm against a finite-difference check.
        np.add(target[:,0],1.,target[:,0])
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
308
GPy/kern/kern.py
Normal file
308
GPy/kern/kern.py
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
import numpy as np
|
||||
from ..core.parameterised import parameterised
|
||||
from DelayedDecorator import DelayedDecorator
|
||||
from functools import partial
|
||||
from kernpart import kernpart
|
||||
|
||||
class transform_gradients_(object):
    """
    A decorator for gradient transformation. Accounts for constraining and tying.

    NB: this uses the Delayed_decorator class so allow it to decorate bound methods
    """
    def __init__(self,f):
        # f is the undecorated gradient method being wrapped
        self.f = f
    def __call__(self,*args,**kwargs):
        # args[0] is the kern instance the wrapped method was called on
        kern = args[0]
        x = kern.get_param()
        g = self.f(*args,**kwargs)

        #first roll the axes of gradients. This is because we always want to acces the first axis,
        #else slicing becomes more difficult
        g = np.rollaxis(g,-1)

        ##TODO: the shape of the gradients is slightly problematic
        # reshape x so it broadcasts against 2-D or 3-D gradient arrays
        if len(g.shape)==2:
            x_reshape = (-1,1)
        elif len(g.shape)==3:
            x_reshape = (-1,1,1)
        else:
            raise NotImplementedError

        #do the simple transforms first
        # chain rule through the constraint transforms: exp for positive /
        # negative constraints, logistic for bounded constraints
        np.multiply(g[kern.constrained_positive_indices], x[kern.constrained_positive_indices].reshape(x_reshape),g[kern.constrained_positive_indices])
        np.multiply(g[kern.constrained_negative_indices], x[kern.constrained_negative_indices].reshape(x_reshape),g[kern.constrained_negative_indices])
        [np.multiply(g[i], ((x[i]-l)*(h-x[i])/(h-l)).reshape(x_reshape), g[i]) for i,l,h in zip(kern.constrained_bounded_indices, kern.constrained_bounded_lowers, kern.constrained_bounded_uppers)]

        #work out which gradients need to be added (tieing)
        # NOTE(review): np.sum's second positional argument is `axis`, so
        # passing g[i[0]] there alongside axis=0 looks wrong — presumably the
        # intent was to sum the tied gradients into the first tied slot
        # (e.g. g[i[0]] = g[i].sum(0)); confirm with a gradient check.
        [np.sum(g[i],g[i[0]], axis=0) for i in kern.tied_indices]

        #work out which gradients are simply cut out (due to fixing)
        # fixed parameters, and all-but-the-first of each tied group, are
        # removed from the gradient vector entirely
        if len(kern.tied_indices) or len(kern.constrained_fixed_indices):
            to_remove = np.hstack((kern.constrained_fixed_indices+[t[1:] for t in kern.tied_indices]))
            g = np.delete(g,to_remove,axis=0)

        return np.swapaxes(np.rollaxis(g,0,-1),-1,-2) # undo the rolling of the axes
|
||||
|
||||
# Wrap the decorator in DelayedDecorator so it also works on bound methods.
transform_gradients = partial(DelayedDecorator,transform_gradients_)
#transform_param = partial(DelayedDecorator,_transform_param) TODO
|
||||
|
||||
|
||||
|
||||
|
||||
class kern(parameterised):
    # A 'compound' kernel: a sum of kernpart objects, each of which may act on
    # its own slice of the input columns and on its own subset of the data.
    def __init__(self,D,parts=[], input_slices=None):
        """
        This kernel does 'compound' structures.

        The compound structure enables many features of GPy, including
         - Hierarchical models
         - Correlated output models
         - multi-view learning

        Hadamard product and outer-product kernels will require a new class TODO.

        :param D: The dimensionality of the kernel's input space
        :type D: int
        :param parts: the 'parts' (PD functions) of the kernel
        :type parts: list of kernpart objects
        :param input_slices: the slices on the inputs which apply to each kernel
        :type input_slices: list of slice objects, or list of bools
        """
        # NOTE(review): `parts=[]` is a mutable default argument -- safe only
        # while it is never mutated in place.
        self.parts = parts
        self.Nparts = len(parts)
        self.Nparam = sum([p.Nparam for p in self.parts])

        self.D = D

        #deal with input_slices: by default every part sees all input columns
        if input_slices is None:
            self.input_slices = [slice(None) for p in self.parts]
        else:
            assert len(input_slices)==len(self.parts)
            # any non-slice entry (e.g. True) means "use all columns"
            self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]

        for p in self.parts:
            assert isinstance(p,kernpart), "bad kernel part"

        self.compute_param_slices()

        parameterised.__init__(self)

    #TODO: Xslices can be passed so that we can compute hierarchical and strutured kernels. these also need implementing for the psi statistics (straightforward)

    def compute_param_slices(self):
        """create a set of slices that can index the parameters of each part"""
        self.param_slices = []
        count = 0
        for p in self.parts:
            self.param_slices.append(slice(count,count+p.Nparam))
            count += p.Nparam

    def _process_slices(self,slices1=None,slices2=None):
        """
        Format the slices so that they can easily be used.
        Both slices can be any of three things:
         - If None, the new points covary through every kernel part (default)
         - If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
         - If a list of booleans, specifying which kernel parts are active

        if the second arg is False, return only slices1

        returns actual lists of slice objects
        """
        if slices1 is None:
            slices1 = [slice(None)]*self.Nparts
        elif all([type(s_i) is bool for s_i in slices1]):
            # a False entry becomes slice(0): the part touches no data
            slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
        else:
            assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
        if slices2 is None:
            slices2 = [slice(None)]*self.Nparts
        elif slices2 is False:
            # caller only wants the first set of slices (e.g. Kdiag)
            return slices1
        elif all([type(s_i) is bool for s_i in slices2]):
            slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
        else:
            assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
        return slices1, slices2

    def __add__(self,other):
        # Sum of two kernels on the SAME input space; parts are concatenated
        # and the constraint bookkeeping from `parameterised` is re-indexed
        # by offsetting `other`'s parameter indices by self.Nparam.
        assert self.D == other.D
        newkern = kern(self.D,self.parts+other.parts, self.input_slices + other.input_slices)
        #transfer constraints:
        newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
        newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
        newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
        newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
        newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
        newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
        newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
        return newkern

    def add(self,other):
        """
        Add another kernel to this one. Both kernels are defined on the same _space_
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
        return self + other

    def add_orthogonal(self,other):
        """
        Add another kernel to this one. Both kernels are defined on separate spaces
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
        #deal with input slices: other's slices are shifted to the new columns
        D = self.D + other.D
        self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
        other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
        other_input_slices = [slice(i[0]+self.D,i[1]+self.D,i[2]) for i in other_input_indices]

        newkern = kern(D,self.parts+other.parts, self_input_slices + other_input_slices)

        #transfer constraints (same re-indexing as __add__):
        newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
        newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
        newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
        newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
        newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
        newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
        newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
        return newkern

    def get_param(self):
        # concatenation of each part's parameters, in part order
        return np.hstack([p.get_param() for p in self.parts])

    def set_param(self,x):
        # distribute the flat parameter vector back to the parts
        [p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)]

    def get_param_names(self):
        # names are prefixed with the part name and its index to disambiguate
        return sum([[k.name+'_'+str(i)+'_'+n for n in k.get_param_names()] for i,k in enumerate(self.parts)],[])

    def K(self,X,X2=None,slices1=None,slices2=None):
        # Covariance matrix: each part accumulates into its (row, column)
        # block of the target, seeing only its own input columns (i_s).
        assert X.shape[1]==self.D
        slices1, slices2 = self._process_slices(slices1,slices2)
        if X2 is None:
            X2 = X
        target = np.zeros((X.shape[0],X2.shape[0]))
        [p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
        return target

    #@transform_gradients
    def dK_dtheta(self,X,X2=None,slices1=None,slices2=None):
        """Return shape is NxMx(Ntheta)"""
        assert X.shape[1]==self.D
        slices1, slices2 = self._process_slices(slices1,slices2)
        if X2 is None:
            X2 = X
        target = np.zeros((X.shape[0],X2.shape[0],self.Nparam))
        # ps selects the parameter slab of the part within the last axis
        [p.dK_dtheta(X[s1,i_s],X2[s2,i_s],target[s1,s2,ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
        return target

    def dK_dX(self,X,X2=None,slices1=None,slices2=None):
        # NOTE(review): unlike K/dK_dtheta, the parts are given the FULL input
        # columns here (no i_s applied) and `ps` is zipped but unused --
        # confirm whether input_slices should be applied as elsewhere.
        if X2 is None:
            X2 = X
        slices1, slices2 = self._process_slices(slices1,slices2)
        target = np.zeros((X2.shape[0],X.shape[0],X.shape[1]))
        [p.dK_dX(X[s1],X2[s2],target[s2,s1,:]) for p,ps,s1,s2 in zip(self.parts, self.param_slices,slices1,slices2)]
        return target

    def Kdiag(self,X,slices=None):
        # diagonal of K(X, X); cheaper than forming the full matrix
        assert X.shape[1]==self.D
        slices = self._process_slices(slices,False)
        target = np.zeros(X.shape[0])
        [p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
        return target

    #@transform_gradients
    def dKdiag_dtheta(self,X,slices=None):
        # gradients of the diagonal wrt all parameters, shape (N, Nparam)
        assert X.shape[1]==self.D
        slices = self._process_slices(slices,False)
        target = np.zeros((X.shape[0],self.Nparam))
        [p.dKdiag_dtheta(X[s,i_s],target[s,ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
        return target

    def dKdiag_dX(self, X, slices=None):
        assert X.shape[1]==self.D
        slices = self._process_slices(slices,False)
        target = np.zeros((X.shape[0],X.shape[0],X.shape[1]))
        [p.dKdiag_dX(X[s,i_s],target[s,:,:]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
        return target

    def psi0(self,Z,mu,S,slices_mu=None,slices_Z=None):
        # NOTE(review): slices_mu/slices_Z are accepted but not used.
        target = np.zeros(mu.shape[0])
        [p.psi0(Z,mu,S,target) for p in self.parts]
        return target

    #@transform_gradients
    def dpsi0_dtheta(self,Z,mu,S):
        target = np.zeros((mu.shape[0],self.Nparam))
        # NOTE(review): `target[s]` slices ROWS with a parameter slice; the
        # pattern elsewhere suggests `target[:,s]` was intended -- confirm.
        [p.dpsi0_dtheta(Z,mu,S,target[s]) for p,s in zip(self.parts, self.param_slices)]
        return target

    def dpsi0_dmuS(self,Z,mu,S):
        target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
        [p.dpsi0_dmuS(Z,mu,S,target_mu,target_S) for p in self.parts]
        return target_mu,target_S

    def psi1(self,Z,mu,S):
        """Think N,M,Q """
        target = np.zeros((mu.shape[0],Z.shape[0]))
        [p.psi1(Z,mu,S,target=target) for p in self.parts]
        return target

    #@transform_gradients
    def dpsi1_dtheta(self,Z,mu,S):
        """N,M,(Ntheta)"""
        target = np.zeros((mu.shape[0],Z.shape[0],self.Nparam))
        [p.dpsi1_dtheta(Z,mu,S,target[:,:,s]) for p,s in zip(self.parts, self.param_slices)]
        return target

    def dpsi1_dZ(self,Z,mu,S):
        """N,M,Q"""
        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[1]))
        [p.dpsi1_dZ(Z,mu,S,target) for p in self.parts]
        return target

    def dpsi1_dmuS(self,Z,mu,S):
        """return shapes are N,M,Q"""
        target_mu, target_S = np.zeros((2,mu.shape[0],Z.shape[0],Z.shape[1]))
        [p.dpsi1_dmuS(Z,mu,S,target_mu=target_mu,target_S = target_S) for p in self.parts]
        return target_mu, target_S

    def psi2(self,Z,mu,S):
        """
        :Z: np.ndarray of inducing inputs (M x Q)
        : mu, S: np.ndarrays of means and variances (each N x Q)
        :returns psi2: np.ndarray (N,M,M,Q) """
        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
        [p.psi2(Z,mu,S,target=target) for p in self.parts]
        return target

    #@transform_gradients
    def dpsi2_dtheta(self,Z,mu,S):
        """Returns shape (N,M,M,Ntheta)"""
        # NOTE(review): the allocated shape is (M,M,Ntheta), which disagrees
        # with the docstring above -- confirm which is intended.
        target = np.zeros((Z.shape[0],Z.shape[0],self.Nparam))
        [p.dpsi2_dtheta(Z,mu,S,target[:,:,s]) for p,s in zip(self.parts, self.param_slices)]
        return target

    def dpsi2_dZ(self,Z,mu,S):
        """N,M,M,Q"""
        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0],Z.shape[1]))
        [p.dpsi2_dZ(Z,mu,S,target) for p in self.parts]
        return target

    def dpsi2_dmuS(self,Z,mu,S):
        """return shapes are N,M,M,Q"""
        target_mu, target_S = np.zeros((2,mu.shape[0],Z.shape[0],Z.shape[0],Z.shape[1]))
        [p.dpsi2_dmuS(Z,mu,S,target_mu=target_mu,target_S = target_S) for p in self.parts]

        #TODO: there are some extra terms to compute here!
        return target_mu, target_S
|
||||
53
GPy/kern/kernpart.py
Normal file
53
GPy/kern/kernpart.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
class kernpart(object):
    def __init__(self,D):
        """
        The base class for a kernpart: a positive definite function which forms part of a kernel

        Subclasses are expected to set ``self.Nparam`` (the number of
        parameters) and ``self.name``, and to implement the abstract methods
        below. (The previous version of this constructor contained copy-paste
        residue from the white kernel -- it referenced an undefined
        ``variance`` name and called the abstract ``set_param``, so every
        instantiation raised. Only storing ``D`` is the intended behavior.)

        :param D: the number of input dimensions to the function
        :type D: int
        """
        self.D = D

    def get_param(self):
        """Return the current parameter vector."""
        raise NotImplementedError
    def set_param(self,x):
        """Set the parameters from a flat vector x."""
        raise NotImplementedError
    def get_param_names(self):
        """Return a list of names, one per parameter."""
        raise NotImplementedError
    def K(self,X,X2,target):
        """Accumulate the covariance K(X, X2) into target (in place)."""
        raise NotImplementedError
    def Kdiag(self,X,target):
        """Accumulate the diagonal of K(X, X) into target (in place)."""
        raise NotImplementedError
    def dK_dtheta(self,X,X2,target):
        """Accumulate dK/dtheta into target, shape (N, M, Nparam)."""
        raise NotImplementedError
    def dKdiag_dtheta(self,X,target):
        """Accumulate d(Kdiag)/dtheta into target, shape (N, Nparam)."""
        raise NotImplementedError
    def psi0(self,Z,mu,S,target):
        """Accumulate the psi0 statistic into target."""
        raise NotImplementedError
    def dpsi0_dtheta(self,Z,mu,S,target):
        """Accumulate dpsi0/dtheta into target."""
        raise NotImplementedError
    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        """Accumulate dpsi0/dmu and dpsi0/dS into the two targets."""
        raise NotImplementedError
    def psi1(self,Z,mu,S,target):
        """Accumulate the psi1 statistic into target."""
        raise NotImplementedError
    def dpsi1_dtheta(self,Z,mu,S,target):
        """Accumulate dpsi1/dtheta into target."""
        raise NotImplementedError
    def dpsi1_dZ(self,Z,mu,S,target):
        """Accumulate dpsi1/dZ into target."""
        raise NotImplementedError
    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        """Accumulate dpsi1/dmu and dpsi1/dS into the two targets."""
        raise NotImplementedError
    def psi2(self,Z,mu,S,target):
        """Accumulate the psi2 statistic into target."""
        raise NotImplementedError
    def dpsi2_dZ(self,Z,mu,S,target):
        """Accumulate dpsi2/dZ into target."""
        raise NotImplementedError
    def dpsi2_dtheta(self,Z,mu,S,target):
        """Accumulate dpsi2/dtheta into target."""
        raise NotImplementedError
    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        """Accumulate dpsi2/dmu and dpsi2/dS into the two targets."""
        raise NotImplementedError
    def dK_dX(self,X,X2,target):
        """Accumulate dK/dX into target."""
        raise NotImplementedError
|
||||
|
||||
|
||||
103
GPy/kern/linear.py
Normal file
103
GPy/kern/linear.py
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
class linear(kernpart):
    """
    Linear kernel with a single (isotropic) variance parameter:
    k(x, x') = variance * x . x'

    :param D: the number of input dimensions
    :type D: int
    :param variance: variance
    :type variance: None|float
    """

    def __init__(self, D, variance=None):
        self.D = D
        if variance is None:
            variance = 1.0
        self.Nparam = 1
        self.name = 'linear'
        self.set_param(variance)

    def get_param(self):
        """Return the current variance parameter."""
        return self.variance

    def set_param(self, x):
        """Store the variance parameter (no shape checking is performed)."""
        self.variance = x

    def get_param_names(self):
        return ['variance']

    def K(self, X, X2, target):
        """Accumulate variance * X X2^T into target in place."""
        np.add(target, self.variance * X.dot(X2.T), target)

    def Kdiag(self, X, target):
        """Accumulate the diagonal of K(X, X) into target in place."""
        target += (self.variance * np.square(X)).sum(-1)

    def dK_dtheta(self, X, X2, target):
        """
        Computes the derivatives wrt theta (the single variance parameter).
        Return shape is NxMx(Ntheta)
        """
        if X2 is None:
            X2 = X
        target += X.dot(X2.T)[:, :, None]

    def dK_dX(self, X, X2, target):
        """Accumulate dK/dX into target in place."""
        if X2 is None:
            X2 = X
        target += self.variance * X2[:, None, :]

    # The psi-statistics for this kernpart are not implemented; see
    # linear_ARD for the ARD variant which provides them.
|
||||
114
GPy/kern/linear_ARD.py
Normal file
114
GPy/kern/linear_ARD.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
|
||||
class linear_ARD(kernpart):
    """
    Linear ARD kernel: k(x, x') = sum_q variances_q * x_q * x'_q

    :param D: the number of input dimensions
    :type D: int
    :param variances: ARD variances
    :type variances: None|np.ndarray
    """

    def __init__(self,D,variances=None):
        self.D = D
        if variances is not None:
            assert variances.shape==(self.D,)
        else:
            variances = np.ones(self.D)
        self.Nparam = self.D
        self.name = 'linear'
        self.set_param(variances)

    def get_param(self):
        return self.variances

    def set_param(self,x):
        assert x.size==(self.Nparam)
        self.variances = x

    def get_param_names(self):
        if self.D==1:
            return ['variance']
        else:
            return ['variance_%i'%i for i in range(self.variances.size)]

    def K(self,X,X2,target):
        # scale each column by sqrt(variance) so the plain dot product
        # realises the weighted inner product
        XX = X*np.sqrt(self.variances)
        XX2 = X2*np.sqrt(self.variances)
        target += np.dot(XX, XX2.T)

    def Kdiag(self,X,target):
        np.add(target,np.sum(self.variances*np.square(X),-1),target)

    def dK_dtheta(self,X,X2,target):
        """
        Computes the derivatives wrt theta: dK/dvariance_q = X[:,q] X2[:,q]^T.
        Return shape is NxMx(Ntheta)
        """
        if X2 is None: X2 = X
        product = X[:,None,:]*X2[None,:,:]
        target += product

    def dK_dX(self,X,X2,target):
        if X2 is None: X2 = X
        np.add(target,X2[:,None,:]*self.variances,target)

    def psi0(self,Z,mu,S,target):
        # E[x^T diag(variances) x] under q(x) = N(mu, S)
        expected = np.square(mu) + S
        np.add(target,np.sum(self.variances*expected),target)

    def dpsi0_dtheta(self,Z,mu,S,target):
        # NOTE(review): this RETURNS a value instead of accumulating into
        # `target` like its siblings, and the -2 factor does not follow from
        # psi0 above (d psi0/d variance_q = sum_n expected[n,q]) -- confirm
        # the intended contract before relying on it.
        expected = np.square(mu) + S
        return -2.*np.sum(expected,0)

    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        np.add(target_mu,2*mu*self.variances,target_mu)
        np.add(target_S,self.variances,target_S)

    def dpsi0_dZ(self,Z,mu,S,target):
        # psi0 does not depend on Z
        pass

    def psi1(self,Z,mu,S,target):
        """psi1 is just K(mu, Z): linear in x, so S plays no role."""
        self.K(mu,Z,target)

    def dpsi1_dtheta(self,Z,mu,S,target):
        """psi1 gradient wrt theta is the K gradient evaluated at (mu, Z)."""
        self.dK_dtheta(mu,Z,target)

    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        """Do nothing for S, it does not affect psi1.

        psi1[n,m] = sum_q variances_q * mu[n,q] * Z[m,q], hence
        dpsi1/dmu[n,m,q] = variances_q * Z[m,q].
        (The previous code referenced the never-defined attribute
        `self.variances2` -- an AttributeError -- and divided where the
        analytic gradient multiplies.)
        """
        np.add(target_mu, Z[None,:,:]*self.variances, target_mu)

    def dpsi1_dZ(self,Z,mu,S,target):
        self.dK_dX(mu,Z,target)

    def psi2(self,Z,mu,S,target):
        """Think N,M,M,Q """
        mu2_S = np.square(mu)+S# N,Q,
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        psi2 = ZZ*np.square(self.variances)*mu2_S
        np.add(target, psi2.sum(-1),target) # M,M

    def dpsi2_dtheta(self,Z,mu,S,target):
        mu2_S = np.square(mu)+S# N,Q,
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        target += 2.*ZZ*mu2_S*self.variances

    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        """Think N,M,M,Q """
        mu2_S = np.sum(np.square(mu)+S,0)# Q,
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        tmp = ZZ*np.square(self.variances) # M,M,Q
        np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) #N,M,M,Q
        np.add(target_S, tmp, target_S) #N,M,M,Q

    def dpsi2_dZ(self,Z,mu,S,target):
        mu2_S = np.sum(np.square(mu)+S,0)# Q,
        target += Z[:,None,:]*np.square(self.variances)*mu2_S
|
||||
155
GPy/kern/rbf.py
Normal file
155
GPy/kern/rbf.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
|
||||
class rbf(kernpart):
    """
    Radial Basis Function kernel, aka squared-exponential or Gaussian kernel.

    k(x, x') = variance * exp(-0.5 * ||x - x'||^2 / lengthscale^2)

    :param D: the number of input dimensions
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscale: the lengthscale of the kernel
    :type lengthscale: float

    .. Note: for rbf with different lengthscales on each dimension, see rbf_ARD
    """

    def __init__(self,D,variance=1.,lengthscale=1.):
        self.D = D
        self.Nparam = 2
        self.name = 'rbf'
        self.set_param(np.hstack((variance,lengthscale)))

        #initialize cache (placeholder arrays that will never equal real data)
        self._Z, self._mu, self._S = np.empty(shape=(3,1))
        self._X, self._X2, self._params = np.empty(shape=(3,1))

    def get_param(self):
        # parameter vector is [variance, lengthscale]
        return np.hstack((self.variance,self.lengthscale))

    def set_param(self,x):
        # x must have exactly two entries: (variance, lengthscale)
        self.variance, self.lengthscale = x
        self.lengthscale2 = np.square(self.lengthscale)
        #reset cached results (params changed, so distances must be rescaled)
        self._X, self._X2, self._params = np.empty(shape=(3,1))
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S

    def get_param_names(self):
        return ['variance','lengthscale']

    def K(self,X,X2,target):
        # accumulate variance * exp(-0.5 r^2 / l^2) into target
        self._K_computations(X,X2)
        np.add(self.variance*self._K_dvar, target,target)

    def Kdiag(self,X,target):
        # the diagonal of an rbf kernel is constant: the variance
        np.add(target,self.variance,target)

    def dK_dtheta(self,X,X2,target):
        """Return shape is NxMx(Ntheta)"""
        self._K_computations(X,X2)
        # d/dvariance: the unscaled kernel matrix
        target[:,:,0] += self._K_dvar
        # d/dlengthscale: K * r^2 / l^3 == K_dvar * variance * _K_dist2 / l
        target[:,:,1] += self._K_dvar*self.variance*self._K_dist2/self.lengthscale

    def dKdiag_dtheta(self,X,target):
        # diagonal depends only on the variance; lengthscale gradient is zero
        np.add(target[:,0],1.,target[:,0])

    def dK_dX(self,X,X2,target):
        self._K_computations(X,X2)
        _K_dist = X[:,None,:]-X2[None,:,:]
        # note the transpose: the accumulated array is laid out (M,N,Q)
        target += np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))

    def dKdiag_dX(self,X,target):
        # Kdiag is constant in X, so its gradient is identically zero
        pass

    def _K_computations(self,X,X2):
        # Cache _K_dist2 (scaled squared distances) and _K_dvar (unit-variance
        # kernel matrix); recomputed only when the inputs change.
        if not (np.all(X==self._X) and np.all(X2==self._X2)):
            self._X = X
            self._X2 = X2
            if X2 is None: X2 = X
            XXT = np.dot(X,X2.T)
            if X is X2:
                # symmetric case (X2 was None): reuse the diagonal of X X^T
                self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
            else:
                self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2
            self._K_exponent = -0.5*self._K_dist2
            self._K_dvar = np.exp(-0.5*self._K_dist2)
|
||||
if __name__=='__main__':
    #run some simple tests on the kernel (TODO:move these to unititest)
    #TODO: these are broken in this new structure!
    # NOTE(review): k.K / k.dK_dtheta / k.psi1 etc. are called with fewer
    # arguments than the kernpart API above accepts, and `rbf` is given an
    # array of lengthscales although it expects a scalar -- this script
    # predates the refactor (as the TODO says) and will not run as-is.
    N = 10
    M = 5
    Q = 3

    Z = np.random.randn(M,Q)
    mu = np.random.randn(N,Q)
    S = np.random.rand(N,Q)

    var = 2.5
    lengthscales = np.ones(Q)*0.7

    k = rbf(Q,var,lengthscales)

    from checkgrad import checkgrad

    def k_theta_test(param,k):
        # objective: sum of the kernel matrix; gradient summed to match
        k.set_param(param)
        K = k.K(Z)
        dK_dtheta = k.dK_dtheta(Z)
        f = np.sum(K)
        df = dK_dtheta.sum(0).sum(0)
        return f,np.array(df)
    print "dk_dtheta_test"
    checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))


    def psi1_mu_test(mu,k):
        # finite-difference check of dpsi1/dmu
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
        return f,df.flatten()
    print "psi1_mu_test"
    checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))

    def psi1_S_test(S,k):
        # finite-difference check of dpsi1/dS
        S = S.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
        return f,df.flatten()
    print "psi1_S_test"
    checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))

    def psi1_theta_test(theta,k):
        # finite-difference check of dpsi1/dtheta
        k.set_param(theta)
        f = np.sum(k.psi1(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
        return f,df
    print "psi1_theta_test"
    checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))


    def psi2_mu_test(mu,k):
        # finite-difference check of dpsi2/dmu
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_mu_test"
    checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))

    def psi2_S_test(S,k):
        # finite-difference check of dpsi2/dS
        S = S.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_S_test"
    checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))

    def psi2_theta_test(theta,k):
        # finite-difference check of dpsi2/dtheta
        k.set_param(theta)
        f = np.sum(k.psi2(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
        return f,df
    print "psi2_theta_test"
    checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
|
||||
234
GPy/kern/rbf_ARD.py
Normal file
234
GPy/kern/rbf_ARD.py
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
|
||||
class rbf_ARD(kernpart):
    def __init__(self,D,variance=1.,lengthscales=None):
        """
        Radial Basis Function kernel with one lengthscale per input dimension (ARD).

        Arguments
        ----------
        D: int - the number of input dimensions
        variance: float
        lengthscales : np.ndarray of shape (D,)
        """
        self.D = D
        if lengthscales is not None:
            assert lengthscales.shape==(self.D,)
        else:
            lengthscales = np.ones(self.D)
        self.Nparam = self.D + 1
        self.name = 'rbf_ARD'
        self.set_param(np.hstack((variance,lengthscales)))

        #initialize cache (placeholder arrays that will never equal real data)
        self._Z, self._mu, self._S = np.empty(shape=(3,1))
        self._X, self._X2, self._params = np.empty(shape=(3,1))

    def get_param(self):
        # parameter vector is [variance, lengthscale_0, ..., lengthscale_{D-1}]
        return np.hstack((self.variance,self.lengthscales))
    def set_param(self,x):
        assert x.size==(self.D+1)
        self.variance = x[0]
        self.lengthscales = x[1:]
        self.lengthscales2 = np.square(self.lengthscales)
        #reset cached results
        # NOTE(review): only the psi caches are reset here; the K cache relies
        # on the _params comparison in _K_computations (see note there).
        self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
    def get_param_names(self):
        if self.D==1:
            return ['variance','lengthscale']
        else:
            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
    def K(self,X,X2,target):
        # accumulate variance * exp(-0.5 sum_q (x_q - x'_q)^2 / l_q^2)
        self._K_computations(X,X2)
        np.add(self.variance*self._K_dvar, target,target)
    def Kdiag(self,X,target):
        # diagonal of an rbf kernel is constant: the variance
        np.add(target,self.variance,target)
    def dK_dtheta(self,X,X2,target):
        """Return shape is NxMx(Ntheta)"""
        self._K_computations(X,X2)
        # per-dimension lengthscale gradient: K * d_q^2 / l_q^3
        dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscales
        np.add(target[:,:,0],self._K_dvar, target[:,:,0])
        np.add(target[:,:,1:],dl, target[:,:,1:])
    def dKdiag_dtheta(self,X,target):
        # diagonal depends only on the variance; lengthscale gradients are zero
        np.add(target[:,0],1.,target[:,0])
    def dK_dX(self,X,X2,target):
        self._K_computations(X,X2)
        dZ = self.variance*self._K_dvar[:,:,None]*self._K_dist/self.lengthscales2
        # note the transpose: the accumulated array is laid out (M,N,Q)
        np.add(target,-dZ.transpose(1,0,2),target)
    def psi0(self,Z,mu,S,target):
        # E[k(x,x)] = variance for any rbf
        np.add(target, self.variance, target)
    def dpsi0_dtheta(self,Z,mu,S,target):
        target[:,0] += 1.
    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        # psi0 is constant in mu and S
        pass
    def psi1(self,Z,mu,S,target):
        """Think N,M,Q """
        self._psi_computations(Z,mu,S)
        np.add(target, self._psi1,target)

    def dpsi1_dtheta(self,Z,mu,S,target):
        """N,Q,(Ntheta)"""
        self._psi_computations(Z,mu,S)
        denom_deriv = S[:,None,:]/(self.lengthscales**3+self.lengthscales*S[:,None,:])
        d_length = self._psi1[:,:,None]*(self.lengthscales*np.square(self._psi1_dist/(self.lengthscales2+S[:,None,:])) + denom_deriv)
        target[:,:,0] += self._psi1/self.variance
        target[:,:,1:] += d_length
    def dpsi1_dZ(self,Z,mu,S,target):
        self._psi_computations(Z,mu,S)
        np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)

    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        """return shapes are N,M,Q"""
        self._psi_computations(Z,mu,S)
        tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom
        np.add(target_mu,tmp*self._psi1_dist,target_mu)
        np.add(target_S, 0.5*tmp*(self._psi1_dist_sq-1), target_S)

    def psi2(self,Z,mu,S,target):
        """shape N,M,M"""
        self._psi_computations(Z,mu,S)
        np.add(target, self._psi2,target)

    def dpsi2_dtheta(self,Z,mu,S,target):
        """Shape N,M,M,Ntheta"""
        self._psi_computations(Z,mu,S)
        # psi2 ~ variance^2, hence the factor 2/variance; summed over data n
        d_var = np.sum(2.*self._psi2/self.variance,0)
        d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom)
        d_length = d_length.sum(0)
        target[:,:,0] += d_var
        target[:,:,1:] += d_length

    def dpsi2_dZ(self,Z,mu,S,target):
        """Returns shape N,M,M,Q"""
        self._psi_computations(Z,mu,S)
        dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
        target += dZ

    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        """Think N,M,M,Q """
        self._psi_computations(Z,mu,S)
        tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
        np.add(target_mu, -tmp*(2.*self._psi2_mudist),target_mu) #N,M,M,Q
        np.add(target_S, tmp*(2.*self._psi2_mudist_sq-1), target_S) #N,M,M,Q

    def _K_computations(self,X,X2):
        # cache raw distances on input change; rescale on parameter change
        if not (np.all(X==self._X) and np.all(X2==self._X2)):
            self._X = X
            self._X2 = X2
            if X2 is None: X2 = X
            self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
            self._params = np.empty(shape=(1,0))#ensure the next section gets called
        if not np.all(self._params == self.get_param()):
            # NOTE(review): the next line is a comparison, not an assignment
            # (`==` where `=` looks intended), so self._params is never
            # updated and this branch re-runs on every call -- the caching is
            # defeated (results stay correct, only wasteful). Confirm and fix.
            self._params == self.get_param()
            self._K_dist2 = np.square(self._K_dist/self.lengthscales)
            self._K_exponent = -0.5*self._K_dist2.sum(-1)
            self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1))

    def _psi_computations(self,Z,mu,S):
        #here are the "statistics" for psi1 and psi2
        if not np.all(Z==self._Z):
            #Z has changed, compute Z specific stuff
            self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
            self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
            self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscales2 # M,M,Q
            self._Z = Z

        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
            #something's changed. recompute EVERYTHING

            #psi1
            self._psi1_denom = S[:,None,:]/self.lengthscales2 + 1.
            self._psi1_dist = Z[None,:,:]-mu[:,None,:]
            self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscales2/self._psi1_denom
            self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
            self._psi1 = self.variance*np.exp(self._psi1_exponent)

            #psi2
            self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscales2+1. # N,M,M,Q
            self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
            self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscales2*self._psi2_denom)
            self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
            self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M

            self._Z, self._mu, self._S = Z, mu,S
|
||||
|
||||
|
||||
if __name__=='__main__':
    # Ad-hoc finite-difference checks of the kernel gradients against the
    # analytic implementations above.
    #run some simple tests on the kernel (TODO:move these to unititest)
    #TODO: these are broken in this new structure!
    N = 10  # number of data points
    M = 5   # number of inducing points
    Q = 3   # input dimensionality

    # random inducing inputs, variational means and (positive) variances
    Z = np.random.randn(M,Q)
    mu = np.random.randn(N,Q)
    S = np.random.rand(N,Q)

    # kernel hyperparameters: one variance, Q lengthscales
    var = 2.5
    lengthscales = np.ones(Q)*0.7

    # NOTE(review): `rbf` is assumed to be in scope in this module
    k = rbf(Q,var,lengthscales)

    from checkgrad import checkgrad

    def k_theta_test(param,k):
        # objective: sum of K; gradient: dK_dtheta summed over both matrix axes
        k.set_param(param)
        K = k.K(Z)
        dK_dtheta = k.dK_dtheta(Z)
        f = np.sum(K)
        df = dK_dtheta.sum(0).sum(0)
        return f,np.array(df)
    print "dk_dtheta_test"
    checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))


    def psi1_mu_test(mu,k):
        # gradient of sum(psi1) wrt mu, flattened to match the test vector
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
        return f,df.flatten()
    print "psi1_mu_test"
    checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))

    def psi1_S_test(S,k):
        # gradient of sum(psi1) wrt S
        S = S.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
        return f,df.flatten()
    print "psi1_S_test"
    checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))

    def psi1_theta_test(theta,k):
        # gradient of sum(psi1) wrt the kernel parameters
        k.set_param(theta)
        f = np.sum(k.psi1(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
        return f,df
    print "psi1_theta_test"
    checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))


    def psi2_mu_test(mu,k):
        # gradient of sum(psi2) wrt mu (summed over both M axes)
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_mu_test"
    checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))

    def psi2_S_test(S,k):
        # gradient of sum(psi2) wrt S
        S = S.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_S_test"
    checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))

    def psi2_theta_test(theta,k):
        # gradient of sum(psi2) wrt the kernel parameters
        k.set_param(theta)
        f = np.sum(k.psi2(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
        return f,df
    print "psi2_theta_test"
    checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
||||
54
GPy/kern/spline.py
Normal file
54
GPy/kern/spline.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
def theta(x):
    """Heaviside step function: 1.0 where x >= 0, else 0.0."""
    return 1.0*(np.asarray(x) >= 0.)
class spline(kernpart):
    """
    Spline kernel (positive 1-D inputs only).

    :param D: the number of input dimensions (fixed to 1 right now TODO)
    :type D: int
    :param variance: the variance of the kernel
    :type variance: float
    """

    def __init__(self,D,variance=1.,lengthscale=1.):
        self.D = D
        assert self.D==1
        self.Nparam = 1
        self.name = 'spline'
        self.set_param(np.squeeze(variance))

    def get_param(self):
        """Return the kernel parameters (just the variance)."""
        return self.variance

    def set_param(self,x):
        """Set the kernel parameters (just the variance)."""
        self.variance = x

    def get_param_names(self):
        return ['variance']

    def K(self,X,X2,target):
        """Add the spline covariance of X (as t) and X2 (as s) onto target."""
        assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        t = X
        s = X2.T
        s_t = s-t # broadcasted subtraction
        target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.)

    def Kdiag(self,X,target):
        """Add the diagonal of the spline covariance onto target."""
        target += self.variance*X.flatten()**3/3.

    def dK_dtheta(self,X,X2,target):
        """Add dK/dvariance onto target.

        Bug fix: the previous version used t, s and s_t without defining
        them (NameError at runtime); rebuild them exactly as in K.
        """
        t = X
        s = X2.T
        s_t = s-t # broadcasted subtraction
        target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.

    def dKdiag_dtheta(self,X,target):
        """Add d(Kdiag)/dvariance onto target."""
        target += X.flatten()**3/3.

    def dKdiag_dX(self,X,target):
        """Add d(Kdiag)/dX onto target."""
        target += self.variance*X**2
82
GPy/kern/white.py
Normal file
82
GPy/kern/white.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
class white(kernpart):
    """
    White noise kernel: covariance is zero everywhere except on the
    diagonal, where it equals the variance.

    :param D: the number of input dimensions
    :type D: int
    :param variance:
    :type variance: float
    """
    def __init__(self,D,variance=1.):
        self.D = D
        self.Nparam = 1.
        self.name = 'white'
        self.set_param(np.array([variance]).flatten())

    def get_param(self):
        """Return the single parameter (the variance)."""
        return self.variance

    def set_param(self,x):
        """Set the variance; expects a length-1 array."""
        assert x.shape==(1,)
        self.variance = x

    def get_param_names(self):
        return ['variance']

    def K(self,X,X2,target):
        """Add variance*I onto target, but only when X and X2 are the same points."""
        if X.shape==X2.shape and np.all(X==X2):
            target += self.variance*np.eye(X.shape[0])

    def Kdiag(self,X,target):
        target += self.variance

    def dK_dtheta(self,X,X2,target):
        """Gradient of K wrt the variance: the identity (diagonal case only)."""
        if X.shape==X2.shape and np.all(X==X2):
            target[:,:,0] += np.eye(X.shape[0])

    def dKdiag_dtheta(self,X,target):
        target[:,0] += 1.

    def dK_dX(self,X,X2,target):
        # white noise does not depend on X
        pass

    def dKdiag_dX(self,X,target):
        pass

    # psi statistics: for a white kernel everything vanishes except psi0.
    def psi0(self,Z,mu,S,target):
        target += self.variance

    def dpsi0_dtheta(self,Z,mu,S,target):
        target += 1.

    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        pass

    def psi1(self,Z,mu,S,target):
        pass

    def dpsi1_dtheta(self,Z,mu,S,target):
        pass

    def dpsi1_dZ(self,Z,mu,S,target):
        pass

    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        pass

    def psi2(self,Z,mu,S,target):
        pass

    def dpsi2_dZ(self,Z,mu,S,target):
        pass

    def dpsi2_dtheta(self,Z,mu,S,target):
        pass

    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        pass
Loading…
Add table
Add a link
Reference in a new issue