linear kernel now has an ARD flag

Nicolas 2013-01-28 16:21:32 +00:00
parent 1680c71445
commit 2f68f6de86
5 changed files with 208 additions and 204 deletions


@@ -2,5 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, linear_ARD, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52
 from kern import kern


@@ -8,7 +8,6 @@ from kern import kern
 from rbf import rbf as rbfpart
 from white import white as whitepart
 from linear import linear as linearpart
-from linear_ARD import linear_ARD as linear_ARD_part
 from exponential import exponential as exponentialpart
 from Matern32 import Matern32 as Matern32part
 from Matern52 import Matern52 as Matern52part
@@ -40,28 +39,17 @@ def rbf(D,variance=1., lengthscale=None,ARD=False):
     part = rbfpart(D,variance,lengthscale,ARD)
     return kern(D, [part])

-def linear(D,lengthscales=None):
+def linear(D,variances=None,ARD=True):
     """
     Construct a linear kernel.

     Arguments
     ---------
     D (int), obligatory
-    lengthscales (np.ndarray)
+    variances (np.ndarray)
+    ARD (boolean)
     """
-    part = linearpart(D,lengthscales)
-    return kern(D, [part])
-
-def linear_ARD(D,lengthscales=None):
-    """
-    Construct a linear ARD kernel.
-
-    Arguments
-    ---------
-    D (int), obligatory
-    lengthscales (np.ndarray)
-    """
-    part = linear_ARD_part(D,lengthscales)
+    part = linearpart(D,variances,ARD)
     return kern(D, [part])

 def white(D,variance=1.):
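
After this change a single constructor covers both the isotropic and the ARD linear kernel. A minimal usage sketch, based only on the signature above (the dimension and values are illustrative):

    import numpy as np
    import GPy

    k_iso = GPy.kern.linear(D=3, ARD=False)                       # one shared variance
    k_ard = GPy.kern.linear(D=3, variances=np.ones(3), ARD=True)  # one variance per dimension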


@@ -1,124 +1,143 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 from kernpart import kernpart
 import numpy as np

 class linear(kernpart):
     """
     Linear kernel

+    .. math::
+
+       k(x,y) = \sum_{i=1}^D \sigma^2_i x_iy_i
+
     :param D: the number of input dimensions
     :type D: int
-    :param variance: variance
-    :type variance: None|float
+    :param variances: the vector of variances :math:`\sigma^2_i`
+    :type variances: np.ndarray of size (1,) or (D,) depending on ARD
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single variance parameter \sigma^2), otherwise there is one variance parameter per dimension.
+    :type ARD: Boolean
+    :rtype: kernel object
     """

-    def __init__(self, D, variance=None):
+    def __init__(self,D,variances=None,ARD=True):
         self.D = D
-        if variance is None:
-            variance = 1.0
-        self.Nparam = 1
-        self.name = 'linear'
-        self._set_params(variance)
-        self._Xcache, self._X2cache = np.empty(shape=(2,))
+        self.ARD = ARD
+        if ARD == False:
+            self.Nparam = 1
+            self.name = 'linear'
+            if variances is not None:
+                assert variances.shape == (1,)
+            else:
+                variances = np.ones(1)
+            self._Xcache, self._X2cache = np.empty(shape=(2,))
+        else:
+            self.Nparam = self.D
+            self.name = 'linear_ARD'
+            if variances is not None:
+                assert variances.shape == (self.D,)
+            else:
+                variances = np.ones(self.D)
+        self._set_params(variances)

     def _get_params(self):
-        return self.variance
+        return self.variances

     def _set_params(self,x):
-        self.variance = x
+        assert x.size==(self.Nparam)
+        self.variances = x

     def _get_param_names(self):
-        return ['variance']
+        if self.Nparam == 1:
+            return ['variance']
+        else:
+            return ['variance_%i'%i for i in range(self.variances.size)]

     def K(self,X,X2,target):
-        self._K_computations(X, X2)
-        target += self.variance * self._dot_product
-
-    def Kdiag(self,X,target):
-        np.add(target,np.sum(self.variance*np.square(X),-1),target)
-
-    def dK_dtheta(self,partial,X,X2,target):
-        """
-        Computes the derivatives wrt theta
-        Return shape is NxMx(Ntheta)
-        """
-        self._K_computations(X, X2)
-        product = self._dot_product
-        # product = np.dot(X, X2.T)
-        target += np.sum(product*partial)
-
-    def dK_dX(self,partial,X,X2,target):
-        target += self.variance * np.sum(partial[:,None,:]*X2.T[None,:,:],-1)
-
-    def dKdiag_dtheta(self,partial,X,target):
-        target += np.sum(partial*np.square(X).sum(1))
+        if self.ARD:
+            XX = X*np.sqrt(self.variances)
+            XX2 = X2*np.sqrt(self.variances)
+            target += np.dot(XX, XX2.T)
+        else:
+            self._K_computations(X, X2)
+            target += self.variances * self._dot_product

     def _K_computations(self,X,X2):
+        # (Nicolo) changed the logic here. If X2 is None, we want to cache
+        # (X,X). In practice X2 should always be passed.
         if X2 is None:
             X2 = X
         if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
             self._Xcache = X
             self._X2cache = X2
             self._dot_product = np.dot(X,X2.T)
         else:
             # print "Cache hit!"
             pass # TODO: insert debug message here (logging framework)

-    # def psi0(self,Z,mu,S,target):
-    #     expected = np.square(mu) + S
-    #     np.add(target,np.sum(self.variance*expected),target)
-
-    # def dpsi0_dtheta(self,Z,mu,S,target):
-    #     expected = np.square(mu) + S
-    #     return -2.*np.sum(expected,0)
-
-    # def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
-    #     np.add(target_mu,2*mu*self.variances,target_mu)
-    #     np.add(target_S,self.variances,target_S)
-
-    # def dpsi0_dZ(self,Z,mu,S,target):
-    #     pass
-
-    # def psi1(self,Z,mu,S,target):
-    #     """the variance, it does nothing"""
-    #     self.K(mu,Z,target)
-
-    # def dpsi1_dtheta(self,Z,mu,S,target):
-    #     """the variance, it does nothing"""
-    #     self.dK_dtheta(mu,Z,target)
-
-    # def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
-    #     """Do nothing for S, it does not affect psi1"""
-    #     np.add(target_mu,Z/self.variances2,target_mu)
-
-    # def dpsi1_dZ(self,Z,mu,S,target):
-    #     self.dK_dX(mu,Z,target)
-
-    # def psi2(self,Z,mu,S,target):
-    #     """Think N,M,M,Q """
-    #     mu2_S = np.square(mu)+S # N,Q,
-    #     ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-    #     psi2 = ZZ*np.square(self.variances)*mu2_S
-    #     np.add(target, psi2.sum(-1),target) # M,M
-
-    # def dpsi2_dtheta(self,Z,mu,S,target):
-    #     mu2_S = np.square(mu)+S # N,Q,
-    #     ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-    #     target += 2.*ZZ*mu2_S*self.variances
-
-    # def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
-    #     """Think N,M,M,Q """
-    #     mu2_S = np.sum(np.square(mu)+S,0) # Q,
-    #     ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
-    #     tmp = ZZ*np.square(self.variances) # M,M,Q
-    #     np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) # N,M,M,Q
-    #     np.add(target_S, tmp, target_S) # N,M,M,Q
-
-    # def dpsi2_dZ(self,Z,mu,S,target):
-    #     mu2_S = np.sum(np.square(mu)+S,0) # Q,
-    #     target += Z[:,None,:]*np.square(self.variances)*mu2_S
+    def Kdiag(self,X,target):
+        np.add(target,np.sum(self.variances*np.square(X),-1),target)
+
+    def dK_dtheta(self,partial,X,X2,target):
+        if self.ARD:
+            product = X[:,None,:]*X2[None,:,:]
+            target += (partial[:,:,None]*product).sum(0).sum(0)
+        else:
+            self._K_computations(X, X2)
+            target += np.sum(self._dot_product*partial)
+
+    def dK_dX(self,partial,X,X2,target):
+        target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
+
+    def psi0(self,Z,mu,S,target):
+        expected = np.square(mu) + S
+        target += np.sum(self.variances*expected)
+
+    def dpsi0_dtheta(self,Z,mu,S,target):
+        expected = np.square(mu) + S
+        return -2.*np.sum(expected,0)
+
+    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
+        np.add(target_mu,2*mu*self.variances,target_mu)
+        np.add(target_S,self.variances,target_S)
+
+    def dpsi0_dZ(self,Z,mu,S,target):
+        pass
+
+    def psi1(self,Z,mu,S,target):
+        """the variance, it does nothing"""
+        self.K(mu,Z,target)
+
+    def dpsi1_dtheta(self,Z,mu,S,target):
+        """the variance, it does nothing"""
+        self.dK_dtheta(mu,Z,target)
+
+    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
+        """Do nothing for S, it does not affect psi1"""
+        np.add(target_mu,Z/self.variances2,target_mu)
+
+    def dpsi1_dZ(self,Z,mu,S,target):
+        self.dK_dX(mu,Z,target)
+
+    def psi2(self,Z,mu,S,target):
+        """Think N,M,M,Q """
+        mu2_S = np.square(mu)+S # N,Q,
+        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+        psi2 = ZZ*np.square(self.variances)*mu2_S
+        np.add(target, psi2.sum(-1),target) # M,M
+
+    def dpsi2_dtheta(self,Z,mu,S,target):
+        mu2_S = np.square(mu)+S # N,Q,
+        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+        target += 2.*ZZ*mu2_S*self.variances
+
+    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+        """Think N,M,M,Q """
+        mu2_S = np.sum(np.square(mu)+S,0) # Q,
+        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
+        tmp = ZZ*np.square(self.variances) # M,M,Q
+        np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) # N,M,M,Q
+        np.add(target_S, tmp, target_S) # N,M,M,Q
+
+    def dpsi2_dZ(self,Z,mu,S,target):
+        mu2_S = np.sum(np.square(mu)+S,0) # Q,
+        target += Z[:,None,:]*np.square(self.variances)*mu2_S
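
The ARD branch of ``K`` scales each input column by the square root of its variance and then takes a single dot product, which is exactly the weighted sum k(x,y) = sum_i sigma2_i * x_i * y_i from the new docstring. A standalone numpy check of that identity (independent of GPy; shapes are arbitrary):

    import numpy as np

    N, M, D = 4, 3, 2
    X = np.random.randn(N, D)
    X2 = np.random.randn(M, D)
    sigma2 = np.array([0.5, 2.0])  # one variance per input dimension

    # fast form used in K(): scale columns, then one dot product
    K_fast = np.dot(X * np.sqrt(sigma2), (X2 * np.sqrt(sigma2)).T)

    # literal form of k(x,y) = sum_i sigma2_i * x_i * y_i
    K_literal = sum(sigma2[i] * np.outer(X[:, i], X2[:, i]) for i in range(D))

    assert np.allclose(K_fast, K_literal)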


@@ -1,108 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kernpart import kernpart
import numpy as np

class linear_ARD(kernpart):
    """
    Linear ARD kernel

    :param D: the number of input dimensions
    :type D: int
    :param variances: ARD variances
    :type variances: None|np.ndarray
    """

    def __init__(self,D,variances=None):
        self.D = D
        if variances is not None:
            assert variances.shape==(self.D,)
        else:
            variances = np.ones(self.D)
        self.Nparam = int(self.D)
        self.name = 'linear'
        self._set_params(variances)

    def _get_params(self):
        return self.variances

    def _set_params(self,x):
        assert x.size==(self.Nparam)
        self.variances = x

    def _get_param_names(self):
        if self.D==1:
            return ['variance']
        else:
            return ['variance_%i'%i for i in range(self.variances.size)]

    def K(self,X,X2,target):
        XX = X*np.sqrt(self.variances)
        XX2 = X2*np.sqrt(self.variances)
        target += np.dot(XX, XX2.T)

    def Kdiag(self,X,target):
        np.add(target,np.sum(self.variances*np.square(X),-1),target)

    def dK_dtheta(self,partial,X,X2,target):
        product = X[:,None,:]*X2[None,:,:]
        target += (partial[:,:,None]*product).sum(0).sum(0)

    def dK_dX(self,partial,X,X2,target):
        target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)

    def psi0(self,Z,mu,S,target):
        expected = np.square(mu) + S
        np.add(target,np.sum(self.variances*expected),target)

    def dpsi0_dtheta(self,Z,mu,S,target):
        expected = np.square(mu) + S
        return -2.*np.sum(expected,0)

    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
        np.add(target_mu,2*mu*self.variances,target_mu)
        np.add(target_S,self.variances,target_S)

    def dpsi0_dZ(self,Z,mu,S,target):
        pass

    def psi1(self,Z,mu,S,target):
        """the variance, it does nothing"""
        self.K(mu,Z,target)

    def dpsi1_dtheta(self,Z,mu,S,target):
        """the variance, it does nothing"""
        self.dK_dtheta(mu,Z,target)

    def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
        """Do nothing for S, it does not affect psi1"""
        np.add(target_mu,Z/self.variances2,target_mu)

    def dpsi1_dZ(self,Z,mu,S,target):
        self.dK_dX(mu,Z,target)

    def psi2(self,Z,mu,S,target):
        """Think N,M,M,Q """
        mu2_S = np.square(mu)+S # N,Q,
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        psi2 = ZZ*np.square(self.variances)*mu2_S
        np.add(target, psi2.sum(-1),target) # M,M

    def dpsi2_dtheta(self,Z,mu,S,target):
        mu2_S = np.square(mu)+S # N,Q,
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        target += 2.*ZZ*mu2_S*self.variances

    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
        """Think N,M,M,Q """
        mu2_S = np.sum(np.square(mu)+S,0) # Q,
        ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
        tmp = ZZ*np.square(self.variances) # M,M,Q
        np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) # N,M,M,Q
        np.add(target_S, tmp, target_S) # N,M,M,Q

    def dpsi2_dZ(self,Z,mu,S,target):
        mu2_S = np.sum(np.square(mu)+S,0) # Q,
        target += Z[:,None,:]*np.square(self.variances)*mu2_S
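
Code that used the removed standalone constructor can switch to the new flag; a sketch of the mapping based on the two constructor signatures above (``D`` and the parameter vectors are placeholders):

    # before this commit
    k = GPy.kern.linear_ARD(D, lengthscales)

    # after this commit
    k = GPy.kern.linear(D, variances, ARD=True)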

doc/tuto_GP_regression.rst (new file, 105 lines)

@@ -0,0 +1,105 @@
*************************************
Gaussian process regression tutorial
*************************************
This tutorial covers the basics of building a one-dimensional and a two-dimensional Gaussian process model, also known as a kriging model.

We first import the libraries we will need::

    import pylab as pb
    pb.ion()
    import numpy as np
    import GPy
One-dimensional model
=====================
For this toy example, we assume we have the following inputs and outputs::

    X = np.random.uniform(-3.,3.,(20,1))
    Y = np.sin(X) + np.random.randn(20,1)*0.05
Note that the observations Y include some noise.
The first step is to define the covariance kernel we want to use for the model. We choose here a Gaussian kernel (i.e. rbf or squared exponential) plus some white noise::

    Gaussian = GPy.kern.rbf(D=1)
    noise = GPy.kern.white(D=1)
    kernel = Gaussian + noise
The parameter ``D`` stands for the dimension of the input space. Note that many other kernels are implemented, such as the following (a few constructor calls are sketched after the list):
* linear (``GPy.kern.linear``)
* exponential kernel (``GPy.kern.exponential``)
* Matern 3/2 (``GPy.kern.Matern32``)
* Matern 5/2 (``GPy.kern.Matern52``)
* spline (``GPy.kern.spline``)
* and many others...
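
For instance, these are built the same way (a sketch; the input dimensions are arbitrary)::

    k1 = GPy.kern.exponential(1)
    k2 = GPy.kern.Matern32(1)
    k3 = GPy.kern.linear(2, ARD=True)  # one variance per input dimension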
The inputs required for building the model are the observations and the kernel::

    m = GPy.models.GP_regression(X,Y,kernel)

The functions ``print`` and ``plot`` can help us understand the model we have just built::

    print m
    m.plot()
The default values of the kernel parameters may not be relevant for the current data (for example, the confidence intervals seem too wide on the previous figure). A common approach is to find the values of the parameters that maximize the likelihood of the data. There are two steps for doing that with GPy:

* Constrain the parameters of the kernel to ensure the kernel will always be a valid covariance structure (for example, we don't want some variances to be negative!).
* Run the optimization.

There are various ways to constrain the parameters of the kernel. The most basic is to constrain all the parameters to be positive::

    m.constrain_positive('')

but it is also possible to bound a parameter within a range, or to fix it. The argument of ``m.constrain_positive`` is a regular expression that matches the names of the parameters to be constrained (as seen in ``print m``). For example, if we want the variance to be positive, the lengthscale to be in [1,10], and the noise variance to be fixed, we can write::

    #m.unconstrain('') # Required if the model has been previously constrained
    m.constrain_positive('rbf_variance')
    m.constrain_bounded('lengthscale',1.,10.)
    m.constrain_fixed('white',0.0025)
Once the constraints have been imposed, the model can be optimized::

    m.optimize()

If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restarts`` function::

    m.optimize_restarts(Nrestarts = 10)
    m.plot()
    print(m)
Two-dimensional example
=======================
Here is a two-dimensional example::

    import pylab as pb
    pb.ion()
    import numpy as np
    import GPy

    # sample inputs and outputs
    X = np.random.uniform(-3.,3.,(50,2))
    Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2]) + np.random.randn(50,1)*0.05

    # define kernel
    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)

    # create simple GP model
    m = GPy.models.GP_regression(X,Y,ker)

    # constrain all parameters to be positive
    m.constrain_positive('')

    # optimize and plot
    pb.figure()
    m.optimize('tnc', max_f_eval = 1000)
    m.plot()
    print(m)
The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (i.e. the GP is not isotropic).
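
The new ``ARD`` flag on the linear kernel, added by this commit, plays the same role, with one variance per input dimension instead of one lengthscale. A minimal sketch::

    k_iso = GPy.kern.linear(2, ARD=False)  # a single shared variance
    k_ard = GPy.kern.linear(2, ARD=True)   # one variance per input dimension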