Merged in real gpy

Alan Saul 2013-09-09 13:33:54 +01:00
commit 29358519d9
170 changed files with 30940 additions and 5 deletions

9
.gitignore vendored

@@ -9,7 +9,6 @@
dist
build
eggs
parts
bin
var
sdist
@@ -39,3 +38,11 @@ nosetests.xml
#bfgs optimiser leaves this lying around
iterate.dat
# Nosetests #
#############
*.noseids
# git merge files #
###################
*.orig

.travis.yml

@@ -7,15 +7,20 @@ virtualenv:
system_site_packages: true
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
before_install:
- sudo apt-get install -qq python-scipy python-pip
- sudo apt-get install -qq python-matplotlib
# Workaround for a permissions issue with Travis virtual machine images
# that breaks Python's multiprocessing:
# https://github.com/travis-ci/travis-cookbooks/issues/155
- sudo rm -rf /dev/shm
- sudo ln -s /run/shm /dev/shm
install:
- pip install --upgrade numpy==1.7.1
- pip install sphinx
- pip install nose
- pip install . --use-mirrors
# command to run tests, e.g. python setup.py test
script:
- nosetests GPy/testing

7
AUTHORS.txt Normal file

@@ -0,0 +1,7 @@
James Hensman
Nicolo Fusi
Ricardo Andrade
Nicolas Durrande
Alan Saul
Max Zwiessele
Neil D. Lawrence

21
GPy/__init__.py Normal file

@@ -0,0 +1,21 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import core
import models
import mappings
import inference
import util
import examples
import likelihoods
import testing
from numpy.testing import Tester
from nose.tools import nottest
import kern
from core import priors
@nottest
def tests():
Tester(testing).test(verbose=10)
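A minimal usage sketch of the entry points this file exposes (assumes GPy and its dependencies are importable; everything shown comes from the imports above):

import GPy
print GPy.kern  # subpackages imported in __init__ become attributes of GPy
GPy.tests()     # wraps numpy.testing's Tester over GPy.testing with verbose output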

11
GPy/core/__init__.py Normal file

@@ -0,0 +1,11 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from model import *
from parameterized import *
import priors
from gp import GP
from sparse_gp import SparseGP
from fitc import FITC
from svigp import SVIGP
from mapping import *

26
GPy/core/domains.py Normal file

@@ -0,0 +1,26 @@
'''
Created on 4 Jun 2013
@author: maxz
(Hyper-)Parameter domains defined for :py:mod:`~GPy.core.priors` and :py:mod:`~GPy.kern`.
These domains specify the legitimate realm of the parameters to live in.
:const:`~GPy.core.domains.REAL` :
real domain, all values in the real numbers are allowed
:const:`~GPy.core.domains.POSITIVE`:
positive domain, only positive real values are allowed
:const:`~GPy.core.domains.NEGATIVE`:
the mirror of :const:`~GPy.core.domains.POSITIVE`: only negative real values are allowed
:const:`~GPy.core.domains.BOUNDED`:
only values within the bounded range are allowed,
the bounds are specified within the object carrying the bounded range
'''
REAL = 'real'
POSITIVE = "positive"
NEGATIVE = 'negative'
BOUNDED = 'bounded'
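Model.set_prior (see GPy/core/model.py below) compares a prior's domain against these constants with `is`, so a prior should reference the module constants rather than a fresh string. A small sketch with a hypothetical prior class:

from GPy.core import domains

class MyPrior(object):            # hypothetical; real priors live in GPy.core.priors
    domain = domains.POSITIVE     # identity matters: set_prior tests `what.domain is POSITIVE`

assert MyPrior.domain is domains.POSITIVE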

247
GPy/core/fitc.py Normal file

@@ -0,0 +1,247 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify, pdinv, dtrtrs
from ..util.plot import gpplot
from .. import kern
from scipy import stats
from sparse_gp import SparseGP
class FITC(SparseGP):
"""
sparse FITC approximation
:param X: inputs
:type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel: the kernel (covariance function); see :py:mod:`~GPy.kern`
:type kernel: a GPy.kern.kern instance
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param normalize_X: whether to normalize the data before computing (predictions will be in original scales)
:type normalize_X: bool
"""
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=normalize_X)
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
def update_likelihood_approximation(self):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.restart()
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params())
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z, self.X)
self.psi2 = None
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T,LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star**2 #dA_psi0: yT*beta_star*y
self._dL_dpsi0 += .5 *alpha #dC_dpsi0
self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented."
else:
# likelihood is not heteroscedastic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
assert X_variance_new is None, "FITC model is not defined for handling uncertain inputs."
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (Diag0 + [RPT0].T*[RPT0])^-1 * [RPT0].T
# = I - [RPT0] * (Diag0 + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.num_inducing) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "Heteroscedastic case not implemented."
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

157
GPy/core/gp.py Normal file

@@ -0,0 +1,157 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from .. import kern
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
from ..likelihoods import EP
from gp_base import GPBase
class GP(GPBase):
"""
Gaussian Process model for regression and EP
:param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: bool
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
def getstate(self):
return GPBase.getstate(self)
def setstate(self, state):
GPBase.setstate(self, state)
self._set_params(self._get_params())
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
self.likelihood._set_params(p[self.kern.num_params_transformed():])
self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
# the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
# alpha = np.dot(self.Ki, self.likelihood.Y)
alpha, _ = dpotrs(self.L, self.likelihood.Y, lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki)
else:
# tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.restart()
self.likelihood.fit_full(self.kern.K(self.X))
self._set_params(self._get_params()) # update the GP
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
tmp, _ = dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
return -0.5 * np.sum(np.square(tmp))
# return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else:
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self):
"""
The log marginal likelihood of the GP.
For an EP model, it can be written as the log likelihood of a regression
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
"""
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
"""
Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
# KiKx = np.dot(self.Ki, Kx)
KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
mu = np.dot(KiKx.T, self.likelihood.Y)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
if stop:
debug_this # @UndefinedVariable
return mu, var
def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.output_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.output_dim
If full_cov and self.output_dim > 1, the return shape of var is Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
return mean, var, _025pm, _975pm
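A short regression sketch against core.GP as defined above; predict() returns the four values documented in its docstring. The likelihood and kernel constructors are assumptions about the wider API:

import numpy as np
import GPy

X = np.linspace(0., 1., 40)[:, None]
Y = np.sin(6 * X) + 0.05 * np.random.randn(40, 1)
likelihood = GPy.likelihoods.Gaussian(Y)           # assumed Gaussian likelihood constructor
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)       # assumed kernel constructors
m = GPy.core.GP(X, likelihood, kernel, normalize_X=True)
m.ensure_default_constraints()
m.optimize()
Xnew = np.linspace(0., 1., 200)[:, None]
mean, var, lower, upper = m.predict(Xnew)          # mean, variance and 95% bounds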

209
GPy/core/gp_base.py Normal file

@@ -0,0 +1,209 @@
import numpy as np
from .. import kern
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
import pylab as pb
from GPy.core.model import Model
class GPBase(Model):
"""
Gaussian process base model for holding shared behaviour between
sparse_GP and GP models.
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
self.X = X
assert len(self.X.shape) == 2
self.num_data, self.input_dim = self.X.shape
assert isinstance(kernel, kern.kern)
self.kern = kernel
self.likelihood = likelihood
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.num_data, self.output_dim = self.likelihood.data.shape
if normalize_X:
self._Xoffset = X.mean(0)[None, :]
self._Xscale = X.std(0)[None, :]
self.X = (X.copy() - self._Xoffset) / self._Xscale
else:
self._Xoffset = np.zeros((1, self.input_dim))
self._Xscale = np.ones((1, self.input_dim))
super(GPBase, self).__init__()
# Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def getstate(self):
"""
Get the current state of the class, here we return everything that is needed to recompute the model.
"""
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
def setstate(self, state):
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
"""
Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param full_cov:
:type full_cov: bool
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
"""
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict(Xnew, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
else:
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None], axes=ax)
for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
"""
Plot the GP with noise where the likelihood is Gaussian.
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:type fillcol:
:param levels: for 2D plotting, the number of contour levels to use
if ax is None, create a new figure
"""
# TODO include samples
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.data.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
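Continuing the sketch from gp.py, the plotting helpers above would be driven like this (the fixed_inputs value is purely illustrative):

m.plot()                     # 1D: mean curve, 2-sigma band, data as crosses
m.plot_f(samples=3)          # latent-function view with 3 posterior draws
# with 2+ inputs, fix input index 1 at 0.0 and plot over the remaining free dimension:
# m.plot(fixed_inputs=[(1, 0.0)])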

190
GPy/core/mapping.py Normal file

@@ -0,0 +1,190 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from ..util.plot import Tango, x_frame1D, x_frame2D
from parameterized import Parameterized
import numpy as np
import pylab as pb
class Mapping(Parameterized):
"""
Base model for shared behavior between models that can act like a mapping.
"""
def __init__(self, input_dim, output_dim):
self.input_dim = input_dim
self.output_dim = output_dim
super(Mapping, self).__init__()
# Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def f(self, X):
raise NotImplementedError
def df_dX(self, dL_df, X):
"""Evaluate derivatives of mapping outputs with respect to inputs.
:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: the input locations where derivatives are to be evaluated.
:type X: ndarray (num_data x input_dim)
:returns: matrix containing gradients of the function with respect to the inputs.
"""
raise NotImplementedError
def df_dtheta(self, dL_df, X):
"""The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters.
:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: input locations where the function is evaluated.
:type X: ndarray (num_data x input_dim)
:returns: Matrix containing gradients with respect to parameters of each output for each input data.
:rtype: ndarray (num_params length)
"""
raise NotImplementedError
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
"""
Plot the mapping.
Plots the mapping associated with the model.
- In one dimension, the function is plotted.
- In two dimensions, a contour-plot shows the function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data: which of the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param levels: for 2D plotting, the number of contour levels to use
if ax is None, create a new figure
"""
# TODO include samples
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
f = self.f(Xgrid) # Mapping defines f(), not predict()
for d in range(f.shape[1]):
ax.plot(Xnew, f[:, d], color=linecol)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
f = self.f(Xnew) # Mapping defines f(), not predict()
m = f.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
from GPy.core.model import Model
class Mapping_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given mapping are implemented correctly. It enables checkgradient() to be called independently on each mapping."""
def __init__(self, mapping=None, dL_df=None, X=None):
num_samples = 20
if mapping is None:
import GPy
mapping = GPy.mappings.linear(1, 1)
if X is None:
X = np.random.randn(num_samples, mapping.input_dim)
if dL_df is None:
dL_df = np.ones((num_samples, mapping.output_dim))
self.mapping=mapping
self.X = X
self.dL_df = dL_df
self.num_params = self.mapping.num_params
Model.__init__(self)
def _get_params(self):
return self.mapping._get_params()
def _get_param_names(self):
return self.mapping._get_param_names()
def _set_params(self, x):
self.mapping._set_params(x)
def log_likelihood(self):
return (self.dL_df*self.mapping.f(self.X)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class."
class Mapping_check_df_dtheta(Mapping_check_model):
"""This class allows gradient checks for the gradient of a mapping with respect to parameters. """
def __init__(self, mapping=None, dL_df=None, X=None):
Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
def _log_likelihood_gradients(self):
return self.mapping.df_dtheta(self.dL_df, self.X)
class Mapping_check_df_dX(Mapping_check_model):
"""This class allows gradient checks for the gradient of a mapping with respect to X. """
def __init__(self, mapping=None, dL_df=None, X=None):
Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
if dL_df is None:
self.dL_df = np.ones((self.X.shape[0], self.mapping.output_dim))
self.num_params = self.X.shape[0]*self.mapping.input_dim
def _log_likelihood_gradients(self):
return self.mapping.df_dX(self.dL_df, self.X).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
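The dummy-model wrappers above exist so that Model.checkgrad can exercise a mapping's gradients in isolation. A usage sketch, taking the linear mapping constructor the code itself references on trust:

import GPy
from GPy.core.mapping import Mapping_check_df_dtheta, Mapping_check_df_dX

mapping = GPy.mappings.linear(1, 1)                           # as referenced in Mapping_check_model
assert Mapping_check_df_dtheta(mapping=mapping).checkgrad()   # df/dtheta against numerical estimate
assert Mapping_check_df_dX(mapping=mapping).checkgrad()       # df/dX against numerical estimate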

581
GPy/core/model.py Normal file

@@ -0,0 +1,581 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .. import likelihoods
from ..inference import optimization
from ..util.linalg import jitchol
from GPy.util.misc import opt_wrapper
from parameterized import Parameterized
import multiprocessing as mp
import numpy as np
from GPy.core.domains import POSITIVE, REAL
from numpy.linalg.linalg import LinAlgError
# import numdifftools as ndt
class Model(Parameterized):
_fail_count = 0 # Count of failed optimization steps (see objective)
_allowed_failures = 10 # number of allowed failures
def __init__(self):
Parameterized.__init__(self)
self.priors = None
self.optimization_runs = []
self.sampling_runs = []
self.preferred_optimizer = 'scg'
# self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def getstate(self):
"""
Get the current state of the class.
Inherited from Parameterized, so add those parameters to the state
:return: list of states from the model.
"""
return Parameterized.getstate(self) + \
[self.priors, self.optimization_runs,
self.sampling_runs, self.preferred_optimizer]
def setstate(self, state):
"""
set state from previous call to getstate
call Parameterized with the rest of the state
:param state: the state of the model.
:type state: list as returned from getstate.
"""
self.preferred_optimizer = state.pop()
self.sampling_runs = state.pop()
self.optimization_runs = state.pop()
self.priors = state.pop()
Parameterized.setstate(self, state)
def set_prior(self, regexp, what):
"""
Sets priors on the model parameters.
Notes
-----
Asserts that the prior is suitable for the constraint. If the
wrong constraint is in place, an error is raised. If no
constraint is in place, one is added (warning printed).
For tied parameters, the prior will only be "counted" once, thus
a prior object is only inserted on the first tied index
:param regexp: regular expression of parameters on which priors need to be set.
:type regexp: string, regexp, or integer array
:param what: prior to set on parameter.
:type what: GPy.core.Prior type
"""
if self.priors is None:
self.priors = [None for i in range(self._get_params().size)]
which = self.grep_param_names(regexp)
# check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
if len(tie_partial_matches):
raise ValueError, "cannot place prior across partial ties"
tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
if len(tie_matches) > 1:
raise ValueError, "cannot place prior across multiple ties"
elif len(tie_matches) == 1:
which = which[:1] # just place a prior object on the first parameter
# check constraints are okay
if what.domain is POSITIVE:
constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain is POSITIVE]
if len(constrained_positive_indices):
constrained_positive_indices = np.hstack(constrained_positive_indices)
else:
constrained_positive_indices = np.zeros(shape=(0,))
bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
unconst = np.setdiff1d(which, constrained_positive_indices)
if len(unconst):
print "Warning: constraining parameters to be positive:"
print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
print '\n'
self.constrain_positive(unconst)
elif what.domain is REAL:
assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
else:
raise ValueError, "prior not recognised"
# store the prior in a local list
for w in which:
self.priors[w] = what
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
:param name: the name of parameters required (as a regular expression).
:type name: regular expression
:param return_names: whether or not to return the names matched (default False)
:type return_names: bool
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._log_likelihood_gradients()[matches]
else:
raise AttributeError, "no parameter matches %s" % name
def log_prior(self):
"""evaluate the prior"""
if self.priors is not None:
return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
else:
return 0.
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
if self.priors is None:
return 0.
x = self._get_params()
ret = np.zeros(x.size)
[np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
return ret
def _transform_gradients(self, g):
x = self._get_params()
for index, constraint in zip(self.constrained_indices, self.constraints):
g[index] = g[index] * constraint.gradfactor(x[index])
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.fixed_indices):
to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
return np.delete(g, to_remove)
else:
return g
def randomize(self):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
# first take care of all parameters (from N(0,1))
x = self._get_params_transformed()
x = np.random.randn(x.size)
self._set_params_transformed(x)
# now draw from prior where possible
x = self._get_params()
if self.priors is not None:
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
self._set_params(x)
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
"""
Perform random restarts of the model, and set the model to the best
seen solution.
If the robust flag is set, exceptions raised during optimizations will
be handled silently. If _all_ runs fail, the model is reset to the
existing parameter values.
Notes
-----
:param num_restarts: number of restarts to use (default 10)
:type num_restarts: int
:param robust: whether to handle exceptions silently or not (default False)
:type robust: bool
:param parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
:type parallel: bool
:param num_processes: number of workers in the multiprocessing pool
:type num_processes: int
**kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:param max_iters: maximum number of iterations
:type max_iters: int
:param messages: whether to display during optimisation
:type messages: bool
.. Note:: If num_processes is None, the number of workers in the multiprocessing pool is automatically
set to the number of processors on the current machine.
"""
initial_parameters = self._get_params_transformed()
if parallel:
try:
jobs = []
pool = mp.Pool(processes=num_processes)
for i in range(num_restarts):
self.randomize()
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
except KeyboardInterrupt:
print "Ctrl+c received, terminating and joining pool."
pool.terminate()
pool.join()
for i in range(num_restarts):
try:
if not parallel:
self.randomize()
self.optimize(**kwargs)
else:
self.optimization_runs.append(jobs[i].get())
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
except Exception as e:
if robust:
print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
else:
raise e
if len(self.optimization_runs):
i = np.argmin([o.f_opt for o in self.optimization_runs])
self._set_params_transformed(self.optimization_runs[i].x_opt)
else:
self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self):
"""
Ensure that any variables which should clearly be positive
have been constrained somehow. The method performs a regular
expression search on parameter names looking for the terms
'variance', 'lengthscale', 'precision' and 'kappa'. If any of
these terms are present in the name the parameter is
constrained positive.
"""
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
# param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
for s in positive_strings:
for i in self.grep_param_names(".*" + s):
if not (i in currently_constrained):
to_make_positive.append(i)
if len(to_make_positive):
self.constrain_positive(np.asarray(to_make_positive))
def objective_function(self, x):
"""
The objective function passed to the optimizer. It combines
the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to
return the objective, and will raise the original exception if
the objective cannot be computed.
:param x: the parameters of the model.
:type x: np.array
"""
try:
self._set_params_transformed(x)
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e:
if self._fail_count >= self._allowed_failures:
raise e
self._fail_count += 1
return np.inf
return -self.log_likelihood() - self.log_prior()
def objective_function_gradients(self, x):
"""
Gets the gradients from the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to
return the gradients, and will raise the original exception if
the gradients cannot be computed.
:param x: the parameters of the model.
:type x: np.array
"""
try:
self._set_params_transformed(x)
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e:
if self._fail_count >= self._allowed_failures:
raise e
self._fail_count += 1
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
return obj_grads
def objective_and_gradients(self, x):
"""
Compute the objective function of the model and the gradient of the model at the point given by x.
:param x: the point at which gradients are to be computed.
:type x: np.array
"""
try:
self._set_params_transformed(x)
obj_f = -self.log_likelihood() - self.log_prior()
self._fail_count = 0
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
except (LinAlgError, ZeroDivisionError, ValueError) as e:
if self._fail_count >= self._allowed_failures:
raise e
self._fail_count += 1
obj_f = np.inf
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:param messages: whether to display during optimisation
:type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
:type optimizer: string TODO: valid strings?
"""
if optimizer is None:
optimizer = self.preferred_optimizer
if start is None:
start = self._get_params_transformed()
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
self.optimization_runs.append(opt)
self._set_params_transformed(opt.x_opt)
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable
sgd.run()
self.optimization_runs.append(sgd)
def Laplace_covariance(self):
"""return the covariance matrix of a Laplace approximation at the current (stationary) point."""
# TODO add in the prior contributions for MAP estimation
# TODO fix the hessian for tied, constrained and fixed components
if hasattr(self, 'log_likelihood_hessian'):
A = -self.log_likelihood_hessian()
else:
print "numerically calculating Hessian. please be patient!"
x = self._get_params()
def f(x):
self._set_params(x)
return self.log_likelihood()
import numdifftools as ndt # deferred import: only needed for the numerical Hessian
h = ndt.Hessian(f)
A = -h(x)
self._set_params(x)
# check for almost zero components on the diagonal which screw up the cholesky
aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
A[aa, aa] = 0.
return A
def Laplace_evidence(self):
"""Returns an estiamte of the model evidence based on the Laplace approximation.
Uses a numerical estimate of the Hessian if none is available analytically."""
A = self.Laplace_covariance()
try:
hld = np.sum(np.log(np.diag(jitchol(A)[0])))
except:
return np.nan
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self, names=None):
if names is None:
names = self._get_print_names()
s = Parameterized.__str__(self, names=names).split('\n')
# add priors to the string
if self.priors is not None:
strs = [str(p) if p is not None else '' for p in self.priors]
else:
strs = [''] * len(self._get_param_names())
name_indices = self.grep_param_names("|".join(names))
strs = np.array(strs)[name_indices]
width = np.array(max([len(p) for p in strs] + [5])) + 4
log_like = self.log_likelihood()
log_prior = self.log_prior()
obj_funct = '\nLog-likelihood: {0:.3e}'.format(log_like)
if len(''.join(strs)) != 0:
obj_funct += ', Log prior: {0:.3e}, LL+prior = {1:.3e}'.format(log_prior, log_like + log_prior)
obj_funct += '\n\n'
s[0] = obj_funct + s[0]
s[0] += "|{h:^{col}}".format(h='prior', col=width)
s[1] += '-' * (width + 1)
for p in range(2, len(strs) + 2):
s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
return '\n'.join(s)
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the model by comparing to a numerical
estimate. If the verbose flag is passed, individual
components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter
:type verbose: bool
:param step: The size of the step around which to linearise the objective
:type step: float (default 1e-6)
:param tolerance: the tolerance allowed (see note)
:type tolerance: float (default 1e-3)
Note:-
The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity.
"""
x = self._get_params_transformed().copy()
if not verbose:
# just check the global ratio
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
# evaluate around the point x
f1, g1 = self.objective_and_gradients(x + dx)
f2, g2 = self.objective_and_gradients(x - dx)
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance
else:
# check the gradient of each parameter individually, and do some pretty printing
try:
names = self._get_param_names_transformed()
except NotImplementedError:
names = ['Variable %i' % i for i in range(len(x))]
# Prepare for pretty-printing
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
float_len = 10
cols = [max_names]
cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
cols = np.array(cols) + 5
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
if target_param is None:
param_list = range(len(x))
else:
param_list = self.grep_param_names(target_param, transformed=True, search=True)
if not np.any(param_list):
print "No free parameters to check"
return
for i in param_list:
xx = x.copy()
xx[i] += step
f1, g1 = self.objective_and_gradients(xx)
xx[i] -= 2.*step
f2, g2 = self.objective_and_gradients(xx)
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient)
difference = np.abs((f1 - f2) / 2 / step - gradient)
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
formatted_name = "\033[92m {0} \033[0m".format(names[i])
else:
formatted_name = "\033[91m {0} \033[0m".format(names[i])
r = '%.6f' % float(ratio)
d = '%.6f' % float(difference)
g = '%.6f' % gradient
ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
def input_sensitivity(self):
"""
return an array describing the sensitivity of the model to each input
NB. Right now, we're basing this on the lengthscales (or
variances) of the kernel. TODO: proper sensitivity analysis
where we integrate across the model inputs and evaluate the
effect on the variance of the model output. """
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear', 'rbf_inv']]
if (not len(k) == 1) or (not k[0].ARD):
raise ValueError, "cannot determine sensitivity for this kernel"
k = k[0]
if k.name == 'rbf':
return 1. / k.lengthscale
elif k.name == 'rbf_inv':
return k.inv_lengthscale
elif k.name == 'linear':
return k.variances
def pseudo_EM(self, epsilon=.1, **kwargs):
"""
TODO: Should this not be in the GP class?
EM - like algorithm for Expectation Propagation and Laplace approximation
kwargs are passed to the optimize function. They can be:
:epsilon: convergence criterion
:max_f_eval: maximum number of function evaluations
:messages: whether to display during optimisation
:param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
:type optimizer: string TODO: valid strings?
"""
assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods"
ll_change = epsilon + 1.
iteration = 0
last_ll = -np.inf
convergence = False
alpha = 0
stop = False
while not stop:
last_approximation = self.likelihood.copy()
last_params = self._get_params()
self.update_likelihood_approximation()
new_ll = self.log_likelihood()
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:
self.optimize(**kwargs)
last_ll = self.log_likelihood()
if ll_change < epsilon:
stop = True
iteration += 1
if stop:
print "%s iterations." % iteration
self.update_likelihood_approximation()
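A hedged walk through the Model utilities defined above, for some already-built model m; the Gamma prior constructor is an assumption about GPy.priors:

import GPy

m.ensure_default_constraints()                          # variance/lengthscale/precision/kappa -> positive
m.set_prior('.*lengthscale', GPy.priors.Gamma(1., 1.))  # assumed prior class; its domain must suit the constraint
m.randomize()                                           # start point from priors where set, else N(0,1)
m.optimize_restarts(num_restarts=5, robust=True)        # keep the best of 5 runs, swallow failures
assert m.checkgrad()                                    # analytic/numerical gradient ratio within tolerance of 1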

385
GPy/core/parameterized.py Normal file

@@ -0,0 +1,385 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import re
import copy
import cPickle
import warnings
import transformations
class Parameterized(object):
def __init__(self):
"""
This is the base class for model and kernel. Mostly just handles tying and constraining of parameters
"""
self.tied_indices = []
self.fixed_indices = []
self.fixed_values = []
self.constrained_indices = []
self.constraints = []
def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _get_param_names(self):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _get_print_names(self):
""" Override for which names to print out, when using print m """
return self._get_param_names()
def pickle(self, filename, protocol=None):
if protocol is None:
if self._has_get_set_state():
protocol = 0
else:
protocol = -1
with open(filename, 'w') as f:
cPickle.dump(self, f, protocol)
def copy(self):
"""Returns a (deep) copy of the current model """
return copy.deepcopy(self)
def __getstate__(self):
if self._has_get_set_state():
return self.getstate()
return self.__dict__
def __setstate__(self, state):
if self._has_get_set_state():
self.setstate(state) # set state
self._set_params(self._get_params()) # restore all values
return
self.__dict__ = state
def _has_get_set_state(self):
return 'getstate' in vars(self.__class__) and 'setstate' in vars(self.__class__)
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
For inheriting from Parameterized:
Always append the state of the inherited object
and call down to the inherited object in setstate!!
"""
return [self.tied_indices,
self.fixed_indices,
self.fixed_values,
self.constrained_indices,
self.constraints]
def setstate(self, state):
self.constraints = state.pop()
self.constrained_indices = state.pop()
self.fixed_values = state.pop()
self.fixed_indices = state.pop()
self.tied_indices = state.pop()
def __getitem__(self, regexp, return_names=False):
"""
Get a model parameter by name. The name is applied as a regular
expression and all parameters that match that regular expression are
returned.
"""
matches = self.grep_param_names(regexp)
if len(matches):
if return_names:
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._get_params()[matches]
else:
raise AttributeError, "no parameter matches %s" % regexp
def __setitem__(self, name, val):
"""
Set model parameter(s) by name. The name is provided as a regular
expression. All parameters matching that regular expression are set to
the given value.
"""
matches = self.grep_param_names(name)
if len(matches):
val = np.array(val)
assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
x = self._get_params()
x[matches] = val
self._set_params(x)
else:
raise AttributeError, "no parameter matches %s" % name
def tie_params(self, regexp):
"""
Tie (all!) parameters matching the regular expression `regexp`.
"""
matches = self.grep_param_names(regexp)
assert matches.size > 0, "need at least something to tie together"
if len(self.tied_indices):
assert not np.any(matches[:, None] == np.hstack(self.tied_indices)), "Some indices are already tied!"
self.tied_indices.append(matches)
# TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
if hasattr(self, 'prior'):
pass
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
def untie_everything(self):
"""Unties all parameters by setting tied_indices to an empty list."""
self.tied_indices = []
def grep_param_names(self, regexp, transformed=False, search=False):
"""
:param regexp: regular expression to select parameter names
:type regexp: re | str | int
:rtype: the indices of self._get_param_names which match the regular expression.
.. Note:: Other objects are passed through, i.e. integers which were not meant for grepping.
"""
if transformed:
names = self._get_param_names_transformed()
else:
names = self._get_param_names()
if type(regexp) in [str, np.string_, np.str]:
regexp = re.compile(regexp)
elif type(regexp) is re._pattern_type:
pass
else:
return regexp
if search:
return np.nonzero([regexp.search(name) for name in names])[0]
else:
return np.nonzero([regexp.match(name) for name in names])[0]
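# For example, m.grep_param_names('rbf') on the hypothetical model above would
# return the indices of 'rbf_variance' and 'rbf_lengthscale' within
# _get_param_names(); a plain integer index is passed through unchanged.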
def num_params_transformed(self):
removed = 0
for tie in self.tied_indices:
removed += tie.size - 1
for fix in self.fixed_indices:
removed += fix.size
return len(self._get_params()) - removed
def unconstrain(self, regexp):
"""Unconstrain matching parameters. Does not untie parameters"""
matches = self.grep_param_names(regexp)
# transformed constraints:
for match in matches:
self.constrained_indices = [i[i != match] for i in self.constrained_indices]
# remove empty constraints
tmp = [(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)]
if tmp:
self.constrained_indices, self.constraints = zip(*tmp)
self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints)
else:
self.constrained_indices, self.constraints = [], []
# fixed:
self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices, values in zip(self.fixed_indices, self.fixed_values)]
self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices]
# remove empty elements
tmp = [(i, v) for i, v in zip(self.fixed_indices, self.fixed_values) if len(i)]
if tmp:
self.fixed_indices, self.fixed_values = zip(*tmp)
self.fixed_indices, self.fixed_values = list(self.fixed_indices), list(self.fixed_values)
else:
self.fixed_indices, self.fixed_values = [], []
def constrain_negative(self, regexp):
""" Set negative constraints. """
self.constrain(regexp, transformations.negative_logexp())
def constrain_positive(self, regexp):
""" Set positive constraints. """
self.constrain(regexp, transformations.logexp())
def constrain_bounded(self, regexp, lower, upper):
""" Set bounded constraints. """
self.constrain(regexp, transformations.logistic(lower, upper))
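# A minimal sketch of the constraint helpers (hypothetical parameter names):
#   m.constrain_positive('.*variance')               # logexp transform, domain (0, inf)
#   m.constrain_bounded('.*lengthscale', 1e-2, 1e2)  # logistic transform
#   m.constrain_fixed('bias_variance', 1.)           # removed from the optimiser's view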
def all_constrained_indices(self):
if len(self.constrained_indices) or len(self.fixed_indices):
return np.hstack(self.constrained_indices + self.fixed_indices)
else:
return np.empty(shape=(0,))
def constrain(self, regexp, transform):
assert isinstance(transform, transformations.transformation)
matches = self.grep_param_names(regexp)
overlap = set(matches).intersection(set(self.all_constrained_indices()))
if overlap:
self.unconstrain(np.asarray(list(overlap)))
print 'Warning: re-constraining these parameters'
pn = self._get_param_names()
for i in overlap:
print pn[i]
self.constrained_indices.append(matches)
self.constraints.append(transform)
x = self._get_params()
x[matches] = transform.initialize(x[matches])
self._set_params(x)
def constrain_fixed(self, regexp, value=None):
"""
Arguments
---------
:param regexp: which parameters need to be fixed.
:type regexp: ndarray(dtype=int) or regular expression object or string
:param value: the value to fix the parameters to. If the value is not specified,
the parameter is fixed to its current value
:type value: float
Notes
-----
Fixing a parameter which is tied to another, or constrained in some way will result in an error.
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
"""
matches = self.grep_param_names(regexp)
overlap = set(matches).intersection(set(self.all_constrained_indices()))
if overlap:
self.unconstrain(np.asarray(list(overlap)))
print 'Warning: re-constraining these parameters'
pn = self._get_param_names()
for i in overlap:
print pn[i]
self.fixed_indices.append(matches)
if value is not None:
self.fixed_values.append(value)
else:
self.fixed_values.append(self._get_params()[self.fixed_indices[-1]])
# self.fixed_values.append(value)
self._set_params_transformed(self._get_params_transformed())
def _get_params_transformed(self):
"""use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
x = self._get_params()
[np.put(x, i, t.finv(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices]
if len(to_remove):
return np.delete(x, np.hstack(to_remove))
else:
return x
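# Worked example: with parameters [a, b, c, d], b fixed and (c, d) tied, the
# optimiser sees only [finv(a), finv(c)] (finv being each parameter's constraint
# transform); _untransform_params below reinserts b and copies c onto d on the
# way back to the model's parameter vector.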
def _set_params_transformed(self, x):
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
self._set_params(self._untransform_params(x))
def _untransform_params(self, x):
"""
The transformation required for _set_params_transformed.
This moves the vector x seen by the optimiser (unconstrained) to the
valid parameter vector seen by the model
Note:
- This function is separate from _set_params_transformed for downstream flexibility
"""
# work out how many places are fixed, and where they are. tricky logic!
fix_places = self.fixed_indices + [t[1:] for t in self.tied_indices]
if len(fix_places):
fix_places = np.hstack(fix_places)
Nfix_places = fix_places.size
else:
Nfix_places = 0
free_places = np.setdiff1d(np.arange(Nfix_places + x.size, dtype=np.int), fix_places)
# put the models values in the vector xx
xx = np.zeros(Nfix_places + free_places.size, dtype=np.float64)
xx[free_places] = x
[np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)]
[np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
[np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)]
if hasattr(self, 'debug'):
stop # deliberate NameError, used as a crude breakpoint when self.debug is set # @UndefinedVariable
return xx
def _get_param_names_transformed(self):
"""
Returns the parameter names as propagated after constraining,
tying or fixing, i.e. a list of the same length as _get_params_transformed()
"""
n = self._get_param_names()
# remove/concatenate the tied parameter names
if len(self.tied_indices):
for t in self.tied_indices:
n[t[0]] = "<tie>".join([n[tt] for tt in t])
remove = np.hstack([t[1:] for t in self.tied_indices])
else:
remove = np.empty(shape=(0,), dtype=np.int)
# also remove the fixed params
if len(self.fixed_indices):
remove = np.hstack((remove, np.hstack(self.fixed_indices)))
# add markers to show that some variables are constrained
for i, t in zip(self.constrained_indices, self.constraints):
for ii in i:
n[ii] = n[ii] + t.__str__()
n = [nn for i, nn in enumerate(n) if i not in remove]
return n
@property
def all(self):
return self.__str__(self._get_param_names())
def __str__(self, names=None, nw=30):
"""
Return a string describing the parameter names and their ties and constraints
"""
if names is None:
names = self._get_print_names()
name_indices = self.grep_param_names("|".join(names))
N = len(names)
if not N:
return "This object has no free parameters."
header = ['Name', 'Value', 'Constraints', 'Ties']
values = self._get_params()[name_indices] # map(str,self._get_params())
# sort out the constraints
constraints = [''] * len(self._get_param_names())
for i, t in zip(self.constrained_indices, self.constraints):
for ii in i:
constraints[ii] = t.__str__()
for i in self.fixed_indices:
for ii in i:
constraints[ii] = 'Fixed'
# sort out the ties
ties = [''] * len(self._get_param_names()) # size to the full parameter vector: tied_indices index into it
for i, tie in enumerate(self.tied_indices):
for j in tie:
ties[j] = '(' + str(i) + ')'
values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
# columns = cols.sum()
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = ['|'.join(header_string)]
separator = '-' * len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
return ('\n'.join([header_string[0], separator] + param_string)) + '\n'

217
GPy/core/priors.py Normal file
View file

@@ -0,0 +1,217 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy.special import gammaln, digamma
from ..util.linalg import pdinv
from GPy.core.domains import REAL, POSITIVE
import warnings
class Prior:
domain = None
def pdf(self, x):
return np.exp(self.lnpdf(x))
def plot(self):
rvs = self.rvs(1000)
pb.hist(rvs, 100, normed=True)
xmin, xmax = pb.xlim()
xx = np.linspace(xmin, xmax, 1000)
pb.plot(xx, self.pdf(xx), 'r', linewidth=2)
class Gaussian(Prior):
"""
Implementation of the univariate Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = REAL
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "N(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
def lnpdf(self, x):
return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
def lnpdf_grad(self, x):
return -(x - self.mu) / self.sigma2
def rvs(self, n):
return np.random.randn(n) * self.sigma + self.mu
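# A minimal usage sketch: with p = Gaussian(mu=0., sigma=1.), p.lnpdf(0.)
# evaluates to -0.5 * log(2 * pi) (about -0.919), and p.rvs(5) draws five
# samples from the prior.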
class LogGaussian(Prior):
"""
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = POSITIVE
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "lnN(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
def lnpdf(self, x):
return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
def lnpdf_grad(self, x):
return -((np.log(x) - self.mu) / self.sigma2 + 1.) / x
def rvs(self, n):
return np.exp(np.random.randn(n) * self.sigma + self.mu)
class MultivariateGaussian:
"""
Implementation of the multivariate Gaussian probability function, coupled with random variables.
:param mu: mean (N-dimensional array)
:param var: covariance matrix (NxN)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = REAL
def __init__(self, mu, var):
self.mu = np.array(mu).flatten()
self.var = np.array(var)
assert len(self.var.shape) == 2
assert self.var.shape[0] == self.var.shape[1]
assert self.var.shape[0] == self.mu.size
self.input_dim = self.mu.size
self.inv, self.hld = pdinv(self.var)
self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld
def summary(self):
raise NotImplementedError
def pdf(self, x):
return np.exp(self.lnpdf(x))
def lnpdf(self, x):
d = x - self.mu
return self.constant - 0.5 * np.sum(d * np.dot(d, self.inv), 1)
def lnpdf_grad(self, x):
d = x - self.mu
return -np.dot(self.inv, d)
def rvs(self, n):
return np.random.multivariate_normal(self.mu, self.var, n)
def plot(self):
if self.input_dim == 2:
rvs = self.rvs(200)
pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5)
xmin, xmax = pb.xlim()
ymin, ymax = pb.ylim()
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
xflat = np.vstack((xx.flatten(), yy.flatten())).T
zz = self.pdf(xflat).reshape(100, 100)
pb.contour(xx, yy, zz, linewidths=2)
def gamma_from_EV(E, V):
warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
return Gamma.from_EV(E, V)
class Gamma(Prior):
"""
Implementation of the Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = POSITIVE
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
def summary(self):
ret = {"E[x]": self.a / self.b, \
"E[ln x]": digamma(self.a) - np.log(self.b), \
"var[x]": self.a / self.b / self.b, \
"Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
if self.a > 1:
ret['Mode'] = (self.a - 1.) / self.b
else:
ret['Mode'] = np.nan
return ret
def lnpdf(self, x):
return self.constant + (self.a - 1) * np.log(x) - self.b * x
def lnpdf_grad(self, x):
return (self.a - 1.) / x - self.b
def rvs(self, n):
return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
@staticmethod
def from_EV(E, V):
"""
Creates an instance of a Gamma Prior by specifying the Expected value(s)
and Variance(s) of the distribution.
:param E: expected value
:param V: variance
"""
a = np.square(E) / V
b = E / V
return Gamma(a, b)
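# Worked example: Gamma.from_EV(1., 0.5) solves a = E^2/V = 2 and b = E/V = 2,
# giving a Gamma(2, 2) prior whose mean a/b = 1 and variance a/b^2 = 0.5 match
# the requested moments.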
class inverse_gamma(Prior):
"""
Implementation of the inverse-Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = POSITIVE
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
def lnpdf(self, x):
return self.constant - (self.a + 1) * np.log(x) - self.b / x
def lnpdf_grad(self, x):
return -(self.a + 1.) / x + self.b / x ** 2
def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)

347
GPy/core/sparse_gp.py Normal file
View file

@@ -0,0 +1,347 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
from scipy import linalg
from ..likelihoods import Gaussian
from gp_base import GPBase
class SparseGP(GPBase):
"""
Variational sparse GP model
:param X: inputs
:type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel: the kernel (covariance function); see the GPy.kern module
:type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self.Z = Z
self.num_inducing = Z.shape[0]
# self.likelihood = likelihood
if X_variance is None:
self.has_uncertain_inputs = False
self.X_variance = None
else:
assert X_variance.shape == X.shape
self.has_uncertain_inputs = True
self.X_variance = X_variance
if normalize_X:
self.Z = (self.Z.copy() - self._Xoffset) / self._Xscale
# normalize X uncertainty also
if self.has_uncertain_inputs:
self.X_variance /= np.square(self._Xscale)
self._const_jitter = None
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return GPBase.getstate(self) + [self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance]
def setstate(self, state):
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
GPBase.setstate(self, state)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance)
self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.X, self.Z)
self.psi2 = None
def _computations(self):
if self._const_jitter is None or not(self._const_jitter.shape[0] == self.num_inducing):
self._const_jitter = np.eye(self.num_inducing) * 1e-7
# factor Kmm
self._Lm = jitchol(self.Kmm + self._const_jitter)
# TODO: no white kernel needed anymore, all noise in likelihood --------
# The rather complex computations of self._A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
if not np.array_equal(evals, clipped_evals):
pass # print evals
tmp = evecs * np.sqrt(clipped_evals)
tmp = tmp.T
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data, 1)))
else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
self._A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self._A
self.LB = jitchol(self.B)
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
# back substitute C into psi1Vf
tmp, info1 = dtrtrs(self._Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
self._LBi_Lmi_psi1Vf, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
# tmp, info2 = dpotrs(self.LB, tmp, lower=1)
tmp, info2 = dtrtrs(self.LB, self._LBi_Lmi_psi1Vf, lower=1, trans=1)
self.Cpsi1Vf, info3 = dtrtrs(self._Lm, tmp, lower=1, trans=1)
# Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1Vf)
self.data_fit = np.trace(tmp)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.output_dim
tmp += self.output_dim * np.eye(self.num_inducing)
self.dL_dKmm = backsub_both_sides(self._Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
self.dL_dpsi1 = np.dot(self.likelihood.VVT_factor, self.Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (self.psi1 * self.likelihood.precision.reshape(self.num_data, 1)).T).T
self.dL_dpsi2 = None
else:
dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
else:
# subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(self.psi1, dL_dpsi2)
self.dL_dpsi2 = None
# the partial derivative vector for the likelihood
if self.likelihood.Nparams == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedastic derivatives not implemented"
else:
# likelihood is not heteroscedastic
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self._A))
else:
A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self._A))
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
D = 0.5 * self.data_fit
return A + B + C + D + self.likelihood.Z
def _set_params(self, p):
self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
self._compute_kernel_matrices()
self._computations()
self.Cpsi1V = None
def _get_params(self):
return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])
def _get_param_names(self):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\
+ self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def _get_print_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation.
For a Gaussian likelihood, no iteration is required:
this function does nothing.
"""
if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
self.likelihood.restart()
if self.has_uncertain_inputs:
Lmi = chol_inv(self._Lm)
Kmmi = tdot(Lmi.T)
diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perform FITC-EP. TODO: resolve the potential confusion
# raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm, self.psi1.T)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
return dL_dZ
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""
Internal helper function for making predictions, does not account for
normalization or likelihood function
"""
Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
if self.Cpsi1V is None:
psi1V = np.dot(self.psi1.T, self.likelihood.V)
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
tmp, _ = dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else:
# assert which_parts == 'all', "switching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:, None]
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param X_variance_new: The uncertainty in the prediction points
:type X_variance_new: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.output_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.output_dim
If full_cov and self.output_dim > 1, the return shape of var is Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
if X_variance_new is not None:
X_variance_new = X_variance_new / self._Xscale ** 2
# here's the actual prediction by the GP model
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if which_data == 'all':
which_data = slice(None)
GPBase.plot(self, samples=samples, plot_limits=plot_limits, which_data=which_data, which_parts=which_parts, resolution=resolution, levels=levels, ax=ax)
# add the inducing inputs and some errorbars
if self.X.shape[1] == 1:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')

518
GPy/core/svigp.py Normal file
View file

@@ -0,0 +1,518 @@
# Copyright (c) 2012, James Hensman and Nicolo' Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from .. import kern
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
from ..likelihoods import EP
from gp_base import GPBase
from model import Model
import time
import sys
class SVIGP(GPBase):
"""
Stochastic Variational inference in a Gaussian Process
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param batchsize: the size of each mini-batch of data
Additional kwargs are used as for a sparse GP. They include
:param q_u: canonical parameters of the distribution squashed into a 1D array
:type q_u: np.ndarray
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param kernel: the kernel/covariance function; see the GPy.kern module
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, q_u=None, batchsize=10, X_variance=None):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=False)
self.batchsize=batchsize
self.Y = self.likelihood.Y.copy()
self.Z = Z
self.num_inducing = Z.shape[0]
self.batchcounter = 0
self.epochs = 0
self.iterations = 0
self.vb_steplength = 0.05
self.param_steplength = 1e-5
self.momentum = 0.9
if X_variance is None:
self.has_uncertain_inputs = False
else:
self.has_uncertain_inputs = True
self.X_variance = X_variance
if q_u is None:
q_u = np.hstack((np.random.randn(self.num_inducing*self.output_dim),-.5*np.eye(self.num_inducing).flatten()))
self.set_vb_param(q_u)
self._permutation = np.random.permutation(self.num_data)
self.load_batch()
self._param_trace = []
self._ll_trace = []
self._grad_trace = []
#set the adaptive steplength parameters
self.hbar_t = 0.0
self.tau_t = 100.0
self.gbar_t = 0.0
self.gbar_t1 = 0.0
self.gbar_t2 = 0.0
self.hbar_tp = 0.0
self.tau_tp = 10000.0
self.gbar_tp = 0.0
self.adapt_param_steplength = True
self.adapt_vb_steplength = True
self._param_steplength_trace = []
self._vb_steplength_trace = []
def getstate(self):
steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
return GPBase.getstate(self) + \
[self.get_vb_param(),
self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance,
self.X_batch,
self.X_variance_batch,
steplength_params,
self.batchcounter,
self.batchsize,
self.epochs,
self.momentum,
self.data_prop,
self._param_trace,
self._param_steplength_trace,
self._vb_steplength_trace,
self._ll_trace,
self._grad_trace,
self.Y,
self._permutation,
self.iterations
]
def setstate(self, state):
self.iterations = state.pop()
self._permutation = state.pop()
self.Y = state.pop()
self._grad_trace = state.pop()
self._ll_trace = state.pop()
self._vb_steplength_trace = state.pop()
self._param_steplength_trace = state.pop()
self._param_trace = state.pop()
self.data_prop = state.pop()
self.momentum = state.pop()
self.epochs = state.pop()
self.batchsize = state.pop()
self.batchcounter = state.pop()
steplength_params = state.pop()
(self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
self.X_variance_batch = state.pop()
self.X_batch = state.pop()
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
vb_param = state.pop()
GPBase.setstate(self, state)
self.set_vb_param(vb_param)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X_batch, self.X_variance_batch)
self.psi1 = self.kern.psi1(self.Z, self.X_batch, self.X_variance_batch)
self.psi2 = self.kern.psi2(self.Z, self.X_batch, self.X_variance_batch)
else:
self.psi0 = self.kern.Kdiag(self.X_batch)
self.psi1 = self.kern.K(self.X_batch, self.Z)
self.psi2 = None
def dL_dtheta(self):
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
return dL_dtheta
def _set_params(self, p, computations=True):
self.kern._set_params_transformed(p[:self.kern.num_params])
self.likelihood._set_params(p[self.kern.num_params:])
if computations:
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
return np.hstack((self.kern._get_params_transformed() , self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def load_batch(self):
"""
load a batch of data (set self.X_batch and self.likelihood.Y from self.X, self.Y)
"""
#if we've seen all the data, start again with them in a new random order
if self.batchcounter+self.batchsize > self.num_data:
self.batchcounter = 0
self.epochs += 1
self._permutation = np.random.permutation(self.num_data)
this_perm = self._permutation[self.batchcounter:self.batchcounter+self.batchsize]
self.X_batch = self.X[this_perm]
self.likelihood.set_data(self.Y[this_perm])
if self.has_uncertain_inputs:
self.X_variance_batch = self.X_variance[this_perm]
self.batchcounter += self.batchsize
self.data_prop = float(self.batchsize)/self.num_data
self._compute_kernel_matrices()
self._computations()
def _computations(self,do_Kmm=True, do_Kmm_grad=True):
"""
All of the computations needed. Some are optional, see kwargs.
"""
if do_Kmm:
self.Lm = jitchol(self.Kmm)
# The rather complex computations of self.A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.batchsize, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = np.linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
tmp = evecs * np.sqrt(clipped_evals)
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.batchsize)))
else:
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision))
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
self.V = self.likelihood.precision*self.likelihood.Y
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
self.psi1V = np.dot(self.psi1.T, self.V)
self.B = np.eye(self.num_inducing)*self.data_prop + self.A
self.Lambda = backsub_both_sides(self.Lm, self.B.T)
self.LQL = backsub_both_sides(self.Lm,self.q_u_expectation[1].T,transpose='right')
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.likelihood.precision
self.Kmmi_m, _ = dpotrs(self.Lm, self.q_u_expectation[0], lower=1)
self.projected_mean = np.dot(self.psi1,self.Kmmi_m)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.output_dim * self.likelihood.precision * np.ones(self.batchsize)
self.dL_dpsi1, _ = dpotrs(self.Lm,np.asfortranarray(self.VmT.T),lower=1)
self.dL_dpsi1 = self.dL_dpsi1.T
dL_dpsi2 = -0.5 * self.likelihood.precision * backsub_both_sides(self.Lm, self.LQL - self.output_dim * np.eye(self.num_inducing))
if self.has_uncertain_inputs:
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.batchsize,axis=0)
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1.T).T
self.dL_dpsi2 = None
# Compute dL_dKmm
if do_Kmm_grad:
tmp = np.dot(self.LQL,self.A) - backsub_both_sides(self.Lm,np.dot(self.q_u_expectation[0],self.psi1V.T),transpose='right')
tmp += tmp.T
tmp += -self.output_dim*self.B
tmp += self.data_prop*self.LQL
self.dL_dKmm = 0.5*backsub_both_sides(self.Lm,tmp)
#Compute the gradient of the log likelihood wrt noise variance
self.partial_for_likelihood = -0.5*(self.batchsize*self.output_dim - np.sum(self.A*self.LQL))*self.likelihood.precision
self.partial_for_likelihood += (0.5*self.output_dim*self.trace_K + 0.5 * self.likelihood.trYYT - np.sum(self.likelihood.Y*self.projected_mean))*self.likelihood.precision**2
def log_likelihood(self):
"""
As for uncollapsed sparse GP, but account for the proportion of data we're looking at right now.
NB. self.batchsize is the size of the batch, not the size of X_all
"""
assert not self.likelihood.is_heteroscedastic
A = -0.5*self.batchsize*self.output_dim*(np.log(2.*np.pi) - np.log(self.likelihood.precision))
B = -0.5*self.likelihood.precision*self.output_dim*self.trace_K
Kmm_logdet = 2.*np.sum(np.log(np.diag(self.Lm)))
C = -0.5*self.output_dim*self.data_prop*(Kmm_logdet-self.q_u_logdet - self.num_inducing)
C += -0.5*np.sum(self.LQL * self.B)
D = -0.5*self.likelihood.precision*self.likelihood.trYYT
E = np.sum(self.V*self.projected_mean)
return (A+B+C+D+E)/self.data_prop
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))/self.data_prop
def vb_grad_natgrad(self):
"""
Compute the gradients of the lower bound wrt the canonical and
Expectation parameters of u.
Note that the natural gradient in either parameterisation is given by the gradient in the other (see Hensman et al., 2012, "Fast Variational Inference in the Conjugate Exponential Family")
"""
# Gradient for eta
dL_dmmT_S = -0.5*self.Lambda/self.data_prop + 0.5*self.q_u_prec
Kmmipsi1V,_ = dpotrs(self.Lm,self.psi1V,lower=1)
dL_dm = (Kmmipsi1V - np.dot(self.Lambda,self.q_u_mean))/self.data_prop
# Gradients for theta
S = self.q_u_cov
Si = self.q_u_prec
m = self.q_u_mean
dL_dSi = -mdot(S,dL_dmmT_S, S)
dL_dmhSi = -2*dL_dSi
dL_dSim = np.dot(dL_dSi,m) + np.dot(Si, dL_dm)
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) , np.hstack((dL_dSim.flatten(), dL_dmhSi.flatten()))
def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5):
param_step = 0.
#Iterate!
for i in range(iterations):
#store the current configuration for plotting later
self._param_trace.append(self._get_params())
self._ll_trace.append(self.log_likelihood() + self.log_prior())
#load a batch
self.load_batch()
#compute the (stochastic) gradient
natgrads = self.vb_grad_natgrad()
grads = self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
self._grad_trace.append(grads)
#compute the steps in all parameters
vb_step = self.vb_steplength*natgrads[0]
if (self.epochs>=1):#only move the parameters after the first epoch
param_step = self.momentum*param_step + self.param_steplength*grads
else:
param_step = 0.
self.set_vb_param(self.get_vb_param() + vb_step)
#Note: don't recompute everything here, wait until the next iteration when we have a new batch
self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False)
#print messages if desired
if i and (not i%print_interval):
print i, np.mean(self._ll_trace[-print_interval:]) #, self.log_likelihood()
print np.round(np.mean(self._grad_trace[-print_interval:],0),3)
sys.stdout.flush()
#callback
if i and not i%callback_interval:
callback()
time.sleep(0.1)
if self.epochs > 10:
self._adapt_steplength()
self.iterations += 1
def _adapt_steplength(self):
if self.adapt_vb_steplength:
# self._adaptive_vb_steplength()
self._adaptive_vb_steplength_KL()
self._vb_steplength_trace.append(self.vb_steplength)
assert self.vb_steplength > 0
if self.adapt_param_steplength:
# self._adaptive_param_steplength()
# self._adaptive_param_steplength_log()
self._adaptive_param_steplength_from_vb()
self._param_steplength_trace.append(self.param_steplength)
def _adaptive_param_steplength(self):
decr_factor = 0.1
g_tp = self._transform_gradients(self._log_likelihood_gradients())
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
new_param_steplength = np.dot(self.gbar_tp.T, self.gbar_tp) / self.hbar_tp
#- hack
new_param_steplength *= decr_factor
self.param_steplength = (self.param_steplength + new_param_steplength)/2
#-
self.tau_tp = self.tau_tp*(1-self.param_steplength) + 1
def _adaptive_param_steplength_log(self):
stp = np.logspace(np.log(0.0001), np.log(1e-6), base=np.e, num=18000)
self.param_steplength = stp[self.iterations]
def _adaptive_param_steplength_log2(self):
self.param_steplength = (self.iterations + 0.001)**-0.5
def _adaptive_param_steplength_from_vb(self):
self.param_steplength = self.vb_steplength * 0.01
def _adaptive_vb_steplength(self):
decr_factor = 0.1
g_t = self.vb_grad_natgrad()[0]
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
new_vb_steplength = np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t
#- hack
new_vb_steplength *= decr_factor
self.vb_steplength = (self.vb_steplength + new_vb_steplength)/2
#-
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
def _adaptive_vb_steplength_KL(self):
decr_factor = 1 #0.1
natgrad = self.vb_grad_natgrad()
g_t1 = natgrad[0]
g_t2 = natgrad[1]
self.gbar_t1 = (1-1/self.tau_t)*self.gbar_t1 + 1/self.tau_t * g_t1
self.gbar_t2 = (1-1/self.tau_t)*self.gbar_t2 + 1/self.tau_t * g_t2
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t1.T, g_t2)
self.vb_steplength = np.dot(self.gbar_t1.T, self.gbar_t2) / self.hbar_t
self.vb_steplength *= decr_factor
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
def _raw_predict(self, X_new, X_variance_new=None, which_parts='all',full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
#TODO: make this more efficient!
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
if X_variance_new is None:
Kx = self.kern.K(X_new,self.Z)
mu = np.dot(Kx,self.Kmmi_m)
if full_cov:
Kxx = self.kern.K(X_new)
var = Kxx - mdot(Kx,tmp,Kx.T)
else:
Kxx = self.kern.Kdiag(X_new)
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
return mu, var
else:
assert X_variance_new.shape == X_new.shape
Kx = self.kern.psi1(self.Z,X_new, X_variance_new)
mu = np.dot(Kx,self.Kmmi_m)
Kxx = self.kern.psi0(self.Z,X_new,X_variance_new)
psi2 = self.kern.psi2(self.Z,X_new,X_variance_new)
diag_var = Kxx - np.sum(np.sum(psi2*tmp[None,:,:],1),1)
if full_cov:
raise NotImplementedError
else:
return mu, diag_var[:,None]
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
if X_variance_new is not None:
X_variance_new = X_variance_new / self._Xscale ** 2
# here's the actual prediction by the GP model
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm
def set_vb_param(self,vb_param):
"""set the distribution q(u) from the canonical parameters"""
self.q_u_canonical_flat = vb_param.copy()
self.q_u_canonical = self.q_u_canonical_flat[:self.num_inducing*self.output_dim].reshape(self.num_inducing,self.output_dim),self.q_u_canonical_flat[self.num_inducing*self.output_dim:].reshape(self.num_inducing,self.num_inducing)
self.q_u_prec = -2.*self.q_u_canonical[1]
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
self.q_u_Li = q_u_Li
self.q_u_logdet = -tmp
self.q_u_mean, _ = dpotrs(q_u_Li, np.asfortranarray(self.q_u_canonical[0]),lower=1)
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.output_dim)
def get_vb_param(self):
"""
Return the canonical parameters of the distribution q(u)
"""
return self.q_u_canonical_flat
def plot(self, ax=None, fignum=None, Z_height=None, **kwargs):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
#horrible hack here:
data = self.likelihood.data.copy()
self.likelihood.data = self.Y
GPBase.plot(self, ax=ax, **kwargs)
self.likelihood.data = data
Zu = self.Z * self._Xscale + self._Xoffset
if self.input_dim==1:
ax.plot(self.X_batch, self.likelihood.data, 'gx',mew=2)
if Z_height is None:
Z_height = ax.get_ylim()[0]
ax.plot(Zu, np.zeros_like(Zu) + Z_height, 'r|', mew=1.5, markersize=12)
if self.input_dim==2:
ax.scatter(self.X[:,0], self.X[:,1], 20., self.Y[:,0], linewidth=0, cmap=pb.cm.jet)
ax.plot(Zu[:,0], Zu[:,1], 'w^')
def plot_traces(self):
pb.figure()
t = np.array(self._param_trace)
pb.subplot(2,1,1)
for l,ti in zip(self._get_param_names(),t.T):
if not l[:3]=='iip':
pb.plot(ti,label=l)
pb.legend(loc=0)
pb.subplot(2,1,2)
pb.plot(np.asarray(self._ll_trace),label='stochastic likelihood')
pb.legend(loc=0)

144
GPy/core/transformations.py Normal file
View file

@@ -0,0 +1,144 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from GPy.core.domains import POSITIVE, NEGATIVE, BOUNDED
import sys
lim_val = -np.log(sys.float_info.epsilon)
class transformation(object):
domain = None
def f(self, x):
raise NotImplementedError
def finv(self, x):
raise NotImplementedError
def gradfactor(self, f):
""" df_dx evaluated at self.f(x)=f"""
raise NotImplementedError
def initialize(self, f):
""" produce a sensible initial value for f(x)"""
raise NotImplementedError
def __str__(self):
raise NotImplementedError
class logexp(transformation):
domain = POSITIVE
def f(self, x):
return np.where(x>lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>lim_val, f, np.log(np.exp(f) - 1.))
def gradfactor(self, f):
return np.where(f>lim_val, 1., 1 - np.exp(-f))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve)'
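# Round-trip sketch: logexp maps an unconstrained x to a positive value via
# f(x) = log(1 + exp(x)) and finv inverts it, so for instance
#   t = logexp()
#   t.finv(t.f(3.))  # recovers 3.0 (up to floating point)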
class negative_logexp(transformation):
domain = NEGATIVE
def f(self, x):
return -logexp().f(x) # -np.log(1. + np.exp(x)); instantiate logexp, since unbound Python 2 methods cannot take an ndarray as self
def finv(self, f):
return logexp().finv(-f) # np.log(np.exp(-f) - 1.)
def gradfactor(self, f):
return -logexp().gradfactor(-f)
#ef = np.exp(-f)
#return -(ef - 1.) / ef
def initialize(self, f):
return -logexp().initialize(f) # -np.abs(f)
def __str__(self):
return '(-ve)'
class logexp_clipped(logexp):
max_bound = 1e100
min_bound = 1e-10
log_max_bound = np.log(max_bound)
log_min_bound = np.log(min_bound)
domain = POSITIVE
def __init__(self, lower=1e-6):
self.lower = lower
def f(self, x):
exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
f = np.log(1. + exp)
# if np.isnan(f).any():
# import ipdb;ipdb.set_trace()
return np.clip(f, self.min_bound, self.max_bound)
def finv(self, f):
return np.log(np.exp(f) - 1.) # invert f = log(1 + exp(x)); the parenthesis was misplaced
def gradfactor(self, f):
ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
gf = (ef - 1.) / ef
return gf # np.where(f < self.lower, 0, gf)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve_c)'
class exponent(transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative exponent below. See old MATLAB code.
domain = POSITIVE
def f(self, x):
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f):
return f
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve)'
class negative_exponent(exponent):
domain = NEGATIVE
def f(self, x):
return -exponent.f(self, x) # pass self explicitly: Python 2 unbound methods need an instance
def finv(self, f):
return exponent.finv(self, -f)
def gradfactor(self, f):
return f
def initialize(self, f):
return -exponent.initialize(self, f) # -np.abs(f)
def __str__(self):
return '(-ve)'
class square(transformation):
domain = POSITIVE
def f(self, x):
return x ** 2
def finv(self, x):
return np.sqrt(x)
def gradfactor(self, f):
return 2 * np.sqrt(f)
def initialize(self, f):
return np.abs(f)
def __str__(self):
return '(+sq)'
class logistic(transformation):
domain = BOUNDED
def __init__(self, lower, upper):
assert lower < upper
self.lower, self.upper = float(lower), float(upper)
self.difference = self.upper - self.lower
def f(self, x):
return self.lower + self.difference / (1. + np.exp(-x))
def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f):
return (f - self.lower) * (self.upper - f) / self.difference
def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
def __str__(self):
return '({},{})'.format(self.lower, self.upper)

8
GPy/examples/__init__.py Normal file
View file

@@ -0,0 +1,8 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import classification
import regression
import dimensionality_reduction
import tutorials
import stochastic

168
GPy/examples/classification.py Normal file
View file

@@ -0,0 +1,168 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Gaussian Processes classification
"""
import pylab as pb
import numpy as np
import GPy
default_seed = 10000
def crescent_data(seed=default_seed, kernel=None): # FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param seed: seed value for data generation.
:type seed: int
:param kernel: a GPy kernel to use, or None for the default.
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0
m = GPy.models.GPClassification(data['X'], Y)
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m
def oil(num_inducing=50, max_iters=100, kernel=None):
"""
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
"""
data = GPy.util.datasets.oil()
X = data['X']
Xtest = data['Xtest']
Y = data['Y'][:, 0:1]
Ytest = data['Ytest'][:, 0:1]
Y[Y.flatten()==-1] = 0
Ytest[Ytest.flatten()==-1] = 0
# Create GP model
m = GPy.models.SparseGPClassification(X, Y,kernel=kernel,num_inducing=num_inducing)
# Tie the lengthscale parameters together
m.tie_params('.*len')
m['.*len'] = 10.
m.update_likelihood_approximation()
# Optimize
m.optimize(max_iters=max_iters)
print(m)
#Test
probs = m.predict(Xtest)[0]
GPy.util.classification.conf_matrix(probs,Ytest)
return m
def toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 10000).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Model definition
m = GPy.models.GPClassification(data['X'], Y)
# Optimize
#m.update_likelihood_approximation()
# Parameters optimization:
#m.optimize()
m.pseudo_EM()
# Plot
fig, axes = pb.subplots(2,1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
print(m)
return m
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
"""
Sparse 1D classification example
:param seed : seed value for data generation (default is 10000).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Model definition
m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
m['.*len']= 4.
# Optimize
#m.update_likelihood_approximation()
# Parameters optimization:
#m.optimize()
m.pseudo_EM()
# Plot
fig, axes = pb.subplots(2,1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
print(m)
return m
def sparse_crescent_data(num_inducing=10, seed=default_seed, kernel=None):
"""
Run a Gaussian process classification with the DTC approximation on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param num_inducing: number of inducing variables.
:type num_inducing: int
:param seed: seed value for data generation.
:type seed: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 10.
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m
def FITC_crescent_data(num_inducing=10, seed=default_seed):
"""
Run a Gaussian process classification with FITC approximation on the crescent data. The demonstration uses EP to approximate the likelihood.
:param num_inducing: number of inducing variables.
:type num_inducing: int
:param seed: seed value for data generation.
:type seed: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing)
m.constrain_bounded('.*len',1.,1e3)
m['.*len'] = 3.
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m

492
GPy/examples/dimensionality_reduction.py Normal file
View file

@@ -0,0 +1,492 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from matplotlib import pyplot as plt, cm
import GPy
from GPy.core.transformations import logexp
from GPy.models.bayesian_gplvm import BayesianGPLVM
from GPy.likelihoods.gaussian import Gaussian
np.random.seed(123344)
default_seed = 123344 # np.random.seed returns None, so store the seed value itself
def BGPLVM(seed=default_seed):
N = 5
num_inducing = 4
Q = 3
D = 2
# generate GPLVM-like data
X = np.random.rand(N, Q)
lengthscales = np.random.rand(Q)
k = (GPy.kern.rbf(Q, .5, lengthscales, ARD=True)
+ GPy.kern.white(Q, 0.01))
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, D).T
lik = Gaussian(Y, normalize=True)
k = GPy.kern.rbf_inv(Q, .5, np.ones(Q) * 2., ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.BayesianGPLVM(lik, Q, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales
# m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1)
# pb.figure()
# m.plot()
# pb.title('PCA initialisation')
# pb.figure()
# m.optimize(messages = 1)
# m.plot()
# pb.title('After optimisation')
# m.randomize()
# m.checkgrad(verbose=1)
return m
def GPLVM_oil_100(optimize=True):
data = GPy.util.datasets.oil_100()
Y = data['X']
# create simple GP model
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
m = GPy.models.GPLVM(Y, 6, kernel=kernel)
m.data_labels = data['Y'].argmax(axis=1)
# optimize
if optimize:
m.optimize('scg', messages=1)
# plot
print(m)
m.plot_latent(labels=m.data_labels)
return m
def sparseGPLVM_oil(optimize=True, N=100, Q=6, num_inducing=15, max_iters=50):
np.random.seed(0)
data = GPy.util.datasets.oil()
Y = data['X'][:N]
Y = Y - Y.mean(0)
Y /= Y.std(0)
# create simple GP model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q)
m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
m.data_labels = data['Y'].argmax(axis=1)
# optimize
if optimize:
m.optimize('scg', messages=1, max_iters=max_iters)
# plot
print(m)
# m.plot_latent(labels=m.data_labels)
return m
def swiss_roll(optimize=True, N=1000, num_inducing=15, Q=4, sigma=.2, plot=False):
from GPy.util.datasets import swiss_roll_generated
from GPy.core.transformations import logexp_clipped
data = swiss_roll_generated(N=N, sigma=sigma)
Y = data['Y']
Y -= Y.mean()
Y /= Y.std()
t = data['t']
c = data['colors']
try:
from sklearn.manifold.isomap import Isomap
iso = Isomap().fit(Y)
X = iso.embedding_
if Q > 2:
X = np.hstack((X, np.random.randn(N, Q - 2)))
except ImportError:
X = np.random.randn(N, Q)
if plot:
from mpl_toolkits import mplot3d
import pylab
fig = pylab.figure("Swiss Roll Data")
ax = fig.add_subplot(121, projection='3d')
ax.scatter(*Y.T, c=c)
ax.set_title("Swiss Roll")
ax = fig.add_subplot(122)
ax.scatter(*X.T[:2], c=c)
ax.set_title("Initialization")
var = .5
S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
- (1 - var),
(1 - var))) + .001
Z = np.random.permutation(X)[:num_inducing]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
m.data_colors = c
m.data_t = t
m['rbf_lengthscale'] = 1. # X.var(0).max() / X.var(0)
m['noise_variance'] = Y.var() / 100.
m['bias_variance'] = 0.05
if optimize:
m.optimize('scg', messages=1)
return m
def BGPLVM_oil(optimize=True, N=200, Q=7, num_inducing=40, max_iters=1000, plot=False, **k):
np.random.seed(0)
data = GPy.util.datasets.oil()
# create simple GP model
kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2))
Y = data['X'][:N]
Yn = Gaussian(Y, normalize=True)
# Yn = Y - Y.mean(0)
# Yn /= Yn.std(0)
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
# m.constrain('variance|leng', logexp_clipped())
# m['.*lengt'] = m.X.var(0).max() / m.X.var(0)
m['noise'] = Yn.Y.var() / 100.
# optimize
if optimize:
m.constrain_fixed('noise')
m.optimize('scg', messages=1, max_iters=200, gtol=.05)
m.constrain_positive('noise')
m.constrain_bounded('white', 1e-7, 1)
m.optimize('scg', messages=1, max_iters=max_iters, gtol=.05)
if plot:
y = m.likelihood.Y[0, :]
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
plt.sca(latent_axes)
m.plot_latent()
data_show = GPy.util.visualize.vector_show(y)
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
raw_input('Press enter to finish')
plt.close(fig)
return m
def oil_100():
data = GPy.util.datasets.oil_100()
m = GPy.models.GPLVM(data['X'], 2)
# optimize
m.optimize(messages=1, max_iters=2)
# plot
print(m)
# m.plot_latent(labels=data['Y'].argmax(axis=1))
return m
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
x = np.linspace(0, 4 * np.pi, N)[:, None]
s1 = np.vectorize(lambda x: np.sin(x))
s2 = np.vectorize(lambda x: np.cos(x))
s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
sS = np.vectorize(lambda x: np.sin(2 * x))
s1 = s1(x)
s2 = s2(x)
s3 = s3(x)
sS = sS(x)
S1 = np.hstack([s1, sS])
S2 = np.hstack([s2, s3, sS])
S3 = np.hstack([s3, sS])
Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
Y1 += .3 * np.random.randn(*Y1.shape)
Y2 += .2 * np.random.randn(*Y2.shape)
Y3 += .25 * np.random.randn(*Y3.shape)
Y1 -= Y1.mean(0)
Y2 -= Y2.mean(0)
Y3 -= Y3.mean(0)
Y1 /= Y1.std(0)
Y2 /= Y2.std(0)
Y3 /= Y3.std(0)
slist = [sS, s1, s2, s3]
slist_names = ["sS", "s1", "s2", "s3"]
Ylist = [Y1, Y2, Y3]
if plot_sim:
import pylab
import itertools
fig = pylab.figure("MRD Simulation Data", figsize=(8, 6))
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = slist_names
for S, lab in itertools.izip(slist, labls):
ax.plot(S, label=lab)
ax.legend()
for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1))
pylab.draw()
pylab.tight_layout()
return slist, [S1, S2, S3], Ylist
def bgplvm_simulation_matlab_compare():
from GPy.util.datasets import simulation_BGPLVM
sim_data = simulation_BGPLVM()
Y = sim_data['Y']
S = sim_data['S']
mu = sim_data['mu']
num_inducing, [_, Q] = 3, mu.shape
from GPy.models import mrd
from GPy import kern
reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k,
# X=mu,
# X_variance=S,
_debug=False)
m.auto_scale_factor = True
m['noise'] = Y.var() / 100.
m['linear_variance'] = .01
return m
def bgplvm_simulation(optimize='scg',
plot=True,
max_iters=2e4,
plot_sim=False):
# from GPy.core.transformations import logexp_clipped
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
from GPy.models import mrd
from GPy import kern
reload(mrd); reload(kern)
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
# m.constrain('variance|noise', logexp_clipped())
m['noise'] = Y.var() / 100.
if optimize:
print "Optimizing model:"
m.optimize(optimize, max_iters=max_iters,
messages=True, gtol=.05)
if plot:
m.plot_X_1d("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m
def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]
from GPy.models import mrd
from GPy import kern
reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = mrd.MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw)
m.ensure_default_constraints()
for i, bgplvm in enumerate(m.bgplvms):
m['{}_noise'.format(i)] = bgplvm.likelihood.Y.var() / 500.
# DEBUG
# np.seterr("raise")
if optimize:
print "Optimizing Model:"
m.optimize(messages=1, max_iters=8e3, gtol=.1)
if plot:
m.plot_X_1d("MRD Latent Space 1D")
m.plot_scales("MRD Scales")
return m
def brendan_faces():
from GPy import kern
data = GPy.util.datasets.brendan_faces()
Q = 2
Y = data['Y'][0:-1:10, :]
# Y = data['Y']
Yn = Y - Y.mean()
Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q)
# m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100)
# optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
m.optimize('scg', messages=1, max_f_eval=10000)
ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
return m
def stick_play(range=None, frame_rate=15):
data = GPy.util.datasets.osu_run1()
# optimize
if range is None:
Y = data['Y'].copy()
else:
Y = data['Y'][range[0]:range[1], :].copy()
y = Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.data_play(Y, data_show, frame_rate)
return Y
def stick(kernel=None):
data = GPy.util.datasets.osu_run1()
# optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
m.optimize(messages=1, max_f_eval=10000)
if GPy.util.visualize.visual_available:
plt.clf()
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
return m
def bcgplvm_linear_stick(kernel=None):
data = GPy.util.datasets.osu_run1()
# optimize
mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
m.optimize(messages=1, max_f_eval=10000)
if GPy.util.visualize.visual_available:
plt.clf()
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
return m
def bcgplvm_stick(kernel=None):
data = GPy.util.datasets.osu_run1()
# optimize
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
m.optimize(messages=1, max_f_eval=10000)
if GPy.util.visualize.visual_available:
plt.clf()
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
return m
def robot_wireless():
data = GPy.util.datasets.robot_wireless()
# optimize
m = GPy.models.GPLVM(data['Y'], 2)
m.optimize(messages=1, max_f_eval=10000)
m._set_params(m._get_params())
plt.clf()
ax = m.plot_latent()
return m
def stick_bgplvm(model=None):
data = GPy.util.datasets.osu_run1()
Q = 6
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
# optimize
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_iters=200, xtol=1e-300, ftol=1e-300)
m._set_params(m._get_params())
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
plt.sca(latent_axes)
m.plot_latent()
y = m.likelihood.Y[0, :].copy()
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish')
return m
def cmu_mocap(subject='35', motion=['01'], in_place=True):
data = GPy.util.datasets.cmu_mocap(subject, motion)
Y = data['Y']
if in_place:
# Make figure move in place.
data['Y'][:, 0:3] = 0.0
m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)
# optimize
m.optimize(messages=1, max_f_eval=10000)
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
lvm_visualizer.close()
return m
# def BGPLVM_oil():
# data = GPy.util.datasets.oil()
# Y, X = data['Y'], data['X']
# X -= X.mean(axis=0)
# X /= X.std(axis=0)
#
# Q = 10
# num_inducing = 30
#
# kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, num_inducing=num_inducing)
# # m.scale_factor = 100.0
# m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
# from sklearn import cluster
# km = cluster.KMeans(num_inducing, verbose=10)
# Z = km.fit(m.X).cluster_centers_
# # Z = GPy.util.misc.kmm_init(m.X, num_inducing)
# m.set('iip', Z)
# m.set('bias', 1e-4)
# # optimize
#
# import pdb; pdb.set_trace()
# m.optimize('tnc', messages=1)
# print m
# m.plot_latent(labels=data['Y'].argmax(axis=1))
# return m
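The demos above all return the fitted model for further inspection. A minimal sketch of driving them interactively, assuming this module is importable as GPy.examples.dimensionality_reduction:
# Minimal usage sketch (assumes the module path GPy.examples.dimensionality_reduction).
import GPy.examples.dimensionality_reduction as dr
m = dr.GPLVM_oil_100(optimize=True)       # 6d GPLVM on 100 oil-flow points
m = dr.bgplvm_simulation(optimize='scg')  # Bayesian GPLVM on simulated data
print(m)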

473
GPy/examples/regression.py Normal file
@@ -0,0 +1,473 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Gaussian Processes regression examples
"""
import pylab as pb
import numpy as np
import GPy
def coregionalisation_toy2(max_iters=100):
"""
A simple demonstration of coregionalisation on two sinusoidal functions.
"""
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
X = np.hstack((np.vstack((X1, X2)), index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2, 1)
k = k1**k2
m = GPy.models.GPRegression(X, Y, kernel=k)
m.constrain_fixed('.*rbf_var', 1.)
# m.constrain_positive('.*kappa')
m.optimize('sim', messages=1, max_iters=max_iters)
pb.figure()
Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
mean, var, low, up = m.predict(Xtest1)
GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
mean, var, low, up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
return m
def coregionalisation_toy(max_iters=100):
"""
A simple demonstration of coregionalisation on two sinusoidal functions.
"""
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
X = np.hstack((np.vstack((X1, X2)), index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2, 2)
k = k1**k2 #k1.prod(k2, tensor=True)
m = GPy.models.GPRegression(X, Y, kernel=k)
m.constrain_fixed('.*rbf_var', 1.)
# m.constrain_positive('kappa')
m.optimize(max_iters=max_iters)
pb.figure()
Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
mean, var, low, up = m.predict(Xtest1)
GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
mean, var, low, up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
return m
def coregionalisation_sparse(max_iters=100):
"""
A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
"""
X1 = np.random.rand(500, 1) * 8
X2 = np.random.rand(300, 1) * 5
index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
X = np.hstack((np.vstack((X1, X2)), index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2))
num_inducing = 40
Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2, 2)
k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
m.constrain_fixed('.*rbf_var', 1.)
m.constrain_fixed('iip')
m.constrain_bounded('noise_variance', 1e-3, 1e-1)
# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
m.optimize(max_iters=max_iters)
# plotting:
pb.figure()
Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
mean, var, low, up = m.predict(Xtest1)
GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
mean, var, low, up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
y = pb.ylim()[0]
pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
return m
def epomeo_gpx(max_iters=100):
"""Perform Gaussian process regression on the latitude and longitude data from the Mount Epomeo runs. Requires gpxpy to be installed on your system to load in the data."""
data = GPy.util.datasets.epomeo_gpx()
num_data_list = []
for Xpart in data['X']:
num_data_list.append(Xpart.shape[0])
num_data_array = np.array(num_data_list)
num_data = num_data_array.sum()
Y = np.zeros((num_data, 2))
t = np.zeros((num_data, 2))
start = 0
for Xpart, index in zip(data['X'], range(len(data['X']))):
end = start+Xpart.shape[0]
t[start:end, :] = np.hstack((Xpart[:, 0:1],
index*np.ones((Xpart.shape[0], 1))))
Y[start:end, :] = Xpart[:, 1:3]
start = end
num_inducing = 200
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
np.random.randint(0, 4, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(output_dim=5, rank=5)
k = k1**k2
m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
m.constrain_fixed('.*rbf_var', 1.)
m.constrain_fixed('iip')
m.constrain_bounded('noise_variance', 1e-3, 1e-1)
# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
m.optimize(max_iters=max_iters,messages=True)
return m
def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=10000, max_iters=300):
"""Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisy mode is higher."""
# Contour over a range of length scales and signal/noise ratios.
length_scales = np.linspace(0.1, 60., resolution)
log_SNRs = np.linspace(-3., 4., resolution)
data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
# data['Y'] = data['Y'][0::2, :]
# data['X'] = data['X'][0::2, :]
data['Y'] = data['Y'] - np.mean(data['Y'])
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
ax = pb.gca()
pb.xlabel('length scale')
pb.ylabel('log_10 SNR')
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# Now run a few optimizations
models = []
optim_point_x = np.empty(2)
optim_point_y = np.empty(2)
np.random.seed(seed=seed)
for i in range(0, model_restarts):
# kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
m['noise_variance'] = np.random.uniform(1e-3, 1)
optim_point_x[0] = m['rbf_lengthscale']
optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
# optimize
m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
optim_point_x[1] = m['rbf_lengthscale']
optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
models.append(m)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
return m # (models, lls)
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
"""Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
:data_set: A data set from the utils.datasets director.
:length_scales: a list of length scales to explore for the contour plot.
:log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot.
:kernel: a kernel to use for the 'signal' portion of the data."""
lls = []
total_var = np.var(data['Y'])
kernel = kernel_call(1, variance=1., lengthscale=1.)
model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel)
for log_SNR in log_SNRs:
SNR = 10.**log_SNR
noise_var = total_var / (1. + SNR)
signal_var = total_var - noise_var
model.kern['.*variance'] = signal_var
model['noise_variance'] = noise_var
length_scale_lls = []
for length_scale in length_scales:
model['.*lengthscale'] = length_scale
length_scale_lls.append(model.log_likelihood())
lls.append(length_scale_lls)
return np.array(lls)
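The decomposition in the SNR loop follows from SNR = signal_var / noise_var together with signal_var + noise_var = total_var, which gives noise_var = total_var / (1 + SNR). A quick numerical check with made-up values:
# Worked check of the SNR decomposition used in _contour_data
# (made-up numbers, not taken from any data set).
total_var = 2.0
SNR = 10.0 ** 1.0                    # log10 SNR of 1
noise_var = total_var / (1. + SNR)   # = 2/11, about 0.1818
signal_var = total_var - noise_var   # = 20/11, about 1.8182
assert abs(signal_var / noise_var - SNR) < 1e-12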
def olympic_100m_men(max_iters=100, kernel=None):
"""Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
data = GPy.util.datasets.olympic_100m_men()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'], kernel)
# set the lengthscale to be something sensible (defaults to 1)
if kernel is None:
m['rbf_lengthscale'] = 10
# optimize
m.optimize(max_iters=max_iters)
# plot
m.plot(plot_limits=(1850, 2050))
print(m)
return m
def olympic_marathon_men(max_iters=100, kernel=None):
"""Run a standard Gaussian process regression on the Olympic marathon data."""
data = GPy.util.datasets.olympic_marathon_men()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'], kernel)
# set the lengthscale to be something sensible (defaults to 1)
if kernel is None:
m['rbf_lengthscale'] = 10
# optimize
m.optimize(max_iters=max_iters)
# plot
m.plot(plot_limits=(1850, 2050))
print(m)
return m
def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
# optimize
m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
# plot
m.plot()
print(m)
return m
def toy_rbf_1d_50(max_iters=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d_50()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
# optimize
m.optimize(max_iters=max_iters)
# plot
m.plot()
print(m)
return m
def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
# Create an artificial dataset where the values in the targets (Y)
# only depend in dimensions 1 and 3 of the inputs (X). Run ARD to
# see if this dependency can be recovered
X1 = np.sin(np.sort(np.random.rand(num_samples, 1) * 10, 0))
X2 = np.cos(np.sort(np.random.rand(num_samples, 1) * 10, 0))
X3 = np.exp(np.sort(np.random.rand(num_samples, 1), 0))
X4 = np.log(np.sort(np.random.rand(num_samples, 1), 0))
X = np.hstack((X1, X2, X3, X4))
Y1 = np.asarray(2 * X[:, 0] + 3).reshape(-1, 1)
Y2 = np.asarray(4 * (X[:, 2] - 1.5 * X[:, 0])).reshape(-1, 1)
Y = np.hstack((Y1, Y2))
Y = np.dot(Y, np.random.rand(2, D));
Y = Y + 0.2 * np.random.randn(Y.shape[0], Y.shape[1])
Y -= Y.mean()
Y /= Y.std()
if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
m = GPy.models.GPRegression(X, Y, kernel)
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
# m.set_prior('.*lengthscale',len_prior)
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
m.kern.plot_ARD()
print(m)
return m
def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
# Create an artificial dataset where the values in the targets (Y)
# only depend in dimensions 1 and 3 of the inputs (X). Run ARD to
# see if this dependency can be recovered
X1 = np.sin(np.sort(np.random.rand(num_samples, 1) * 10, 0))
X2 = np.cos(np.sort(np.random.rand(num_samples, 1) * 10, 0))
X3 = np.exp(np.sort(np.random.rand(num_samples, 1), 0))
X4 = np.log(np.sort(np.random.rand(num_samples, 1), 0))
X = np.hstack((X1, X2, X3, X4))
Y1 = np.asarray(2 * X[:, 0] + 3)[:, None]
Y2 = np.asarray(4 * (X[:, 2] - 1.5 * X[:, 0]))[:, None]
Y = np.hstack((Y1, Y2))
Y = np.dot(Y, np.random.rand(2, D));
Y = Y + 0.2 * np.random.randn(Y.shape[0], Y.shape[1])
Y -= Y.mean()
Y /= Y.std()
if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1)
kernel += GPy.kern.bias(X.shape[1])
X_variance = np.ones(X.shape) * 0.5
m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
# m.set_prior('.*lengthscale',len_prior)
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
m.kern.plot_ARD()
print(m)
return m
def robot_wireless(max_iters=100, kernel=None):
"""Predict the location of a robot given wirelss signal strength readings."""
data = GPy.util.datasets.robot_wireless()
# create simple GP Model
m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel)
# optimize
m.optimize(messages=True, max_iters=max_iters)
Xpredict = m.predict(data['Ytest'])[0]
pb.plot(data['Xtest'][:, 0], data['Xtest'][:, 1], 'r-')
pb.plot(Xpredict[:, 0], Xpredict[:, 1], 'b-')
pb.axis('equal')
pb.title('WiFi Localization with Gaussian Processes')
pb.legend(('True Location', 'Predicted Location'))
sse = ((data['Xtest'] - Xpredict)**2).sum()
print(m)
print('Sum of squares error on test data: ' + str(sse))
return m
def silhouette(max_iters=100):
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
data = GPy.util.datasets.silhouette()
# create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y'])
# optimize
m.optimize(messages=True, max_iters=max_iters)
print(m)
return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
"""Run a 1D example of a sparse GP regression."""
# sample inputs and outputs
X = np.random.uniform(-3., 3., (num_samples, 1))
Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
# construct kernel
rbf = GPy.kern.rbf(1)
# create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
m.checkgrad(verbose=1)
m.optimize('tnc', messages=1, max_iters=max_iters)
m.plot()
return m
def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100):
"""Run a 2D example of a sparse GP regression."""
X = np.random.uniform(-3., 3., (num_samples, 2))
Y = np.sin(X[:, 0:1]) * np.sin(X[:, 1:2]) + np.random.randn(num_samples, 1) * 0.05
# construct kernel
rbf = GPy.kern.rbf(2)
# create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
# set the lengthscale to a sensible initial value (inducing inputs are left free)
m['.*len'] = 2.
m.checkgrad()
# optimize and plot
m.optimize('tnc', messages=1, max_iters=max_iters)
m.plot()
print(m)
return m
def uncertain_inputs_sparse_regression(max_iters=100):
"""Run a 1D example of a sparse GP regression with uncertain inputs."""
fig, axes = pb.subplots(1, 2, figsize=(12, 5))
# sample inputs and outputs
S = np.ones((20, 1))
X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
# likelihood = GPy.likelihoods.Gaussian(Y)
Z = np.random.uniform(-3., 3., (7, 1))
k = GPy.kern.rbf(1)
# create simple GP Model - no input uncertainty on this one
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
m.optimize('scg', messages=1, max_iters=max_iters)
m.plot(ax=axes[0])
axes[0].set_title('no input uncertainty')
# the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
m.optimize('scg', messages=1, max_iters=max_iters)
m.plot(ax=axes[1])
axes[1].set_title('with input uncertainty')
print(m)
fig.canvas.draw()
return m
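All of the plotting in these demos goes through the same predict interface, which returns the posterior mean, variance and lower/upper confidence bounds. A minimal self-contained sketch on toy data (not taken from any demo above):
# Minimal sketch of the four-output predict interface used above.
import numpy as np
import GPy
X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X, Y)
m.optimize()
Xnew = np.linspace(-3., 3., 50)[:, None]
mean, var, lower, upper = m.predict(Xnew)  # posterior mean, variance and bounds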

@@ -0,0 +1,41 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import pylab as pb
import numpy as np
import GPy
def toy_1d():
N = 2000
M = 20
#create data
X = np.linspace(0,32,N)[:,None]
Z = np.linspace(0,32,M)[:,None]
Y = np.sin(X) + np.cos(0.3*X) + np.random.randn(*X.shape)/np.sqrt(50.)
m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z)
m.constrain_bounded('noise_variance',1e-3,1e-1)
m.constrain_bounded('white_variance',1e-3,1e-1)
m.param_steplength = 1e-4
fig = pb.figure()
ax = fig.add_subplot(111)
def cb():
ax.cla()
m.plot(ax=ax,Z_height=-3)
ax.set_ylim(-3,3)
fig.canvas.draw()
m.optimize(500, callback=cb, callback_interval=1)
m.plot_traces()
return m

146
GPy/examples/tutorials.py Normal file
@@ -0,0 +1,146 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Code of Tutorials
"""
import pylab as pb
pb.ion()
import numpy as np
import GPy
def tuto_GP_regression():
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X) + np.random.randn(20,1)*0.05
kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
m = GPy.models.GPRegression(X, Y, kernel)
print m
m.plot()
m.constrain_positive('')
m.unconstrain('') # may be used to remove the previous constraints
m.constrain_positive('.*rbf_variance')
m.constrain_bounded('.*lengthscale',1.,10. )
m.constrain_fixed('.*noise',0.0025)
m.optimize()
m.optimize_restarts(num_restarts = 10)
#######################################################
#######################################################
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(50,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
# define kernel
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model
m = GPy.models.GPRegression(X, Y, ker)
# constrain all parameters to be positive
m.constrain_positive('')
# optimize and plot
m.optimize('tnc', max_f_eval = 1000)
m.plot()
print(m)
return(m)
def tuto_kernel_overview():
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(input_dim=1, variance = .75, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
print ker2
ker1.plot()
ker2.plot()
ker3.plot()
k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels
k_prod = k1.prod(k2) # By default, tensor=False
k_prodtens = k1.prod(k2,tensor=True)
# Sum of kernels
k_add = k1.add(k2) # By default, tensor=False
k_addtens = k1.add(k2,tensor=True)
k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
k = k1 * k2 # equivalent to k = k1.prod(k2)
print k
# Simulate sample paths
X = np.linspace(-5,5,501)[:,None]
Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.Matern32(1)
k3 = GPy.kern.white(1)
k = k1 + k2 + k3
print k
k.constrain_positive('.*var')
k.constrain_fixed(np.array([1]),1.75)
k.tie_params('.*len')
k.unconstrain('white')
k.constrain_bounded('white',lower=1e-5,upper=.5)
print k
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
print Kanova
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GPRegression(X, Y, Kanova)
fig = pb.figure(figsize=(5,5))
ax = fig.add_subplot(111)
m.plot(ax=ax)
pb.figure(figsize=(20,3))
pb.subplots_adjust(wspace=0.5)
axs = pb.subplot(1,5,1)
m.plot(ax=axs)
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,3)
m.plot(ax=axs, which_parts=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,4)
m.plot(ax=axs, which_parts=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(ax=axs, which_parts=[False,False,False,True])
return(m)
def model_interaction():
X = np.random.randn(20,1)
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
m = GPy.models.GPRegression(X, Y, kernel=k)
return m
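The regular-expression parameter indexing used throughout these tutorials (m['.*rbf_variance'], constrain_bounded('.*lengthscale', ...)) applies to any model. A short sketch with toy data:
# Sketch of regex-based parameter access, as used in the tutorials above.
import numpy as np
import GPy
X = np.random.randn(20, 1)
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X, Y, GPy.kern.rbf(1) + GPy.kern.bias(1))
m['.*lengthscale'] = 2.                       # set every matching parameter
m.constrain_bounded('.*variance', 1e-3, 10.)  # constrain by regex
print(m)                                      # the printout shows the active constraints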

290
GPy/inference/conjugate_gradient_descent.py Normal file
@@ -0,0 +1,290 @@
'''
Created on 24 Apr 2013
@author: maxz
'''
from GPy.inference.gradient_descent_update_rules import FletcherReeves, \
PolakRibiere
from Queue import Empty
from multiprocessing import Value
from multiprocessing.queues import Queue
from multiprocessing.synchronize import Event
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread
import numpy
import sys
import time
RUNNING = "running"
CONVERGED = "converged"
MAXITER = "maximum number of iterations reached"
MAX_F_EVAL = "maximum number of function calls reached"
LINE_SEARCH = "line search failed"
KBINTERRUPT = "interrupted"
class _Async_Optimization(Thread):
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
gtol=1e-6, outqueue=None, *args, **kw):
"""
Helper Process class for async optimization
f_call and df_call are Multiprocessing Values, for synchronized assignment
"""
self.f_call = Value('i', 0)
self.df_call = Value('i', 0)
self.f = self.f_wrapper(f, self.f_call)
self.df = self.f_wrapper(df, self.df_call)
self.x0 = x0
self.update_rule = update_rule
self.report_every = report_every
self.messages = messages
self.maxiter = maxiter
self.max_f_eval = max_f_eval
self.gtol = gtol
self.SENTINEL = SENTINEL
self.runsignal = runsignal
# self.parent = parent
# self.result = None
self.outq = outqueue
super(_Async_Optimization, self).__init__(target=self.run,
name="CG Optimization",
*args, **kw)
# def __enter__(self):
# return self
#
# def __exit__(self, type, value, traceback):
# return isinstance(value, TypeError)
def f_wrapper(self, f, counter):
def f_w(*a, **kw):
counter.value += 1
return f(*a, **kw)
return f_w
def callback(self, *a):
if self.outq is not None:
self.outq.put(a)
# self.parent and self.parent.callback(*a, **kw)
def callback_return(self, *a):
self.callback(*a)
if self.outq is not None:
self.outq.put(self.SENTINEL)
if self.messages:
print ""
self.runsignal.clear()
def run(self, *args, **kwargs):
raise NotImplementedError("Overwrite this with optimization (for async use)")
pass
class _CGDAsync(_Async_Optimization):
def reset(self, xi, *a, **kw):
gi = -self.df(xi, *a, **kw)
si = gi
ur = self.update_rule(gi)
return gi, ur, si
def run(self, *a, **kw):
status = RUNNING
fi = self.f(self.x0)
fi_old = fi + 5000
gi, ur, si = self.reset(self.x0, *a, **kw)
xi = self.x0
xi_old = numpy.nan
it = 0
while it < self.maxiter:
if not self.runsignal.is_set():
break
if self.f_call.value > self.max_f_eval:
status = MAX_F_EVAL
break
gi = -self.df(xi, *a, **kw)
if numpy.dot(gi.T, gi) <= self.gtol:
status = CONVERGED
break
if numpy.isnan(numpy.dot(gi.T, gi)):
if numpy.any(numpy.isnan(xi_old)):
status = CONVERGED
break
gi, ur, si = self.reset(xi_old, *a, **kw)
gammai = ur(gi)
if gammai < 1e-6 or it % xi.shape[0] == 0:
gi, ur, si = self.reset(xi, *a, **kw)
si = gi + gammai * si
alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f,
self.df,
xi,
si, gi,
fi, fi_old)
if alphai is None:
alphai, _, _, fi2, fi_old2, gfi = \
line_search_wolfe2(self.f, self.df,
xi, si, gi,
fi, fi_old)
if alphai is None:
# This line search also failed to find a better solution.
status = LINE_SEARCH
break
if fi2 < fi:
fi, fi_old = fi2, fi_old2
if gfi is not None:
gi = gfi
if numpy.isnan(fi) or fi_old < fi:
gi, ur, si = self.reset(xi, *a, **kw)
else:
xi += numpy.dot(alphai, si)
if self.messages:
sys.stdout.write("\r")
sys.stdout.flush()
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
if it % self.report_every == 0:
self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
it += 1
else:
status = MAXITER
self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
class Async_Optimize(object):
callback = lambda *x: None
runsignal = Event()
SENTINEL = "SENTINEL"
def async_callback_collect(self, q):
while self.runsignal.is_set():
try:
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
self.callback(*ret)
self.runsignal.clear()
except Empty:
pass
def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
self.runsignal.set()
c = None
outqueue = None
if callback:
outqueue = Queue()
self.callback = callback
c = Thread(target=self.async_callback_collect, args=(outqueue,))
c.start()
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
report_every=report_every, messages=messages, maxiter=maxiter,
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
p.start()
return p, c
def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
p, c = self.opt_async(f, df, x0, callback, update_rule, messages,
maxiter, max_f_eval, gtol,
report_every, *args, **kwargs)
while self.runsignal.is_set():
try:
p.join(1)
if c: c.join(1)
except KeyboardInterrupt:
# print "^C"
self.runsignal.clear()
p.join()
if c: c.join()
if c and c.is_alive():
# self.runsignal.set()
# while self.runsignal.is_set():
# try:
# c.join(.1)
# except KeyboardInterrupt:
# # print "^C"
# self.runsignal.clear()
# c.join()
print "WARNING: callback still running, optimisation done!"
return p.result
class CGD(Async_Optimize):
'''
Conjugate gradient descent algorithm to minimize
function f with gradients df, starting at x0
with update rule update_rule
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
'''
opt_name = "Conjugate Gradient Descent"
def opt_async(self, *a, **kw):
"""
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs)
callback gets called every `report_every` iterations
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
f, and df will be called with
f(xi, *args, **kwargs)
df(xi, *args, **kwargs)
**returns**
-----------
Started `Process` object, optimizing asynchronously
**calls**
---------
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
at end of optimization!
"""
return super(CGD, self).opt_async(*a, **kw)
def opt(self, *a, **kw):
"""
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs)
Minimize f, calling callback every `report_every` iterations with following syntax:
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
f, and df will be called with
f(xi, *args, **kwargs)
df(xi, *args, **kwargs)
**returns**
---------
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
at end of optimization
"""
return super(CGD, self).opt(*a, **kw)
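A synchronous usage sketch for CGD on a toy quadratic (made-up objective; the return values follow the opt docstring above):
# Sketch: minimize a quadratic with the CGD class above.
import numpy
f = lambda x: numpy.dot(x, x)  # objective
df = lambda x: 2 * x           # gradient, returned as a 1D array
result = CGD().opt(f, df, numpy.ones(5), maxiter=100)
x_opt, f_opt, g_opt, its, f_calls, df_calls, status = result
print(status)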

53
GPy/inference/gradient_descent_update_rules.py Normal file
@@ -0,0 +1,53 @@
'''
Created on 24 Apr 2013
@author: maxz
'''
import numpy
class GDUpdateRule():
_gradnat = None
_gradnatold = None
def __init__(self, initgrad, initgradnat=None):
self.grad = initgrad
if initgradnat is not None:
self.gradnat = initgradnat
else:
self.gradnat = initgrad
# self.grad, self.gradnat
def _gamma(self):
raise NotImplementedError("""Implement gamma update rule here,
you can use self.grad and self.gradold for parameters, as well as
self.gradnat and self.gradnatold for natural gradients.""")
def __call__(self, grad, gradnat=None, si=None, *args, **kw):
"""
Return gamma for given gradients and optional natural gradients
"""
if gradnat is None:
gradnat = grad
self.gradold = self.grad
self.gradnatold = self.gradnat
self.grad = grad
self.gradnat = gradnat
self.si = si
return self._gamma(*args, **kw)
class FletcherReeves(GDUpdateRule):
'''
Fletcher Reeves update rule for gamma
'''
def _gamma(self, *a, **kw):
tmp = numpy.dot(self.grad.T, self.gradnat)
if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp
class PolakRibiere(GDUpdateRule):
'''
Polak-Ribiere update rule for gamma
'''
def _gamma(self, *a, **kw):
tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp
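A quick numerical sketch of the update rules above, using two made-up successive gradients:
# Sketch: gamma from two successive gradients (made-up numbers).
import numpy
g0 = numpy.array([1., -2.])   # previous gradient
g1 = numpy.array([0.5, 0.1])  # current gradient
fr = FletcherReeves(g0)
print(fr(g1))  # g1.g1 / g0.g0 = 0.26 / 5.0 = 0.052
pr = PolakRibiere(g0)
print(pr(g1))  # (g1 - g0).g1 / g0.g0 = -0.04 / 5.0 = -0.008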

240
GPy/inference/optimization.py Normal file
@@ -0,0 +1,240 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import pylab as pb
import datetime as dt
from scipy import optimize
from warnings import warn
try:
import rasmussens_minimize as rasm
rasm_available = True
except ImportError:
rasm_available = False
from scg import SCG
class Optimizer():
"""
Superclass for all the optimizers.
:param x_init: initial set of parameters
:param f_fp: function that returns the function AND the gradients at the same time
:param f: function to optimize
:param fp: gradients
:param messages: print messages from the optimizer?
:type messages: (True | False)
:param max_f_eval: maximum number of function evaluations
:rtype: optimizer object.
"""
def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3,
ftol=None, gtol=None, xtol=None):
self.opt_name = None
self.x_init = x_init
self.messages = messages
self.f_opt = None
self.x_opt = None
self.funct_eval = None
self.status = None
self.max_f_eval = int(max_f_eval)
self.max_iters = int(max_iters)
self.trace = None
self.time = "Not available"
self.xtol = xtol
self.gtol = gtol
self.ftol = ftol
self.model = model
def run(self, **kwargs):
start = dt.datetime.now()
self.opt(**kwargs)
end = dt.datetime.now()
self.time = str(end - start)
def opt(self, f_fp=None, f=None, fp=None):
raise NotImplementedError, "this needs to be implemented to use the optimizer class"
def plot(self):
if self.trace is None:
print "No trace present so I can't plot it. Please check that the optimizer actually supplies a trace."
else:
pb.figure()
pb.plot(self.trace)
pb.xlabel('Iteration')
pb.ylabel('f(x)')
def __str__(self):
diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name
diagnostics += "f(x_opt): \t\t\t\t %.3f\n" % self.f_opt
diagnostics += "Number of function evaluations: \t %d\n" % self.funct_eval
diagnostics += "Optimization status: \t\t\t %s\n" % self.status
diagnostics += "Time elapsed: \t\t\t\t %s\n" % self.time
return diagnostics
class opt_tnc(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "TNC (Scipy implementation)"
def opt(self, f_fp=None, f=None, fp=None):
"""
Run the TNC optimizer
"""
tnc_rcstrings = ['Local minimum', 'Converged', 'XConverged', 'Maximum number of f evaluations reached',
'Line search failed', 'Function is constant']
assert f_fp != None, "TNC requires f_fp"
opt_dict = {}
if self.xtol is not None:
opt_dict['xtol'] = self.xtol
if self.ftol is not None:
opt_dict['ftol'] = self.ftol
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages=self.messages,
maxfun=self.max_f_eval, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[1]
self.status = tnc_rcstrings[opt_result[2]]
class opt_lbfgsb(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "L-BFGS-B (Scipy implementation)"
def opt(self, f_fp=None, f=None, fp=None):
"""
Run the optimizer
"""
rcstrings = ['Converged', 'Maximum number of f evaluations reached', 'Error']
assert f_fp != None, "BFGS requires f_fp"
if self.messages:
iprint = 1
else:
iprint = -1
opt_dict = {}
if self.xtol is not None:
print "WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it"
if self.ftol is not None:
print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it"
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
maxfun=self.max_f_eval, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[2]['funcalls']
self.status = rcstrings[opt_result[2]['warnflag']]
class opt_simplex(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Nelder-Mead simplex routine (via Scipy)"
def opt(self, f_fp=None, f=None, fp=None):
"""
The simplex optimizer does not require gradients.
"""
statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached']
opt_dict = {}
if self.xtol is not None:
opt_dict['xtol'] = self.xtol
if self.ftol is not None:
opt_dict['ftol'] = self.ftol
if self.gtol is not None:
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"
opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages,
maxfun=self.max_f_eval, full_output=True, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = opt_result[1]
self.funct_eval = opt_result[3]
self.status = statuses[opt_result[4]]
self.trace = None
class opt_rasm(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Rasmussen's Conjugate Gradient"
def opt(self, f_fp=None, f=None, fp=None):
"""
Run Rasmussen's Conjugate Gradient optimizer
"""
assert f_fp is not None, "Rasmussen's minimizer requires f_fp"
statuses = ['Converged', 'Line search failed', 'Maximum number of f evaluations reached',
'NaNs in optimization']
opt_dict = {}
if self.xtol is not None:
print "WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it"
if self.ftol is not None:
print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it"
if self.gtol is not None:
print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it"
opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages,
maxnumfuneval=self.max_f_eval)
self.x_opt = opt_result[0]
self.f_opt = opt_result[1][-1]
self.funct_eval = opt_result[2]
self.status = statuses[opt_result[3]]
self.trace = opt_result[1]
class opt_SCG(Optimizer):
def __init__(self, *args, **kwargs):
if 'max_f_eval' in kwargs:
warn("max_f_eval deprecated for SCG optimizer: use max_iters instead!\nIgnoring max_f_eval!", FutureWarning)
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Scaled Conjugate Gradients"
def opt(self, f_fp=None, f=None, fp=None):
assert f is not None
assert fp is not None
opt_result = SCG(f, fp, self.x_init, display=self.messages,
maxiters=self.max_iters,
max_f_eval=self.max_f_eval,
xtol=self.xtol, ftol=self.ftol,
gtol=self.gtol)
self.x_opt = opt_result[0]
self.trace = opt_result[1]
self.f_opt = self.trace[-1]
self.funct_eval = opt_result[2]
self.status = opt_result[3]
def get_optimizer(f_min):
from sgd import opt_SGD
optimizers = {'fmin_tnc': opt_tnc,
'simplex': opt_simplex,
'lbfgsb': opt_lbfgsb,
'scg': opt_SCG,
'sgd': opt_SGD}
if rasm_available:
optimizers['rasmussen'] = opt_rasm
for opt_name in optimizers.keys():
if opt_name.lower().find(f_min.lower()) != -1:
return optimizers[opt_name]
raise KeyError('No optimizer was found matching the name: %s' % f_min)
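A short sketch of resolving an optimizer by name and running it on a toy objective (name matching is by substring, so 'scg' selects opt_SCG):
# Sketch: look up an optimizer by name and run it (toy objective).
import numpy as np
f = lambda x: float(np.dot(x, x))  # objective
fp = lambda x: 2 * x               # gradient
opt_class = get_optimizer('scg')
o = opt_class(x_init=np.ones(3), max_iters=50)
o.run(f=f, fp=fp)                  # SCG takes f and fp separately
print(o)                           # summary from Optimizer.__str__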

85
GPy/inference/samplers.py Normal file
@@ -0,0 +1,85 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import linalg, optimize
import pylab as pb
import Tango
import sys
import re
import numdifftools as ndt
import pdb
import cPickle
class Metropolis_Hastings:
def __init__(self,model,cov=None):
"""Metropolis Hastings, with tunings according to Gelman et al. """
self.model = model
current = self.model._get_params_transformed()
self.D = current.size
self.chains = []
if cov is None:
self.cov = model.Laplace_covariance()
else:
self.cov = cov
self.scale = 2.4/np.sqrt(self.D)
self.new_chain(current)
def new_chain(self, start=None):
self.chains.append([])
if start is None:
self.model.randomize()
else:
self.model._set_params_transformed(start)
def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400):
current = self.model._get_params_transformed()
fcurrent = self.model.log_likelihood() + self.model.log_prior()
accepted = np.zeros(Ntotal,dtype=np.bool)
for it in range(Ntotal):
print "sample %d of %d\r"%(it,Ntotal),
sys.stdout.flush()
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
self.model._set_params_transformed(prop)
fprop = self.model.log_likelihood() + self.model.log_prior()
if fprop>fcurrent:#sample accepted, going 'uphill'
accepted[it] = True
current = prop
fcurrent = fprop
else:
u = np.random.rand()
if np.exp(fprop-fcurrent)>u:#sample accepted downhill
accepted[it] = True
current = prop
fcurrent = fprop
#store current value
if (it > Nburn) & ((it%Nthin)==0):
self.chains[-1].append(current)
#tuning!
if it & ((it%tune_interval)==0) & tune & ((it<Nburn) | (tune_throughout)):
pc = np.mean(accepted[it-tune_interval:it])
self.cov = np.cov(np.vstack(self.chains[-1][-tune_interval:]).T)
if pc > .25:
self.scale *= 1.1
if pc < .15:
self.scale /= 1.1
def predict(self,function,args):
"""Make a prediction for the function, to which we will pass the additional arguments"""
param = self.model._get_params()
fs = []
for p in self.chains[-1]:
self.model._set_params(p)
fs.append(function(*args))
self.model._set_params(param)# reset model to starting state
return fs
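A usage sketch for the sampler, assuming m is an already-built GPy model (the constructor calls m.Laplace_covariance() when no proposal covariance is given):
# Sketch: sample a model's hyperparameter posterior (assumes a built model `m`).
import numpy as np
mh = Metropolis_Hastings(m)
mh.sample(Ntotal=2000, Nburn=500, Nthin=5)
samples = np.vstack(mh.chains[-1])  # thinned, post-burn-in samples
print(samples.mean(axis=0))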

191
GPy/inference/scg.py Normal file
@@ -0,0 +1,191 @@
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
# Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python by N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def print_out(len_maxiters, fnow, current_grad, beta, iteration):
print '\r',
print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
def exponents(fnow, current_grad):
exps = [np.abs(fnow), current_grad]
return np.sign(exps) * np.log10(exps).astype(int)
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
function_eval number of fn evaluations
status: string describing convergence status
"""
if xtol is None:
xtol = 1e-6
if ftol is None:
ftol = 1e-6
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-8
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
if any(np.isnan(gradnew)):
raise ValueError("Initial gradient contains NaN values")
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-60 # Lower bound on scale.
betamax = 1.0e50 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
len_maxiters = len(str(maxiters))
if display:
print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters)
exps = exponents(fnow, current_grad)
p_iter = iteration
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0 / np.sqrt(kappa)
xplus = x + sigma * d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew)) / sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta * kappa
if delta <= 0:
delta = beta * kappa
beta = beta - theta / kappa
alpha = -mu / delta
# Calculate the comparison ratio.
xnew = x + alpha * d
fnew = f(xnew, *optargs)
function_eval += 1
# if function_eval >= max_f_eval:
# status = "maximum number of function evaluations exceeded"
# break
# return x, flog, function_eval, status
Delta = 2.*(fnew - fold) / (alpha * mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration)
n_exps = exponents(fnow, current_grad)
if iteration - p_iter >= 20 * np.random.rand():
a = iteration >= p_iter * 2.78
b = np.any(n_exps < exps)
if a or b:
p_iter = iteration
print ''
if b:
exps = n_exps
if success:
# Test for termination
if (np.abs(fnew - fold) < ftol):
status = 'converged - relative reduction in objective'
break
# return x, flog, function_eval, status
elif (np.max(np.abs(alpha * d)) < xtol):
status = 'converged - relative stepsize'
break
else:
# Update variables for new position
gradnew = gradf(x, *optargs)
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew
fold = fnew
# If the gradient is zero then we are done.
if current_grad <= gtol:
status = 'converged - relative reduction in gradient'
break
# return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.5 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
# beta = 1. # TODO: betareset!!
nsuccess = 0
elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
d = Gamma * d - gradnew
else:
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration)
print ""
print status
return x, flog, function_eval, status
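SCG can also be called directly on any objective/gradient pair; a standalone sketch on a toy quadratic (the four return values are as documented above):
# Sketch: run SCG directly on a simple quadratic.
import numpy as np
f = lambda x: float(np.dot(x, x))
gradf = lambda x: 2 * x
x_opt, flog, n_evals, status = SCG(f, gradf, np.array([3., -1., 2.]), display=False)
print(status)  # e.g. 'converged - relative reduction in objective'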

356
GPy/inference/sgd.py Normal file
@@ -0,0 +1,356 @@
import numpy as np
import scipy as sp
import scipy.sparse
from optimization import Optimizer
from scipy import linalg, optimize
import pylab as plt
import copy, sys, pickle
class opt_SGD(Optimizer):
"""
Optimize using stochastic gradient descent.
*** Parameters ***
Model: reference to the Model object
iterations: number of iterations
learning_rate: learning rate
momentum: momentum
"""
def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs):
self.opt_name = "Stochastic Gradient Descent"
self.Model = model
self.iterations = iterations
self.momentum = momentum
self.learning_rate = learning_rate
self.x_opt = None
self.f_opt = None
self.messages = messages
self.batch_size = batch_size
self.self_paced = self_paced
self.center = center
self.param_traces = [('noise',[])]
self.iteration_file = iteration_file
self.learning_rate_adaptation = learning_rate_adaptation
self.actual_iter = actual_iter
if self.learning_rate_adaptation is not None:
if self.learning_rate_adaptation == 'annealing':
self.learning_rate_0 = self.learning_rate
else:
self.learning_rate_0 = self.learning_rate.mean()
self.schedule = schedule
# if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1:
# self.param_traces.append(('bias',[]))
# if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1:
# self.param_traces.append(('linear',[]))
# if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1:
# self.param_traces.append(('rbf_var',[]))
self.param_traces = dict(self.param_traces)
self.fopt_trace = []
num_params = len(self.Model._get_params())
if isinstance(self.learning_rate, float):
self.learning_rate = np.ones((num_params,)) * self.learning_rate
assert (len(self.learning_rate) == num_params), "there must be one learning rate per parameter"
def __str__(self):
status = "\nOptimizer: \t\t\t %s\n" % self.opt_name
status += "f(x_opt): \t\t\t %.4f\n" % self.f_opt
status += "Number of iterations: \t\t %d\n" % self.iterations
status += "Learning rate: \t\t\t max %.3f, min %.3f\n" % (self.learning_rate.max(), self.learning_rate.min())
status += "Momentum: \t\t\t %.3f\n" % self.momentum
status += "Batch size: \t\t\t %d\n" % self.batch_size
status += "Time elapsed: \t\t\t %s\n" % self.time
return status
def plot_traces(self):
plt.figure()
plt.subplot(211)
plt.title('Parameters')
for k in self.param_traces.keys():
plt.plot(self.param_traces[k], label=k)
plt.legend(loc=0)
plt.subplot(212)
plt.title('Objective function')
plt.plot(self.fopt_trace)
def non_null_samples(self, data):
return (np.isnan(data).sum(axis=1) == 0)
def check_for_missing(self, data):
if sp.sparse.issparse(self.Model.likelihood.Y):
return True
else:
return np.isnan(data).sum() > 0
def subset_parameter_vector(self, x, samples, param_shapes):
subset = np.array([], dtype = int)
x = np.arange(0, len(x))
i = 0
for s in param_shapes:
N, input_dim = s
X = x[i:i+N*input_dim].reshape(N, input_dim)
X = X[samples]
subset = np.append(subset, X.flatten())
i += N*input_dim
subset = np.append(subset, x[i:])
return subset
def shift_constraints(self, j):
constrained_indices = copy.deepcopy(self.Model.constrained_indices)
for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1:
self.Model.constrained_indices[c][i] = pos
else:
mask[i] = False
self.Model.constrained_indices[c] = self.Model.constrained_indices[c][mask]
return constrained_indices
# back them up
# bounded_i = copy.deepcopy(self.Model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.Model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.Model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1:
# pos2 = np.where(self.Model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.Model.constrained_bounded_indices[b][pos2] = pos[0]
# else:
# if len(self.Model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our
# # current index. Can't deal with this right now.
# raise NotImplementedError
# else: # just remove it from the indices
# mask = self.Model.constrained_bounded_indices[b] != bc
# self.Model.constrained_bounded_indices[b] = self.Model.constrained_bounded_indices[b][mask]
# here we shift the positive constraints. We cycle through each positive
# # constraint
# positive = self.Model.constrained_positive_indices.copy()
# mask = (np.ones_like(positive) == 1)
# for p in range(len(positive)):
# # we now check whether the constrained index appears in the j vector
# # (the vector of the "active" indices)
# pos = np.where(j == self.Model.constrained_positive_indices[p])[0]
# if len(pos) == 1:
# self.Model.constrained_positive_indices[p] = pos
# else:
# mask[p] = False
# self.Model.constrained_positive_indices = self.Model.constrained_positive_indices[mask]
# return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, c):#b, p):
# self.Model.constrained_bounded_indices = b[0]
# self.Model.constrained_bounded_lowers = b[1]
# self.Model.constrained_bounded_uppers = b[2]
# self.Model.constrained_positive_indices = p
self.Model.constrained_indices = c
def get_param_shapes(self, N = None, input_dim = None):
model_name = self.Model.__class__.__name__
if model_name == 'GPLVM':
return [(N, input_dim)]
if model_name == 'Bayesian_GPLVM':
return [(N, input_dim), (N, input_dim)]
else:
raise NotImplementedError
def step_with_missing_data(self, f_fp, X, step, shapes):
N, input_dim = X.shape
if not sp.sparse.issparse(self.Model.likelihood.Y):
Y = self.Model.likelihood.Y
samples = self.non_null_samples(self.Model.likelihood.Y)
self.Model.N = samples.sum()
Y = Y[samples]
else:
samples = self.Model.likelihood.Y.nonzero()[0]
self.Model.N = len(samples)
Y = np.asarray(self.Model.likelihood.Y[samples].todense(), dtype = np.float64)
if self.Model.N == 0 or Y.std() == 0.0:
return 0, step, self.Model.N
self.Model.likelihood._offset = Y.mean()
self.Model.likelihood._scale = Y.std()
self.Model.likelihood.set_data(Y)
# self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision
sigma = self.Model.likelihood._variance
self.Model.likelihood._variance = None # invalidate cache
self.Model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.Model.X = X[samples]
model_name = self.Model.__class__.__name__
if model_name == 'Bayesian_GPLVM':
self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T)
self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT)
ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j])
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
self.x_opt[j] -= step[j]
self.restore_constraints(ci)
self.Model.grads[j] = fp
# restore likelihood _offset and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one.
self.Model.likelihood._offset = 0
self.Model.likelihood._scale = 1
return f, step, self.Model.N
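# A note on the masking above (illustrative sketch; the array values are
# made up): dense targets keep only the rows without NaNs, sparse targets
# keep the rows that are explicitly stored.
#   Y = np.array([[1., 2.], [np.nan, 3.], [4., 5.]])
#   rows = np.isnan(Y).sum(axis=1) == 0   # -> [True, False, True]
#   Y_obs = Y[rows]                       # the 'samples' selected above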
def adapt_learning_rate(self, t, D):
if self.learning_rate_adaptation == 'adagrad':
if t > 0:
g_k = self.Model.grads
self.s_k += np.square(g_k)
t0 = 100.0
self.learning_rate = 0.1/(t0 + np.sqrt(self.s_k))
else:
self.learning_rate = np.zeros_like(self.learning_rate)
self.s_k = np.zeros_like(self.x_opt)
elif self.learning_rate_adaptation == 'annealing':
#self.learning_rate = self.learning_rate_0/(1+float(t+1)/10)
self.learning_rate = np.ones_like(self.learning_rate) * self.schedule[t]
elif self.learning_rate_adaptation == 'semi_pesky':
if self.Model.__class__.__name__ == 'Bayesian_GPLVM':
g_t = self.Model.grads
if t == 0:
self.hbar_t = 0.0
self.tau_t = 100.0
self.gbar_t = 0.0
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
self.learning_rate = np.ones_like(self.learning_rate)*(np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t)
self.tau_t = self.tau_t*(1-self.learning_rate) + 1
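# The three schemes above in brief: 'adagrad' accumulates squared gradients
# and scales a base rate by their root, roughly
#   s += g**2;  eta = eta0 / (t0 + np.sqrt(s));  x -= eta * g
# (eta0 and t0 are illustrative constants); 'annealing' reads the rate from
# a user-supplied schedule[t]; 'semi_pesky' appears to follow the vSGD rule
# of Schaul et al. (2013), "No more pesky learning rates", with gbar_t and
# hbar_t as running averages of the gradient and its squared norm.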
def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.Model._get_params_transformed()
self.grads = []
X, Y = self.Model.X.copy(), self.Model.likelihood.Y.copy()
self.Model.likelihood.YYT = 0
self.Model.likelihood.trYYT = 0
self.Model.likelihood._offset = 0.0
self.Model.likelihood._scale = 1.0
N, input_dim = self.Model.X.shape
D = self.Model.likelihood.Y.shape[1]
params = self.Model._get_params()
self.trace = []
status = ""
missing_data = self.check_for_missing(self.Model.likelihood.Y)
step = np.zeros_like(params)
for it in range(self.iterations):
if self.actual_iter is not None:
it = self.actual_iter
self.Model.grads = np.zeros_like(self.x_opt) # TODO this is ugly
if it == 0 or self.self_paced is False:
features = np.random.permutation(Y.shape[1])
else:
features = np.argsort(NLL)
b = len(features)/self.batch_size
features = [features[i::b] for i in range(b)]
NLL = []
for count, j in enumerate(features):
self.Model.input_dim = len(j)
self.Model.likelihood.input_dim = len(j)
self.Model.likelihood.set_data(Y[:, j])
# self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision
sigma = self.Model.likelihood._variance
self.Model.likelihood._variance = None # invalidate cache
self.Model.likelihood._set_params(sigma)
if missing_data:
shapes = self.get_param_shapes(N, input_dim)
f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes)
else:
self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T)
self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT)
Nj = N
f, fp = f_fp(self.x_opt)
self.Model.grads = fp.copy()
step = self.momentum * step + self.learning_rate * fp
self.x_opt -= step
if self.messages == 2:
noise = self.Model.likelihood._variance
status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise)
sys.stdout.write(status)
sys.stdout.flush()
self.param_traces['noise'].append(noise)
self.adapt_learning_rate(it+count, D)
NLL.append(f)
self.fopt_trace.append(NLL[-1])
# fig = plt.figure('traces')
# plt.clf()
# plt.plot(self.param_traces['noise'])
# for k in self.param_traces.keys():
# self.param_traces[k].append(self.Model.get(k)[0])
self.grads.append(self.Model.grads.tolist())
# should really be a sum(), but earlier samples in the iteration will have a very crappy ll
self.f_opt = np.mean(NLL)
self.Model.N = N
self.Model.X = X
self.Model.input_dim = D
self.Model.likelihood.N = N
self.Model.likelihood.input_dim = D
self.Model.likelihood.Y = Y
sigma = self.Model.likelihood._variance
self.Model.likelihood._variance = None # invalidate cache
self.Model.likelihood._set_params(sigma)
self.trace.append(self.f_opt)
if self.iteration_file is not None:
f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')
data = [self.x_opt, self.fopt_trace, self.param_traces]
pickle.dump(data, f)
f.close()
if self.messages != 0:
sys.stdout.write('\r' + ' '*len(status)*2 + ' \r')
status = "SGD Iteration: {0: 3d}/{1: 3d} f: {2: 2.3f} max eta: {3: 1.5f}\n".format(it+1, self.iterations, self.f_opt, self.learning_rate.max())
sys.stdout.write(status)
sys.stdout.flush()
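The core of opt() above is a classical momentum update on the transformed parameter vector. A self-contained sketch of just that update on a toy quadratic objective (all names local to this sketch):

import numpy as np

def f_fp(x):
    # toy objective 0.5*||x||^2 and its gradient
    return 0.5 * np.dot(x, x), x

x = np.ones(5)
step = np.zeros_like(x)
learning_rate, momentum = 0.1, 0.9
for it in range(100):
    f, fp = f_fp(x)
    step = momentum * step + learning_rate * fp  # the same update used in opt()
    x -= step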

9
GPy/kern/__init__.py Normal file
View file

@ -0,0 +1,9 @@
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import *
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except ImportError:
pass
from kern import *

423
GPy/kern/constructors.py Normal file
View file

@ -0,0 +1,423 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import kern
import parts
def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
"""
Construct an RBF kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param inv_lengthscale: the inverse lengthscale of the kernel
:type inv_lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD)
return kern(input_dim, [part])
def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
"""
Construct an RBF kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
return kern(input_dim, [part])
def linear(input_dim,variances=None,ARD=False):
"""
Construct a linear kernel.
Arguments
---------
input_dim (int), obligatory
variances (np.ndarray)
ARD (boolean)
"""
part = parts.linear.Linear(input_dim,variances,ARD)
return kern(input_dim, [part])
def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False):
"""
Construct an MLP kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param weight_variance: the variance of the weights in the neural network (one value per input dimension if ARD)
:type weight_variance: float or np.ndarray
:param bias_variance: the variance of the biases in the neural network.
:type bias_variance: float
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean
"""
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
return kern(input_dim, [part])
def gibbs(input_dim,variance=1., mapping=None):
"""
Gibbs and MacKay non-stationary covariance function.
.. math::
r = \sqrt{(x_i - x_j)^\top (x_i - x_j)}
k(x_i, x_j) = \sigma^2 Z \exp\left(-\frac{r^2}{l(x_i)^2 + l(x_j)^2}\right)
Z = \sqrt{\frac{2 l(x_i) l(x_j)}{l(x_i)^2 + l(x_j)^2}}
where :math:`l(x)` is a function giving the length scale as a function of space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of :math:`l(x)`, a function that can be specified by the user; by default a multi-layer perceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space.
:type mapping: GPy.core.Mapping
:rtype: kern object
"""
part = parts.gibbs.Gibbs(input_dim,variance,mapping)
return kern(input_dim, [part])
def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
"""
Construct a polynomial kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param weight_variance: the variance of the weights
:type weight_variance: float or np.ndarray
:param bias_variance: the variance of the biases.
:type bias_variance: float
:param degree: the degree of the polynomial
:type degree: int
:param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean
"""
part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
return kern(input_dim, [part])
def white(input_dim,variance=1.):
"""
Construct a white kernel.
Arguments
---------
input_dim (int), obligatory
variance (float)
"""
part = parts.white.White(input_dim,variance)
return kern(input_dim, [part])
def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
"""
Construct an exponential kernel
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part])
def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 3/2 kernel.
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part])
def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 5/2 kernel.
:param input_dim: dimensionality of the kernel, obligatory
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
return kern(input_dim, [part])
def bias(input_dim, variance=1.):
"""
Construct a bias kernel.
Arguments
---------
input_dim (int), obligatory
variance (float)
"""
part = parts.bias.Bias(input_dim, variance)
return kern(input_dim, [part])
def finite_dimensional(input_dim, F, G, variances=1., weights=None):
"""
Construct a finite dimensional kernel.
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F
variances : np.ndarray with shape (n,)
"""
part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
return kern(input_dim, [part])
def spline(input_dim, variance=1.):
"""
Construct a spline kernel.
:param input_dim: Dimensionality of the kernel
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.spline.Spline(input_dim, variance)
return kern(input_dim, [part])
def Brownian(input_dim, variance=1.):
"""
Construct a Brownian motion kernel.
:param input_dim: Dimensionality of the kernel
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
part = parts.Brownian.Brownian(input_dim, variance)
return kern(input_dim, [part])
try:
import sympy as sp
from sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
sympy_available = True
except ImportError:
sympy_available = False
if sympy_available:
def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
"""
Radial Basis Function covariance.
"""
X = [sp.var('x%i' % i) for i in range(input_dim)]
Z = [sp.var('z%i' % i) for i in range(input_dim)]
rbf_variance = sp.var('rbf_variance',positive=True)
if ARD:
rbf_lengthscales = [sp.var('rbf_lengthscale_%i' % i, positive=True) for i in range(input_dim)]
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/2.)
else:
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
return kern(input_dim, [spkern(input_dim, f)])
def sympykern(input_dim, k):
"""
A kernel from a symbolic sympy representation
"""
return kern(input_dim, [spkern(input_dim, k)])
del sympy_available
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
"""
Construct a periodic exponential kernel
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
"""
Construct a periodic Matern 3/2 kernel.
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
"""
Construct a periodic Matern 5/2 kernel.
:param input_dim: dimensionality, only defined for input_dim=1
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part])
def prod(k1,k2,tensor=False):
"""
Construct a product kernel from two existing kernels.
:param k1, k2: the kernels to multiply
:type k1, k2: kernpart
:param tensor: if False (default) the kernels are multiplied as functions defined on the same input space; if True, on the tensor product of their input spaces
:type tensor: Boolean
:rtype: kernel object
"""
part = parts.prod.Prod(k1, k2, tensor)
return kern(part.input_dim, [part])
def symmetric(k):
"""
Construct a symmetric kernel from an existing kernel
"""
k_ = k.copy()
k_.parts = [parts.symmetric.Symmetric(p) for p in k.parts]
return k_
def coregionalise(output_dim, rank=1, W=None, kappa=None):
"""
Coregionalisation kernel.
Used for computing covariance functions of the form
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
where
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \kappa \mathbf{I}
:param output_dim: the number of output dimensions
:type output_dim: int
:param rank: the rank of the coregionalisation matrix.
:type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
:type W: ndarray
:param kappa: a diagonal term which allows the outputs to behave independently.
:rtype: kernel object
.. Note: see coregionalisation examples in GPy.examples.regression for some usage.
"""
p = parts.coregionalise.Coregionalise(output_dim,rank,W,kappa)
return kern(1,[p])
def rational_quadratic(input_dim, variance=1., lengthscale=1., power=1.):
"""
Construct rational quadratic kernel.
:param input_dim: the number of input dimensions
:type input_dim: int (input_dim=1 is the only value currently supported)
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscale :math:`\ell`
:type lengthscale: float
:param power: the power of the kernel
:type power: float
:rtype: kern object
"""
part = parts.rational_quadratic.RationalQuadratic(input_dim, variance, lengthscale, power)
return kern(input_dim, [part])
def fixed(input_dim, K, variance=1.):
"""
Construct a Fixed effect kernel.
:param input_dim: the number of input dimensions
:type input_dim: int (input_dim=1 is the only value currently supported)
:param K: the fixed covariance matrix
:type K: np.array
:param variance: kernel variance
:type variance: float
:rtype: kern object
"""
part = parts.fixed.Fixed(input_dim, K, variance)
return kern(input_dim, [part])
def rbfcos(input_dim, variance=1., frequencies=None, bandwidths=None, ARD=False):
"""
Construct an rbfcos kernel.
"""
part = parts.rbfcos.RBFCos(input_dim, variance, frequencies, bandwidths, ARD)
return kern(input_dim, [part])
def independent_outputs(k):
"""
Construct a kernel with independent outputs from an existing kernel
"""
for sl in k.input_slices:
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
_parts = [parts.independent_outputs.IndependentOutputs(p) for p in k.parts]
return kern(k.input_dim+1,_parts)
def hierarchical(k):
"""
TODO: this can't be right! Construct a hierarchical kernel from an existing kernel.
"""
# for sl in k.input_slices:
# assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
_parts = [parts.hierarchical.Hierarchical(k.parts)]
return kern(k.input_dim+len(k.parts),_parts)
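Each constructor above wraps one Kernpart in a kern object, and kern overloads + and * (see kern.py below), so the results compose directly. A short usage sketch (dimensions and hyperparameter values are illustrative):

import numpy as np
import GPy

k = GPy.kern.rbf(2, variance=1., lengthscale=0.5) + GPy.kern.white(2, variance=0.01)
X = np.random.randn(10, 2)
K = k.K(X)                                   # 10 x 10; rbf and white parts are summed
k2 = GPy.kern.rbf(1) * GPy.kern.Matern32(1)  # product on the same 1-d input space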

669
GPy/kern/kern.py Normal file
View file

@ -0,0 +1,669 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..core.parameterized import Parameterized
from parts.kernpart import Kernpart
import itertools
from parts.prod import Prod as prod
from matplotlib.transforms import offset_copy
class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None):
"""
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of various things like which parameters live where.
The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are
computed additively. For multiplication, special _prod_ parts
are used.
:param input_dim: The dimensionality of the kernel's input space
:type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel
:type parts: list of Kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel
:type input_slices: list of slice objects, or list of bools
"""
self.parts = parts
self.Nparts = len(parts)
self.num_params = sum([p.num_params for p in self.parts])
self.input_dim = input_dim
# deal with input_slices
if input_slices is None:
self.input_slices = [slice(None) for p in self.parts]
else:
assert len(input_slices) == len(self.parts)
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self.parts:
assert isinstance(p, Kernpart), "bad kernel part"
self.compute_param_slices()
Parameterized.__init__(self)
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized.getstate(self) + [self.parts,
self.Nparts,
self.num_params,
self.input_dim,
self.input_slices,
self.param_slices
]
def setstate(self, state):
self.param_slices = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
self.num_params = state.pop()
self.Nparts = state.pop()
self.parts = state.pop()
Parameterized.setstate(self, state)
def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
"""If an ARD kernel is present, it bar-plots the ARD parameters,
:param fignum: figure number of the plot
:param ax: matplotlib axis to plot on
:param title:
title of the plot,
pass '' to not print a title
pass None for a generic title
"""
if ax is None:
fig = pb.figure(fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
from GPy.util import Tango
from matplotlib.textpath import TextPath
Tango.reset()
xticklabels = []
bars = []
x0 = 0
for p in self.parts:
c = Tango.nextMedium()
if hasattr(p, 'ARD') and p.ARD:
if title is None:
ax.set_title('ARD parameters, %s kernel' % p.name)
else:
ax.set_title(title)
if p.name == 'linear':
ard_params = p.variances
else:
ard_params = 1. / p.lengthscale
x = np.arange(x0, x0 + len(ard_params))
bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name))
xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
x0 += len(ard_params)
x = np.arange(x0)
transOffset = offset_copy(ax.transData, fig=fig,
x=0., y= -2., units='points')
transOffsetUp = offset_copy(ax.transData, fig=fig,
x=0., y=1., units='points')
for bar in bars:
for patch, num in zip(bar.patches, np.arange(len(bar.patches))):
height = patch.get_height()
xi = patch.get_x() + patch.get_width() / 2.
va = 'top'
c = 'w'
t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center')
transform = transOffset
if patch.get_extents().height <= t.get_extents().height + 3:
va = 'bottom'
c = 'k'
transform = transOffsetUp
ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform)
# for xi, t in zip(x, xticklabels):
# ax.text(xi, maxi / 2, t, rotation=90, ha='center', va='center')
# ax.set_xticklabels(xticklabels, rotation=17)
ax.set_xticks([])
ax.set_xlim(-.5, x0 - .5)
if legend:
if title == '':
mode = 'expand'
ax.legend(bbox_to_anchor=(0., 1.02, 1., 1.02), loc=3,
ncol=len(bars), mode=mode, borderaxespad=0.)
fig.tight_layout(rect=(0, 0, 1, .9))
else:
ax.legend()
return ax
def _transform_gradients(self, g):
x = self._get_params()
[np.put(g, i, g[i] * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.fixed_indices):
to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
return np.delete(g, to_remove)
else:
return g
def compute_param_slices(self):
"""create a set of slices that can index the parameters of each part."""
self.param_slices = []
count = 0
for p in self.parts:
self.param_slices.append(slice(count, count + p.num_params))
count += p.num_params
def __add__(self, other):
"""
Shortcut for `add`.
"""
return self.add(other)
def add(self, other, tensor=False):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added
:type other: GPy.kern
"""
if tensor:
D = self.input_dim + other.input_dim
self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.constraints = self.constraints + other.constraints
newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
else:
assert self.input_dim == other.input_dim
newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
return newkern
def __mul__(self, other):
"""
Shortcut for `prod`.
"""
return self.prod(other)
def __pow__(self, other, tensor=False):
"""
Shortcut for tensor `prod`.
"""
return self.prod(other, tensor=True)
def prod(self, other, tensor=False):
"""
Multiply two kernels (either on the same input space, or on the tensor product of the input spaces).
:param other: the other kernel to be multiplied
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
K1 = self.copy()
K2 = other.copy()
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
s1[sl1], s2[sl2] = [True], [True]
slices += [s1 + s2]
newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
if tensor:
newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
else:
newkern = kern(K1.input_dim, newkernparts, slices)
newkern._follow_constrains(K1, K2)
return newkern
def _follow_constrains(self, K1, K2):
# Build the array that allows to go from the initial indices of the param to the new ones
K1_param = []
n = 0
for k1 in K1.parts:
K1_param += [range(n, n + k1.num_params)]
n += k1.num_params
n = 0
K2_param = []
for k2 in K2.parts:
K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
n += k2.num_params
index_param = []
for p1 in K1_param:
for p2 in K2_param:
index_param += p1 + p2
index_param = np.array(index_param)
# Get the ties and constrains of the kernels before the multiplication
prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices]
prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices]
prev_constr = K1.constraints + K2.constraints
# prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices]
# prev_constr_fix_values = K1.fixed_values + K2.fixed_values
# follow the previous ties
for arr in prev_ties:
for j in arr:
index_param[np.where(index_param == j)[0]] = arr[0]
# ties and constrains
for i in range(K1.num_params + K2.num_params):
index = np.where(index_param == i)[0]
if index.size > 1:
self.tie_params(index)
for i, t in zip(prev_constr_ind, prev_constr):
self.constrain(np.where(index_param == i)[0], t)
def _get_params(self):
return np.hstack([p._get_params() for p in self.parts])
def _set_params(self, x):
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
def _get_param_names(self):
# this is a bit nasty: we want to distinguish between parts with the same name by appending a count
part_names = np.array([k.name for k in self.parts], dtype=np.str)
counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
def K(self, X, X2=None, which_parts='all'):
if which_parts == 'all':
which_parts = [True] * self.Nparts
assert X.shape[1] == self.input_dim
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
return target
def dK_dtheta(self, dL_dK, X, X2=None):
"""
Compute the gradient of the covariance function with respect to the parameters.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: Np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)
"""
assert X.shape[1] == self.input_dim
target = np.zeros(self.num_params)
if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
else:
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dK_dX(self, dL_dK, X, X2=None):
"""Compute the gradient of the covariance function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
else:
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def Kdiag(self, X, which_parts='all'):
"""Compute the diagonal of the covariance function for inputs X."""
if which_parts == 'all':
which_parts = [True] * self.Nparts
assert X.shape[1] == self.input_dim
target = np.zeros(X.shape[0])
[p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on]
return target
def dKdiag_dtheta(self, dL_dKdiag, X):
"""Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.num_params)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.input_dim
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.num_params)
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.num_params))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are num_samples,num_inducing,input_dim"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S
def psi2(self, Z, mu, S):
"""
Compute the psi2 statistics for the covariance function.
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
"""
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
# TODO: input_slices needed
crossterms = 0
for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self.parts, self.input_slices), 2):
if i_s1 == i_s2:
# TODO psi1 this must be faster/better/precached/more nice
tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
prod = np.multiply(tmp1, tmp2)
crossterms += prod[:, :, None] + prod[:, None, :]
# target += crossterms
return target + crossterms
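# Note on the loop above: for an additive kernel, psi2 picks up a cross term
# for every pair of parts that share an input slice; each cross term is
# assembled from the two parts' psi1 statistics as
# prod[:, :, None] + prod[:, None, :].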
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
"""Gradient of the psi2 statistics with respect to the parameters."""
target = np.zeros(self.num_params)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
# compute the "cross" terms
# TODO: better looping, input_slices
for i1, i2 in itertools.permutations(range(len(self.parts)), 2):
p1, p2 = self.parts[i1], self.parts[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
return self._transform_gradients(target)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# target *= 2
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self.parts, 2):
if p1.name == 'linear' and p2.name == 'linear':
raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
return target * 2
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
# compute the "cross" terms
# TODO: we need input_slices here.
for p1, p2 in itertools.permutations(self.parts, 2):
if p1.name == 'linear' and p2.name == 'linear':
raise NotImplementedError("We don't handle linear/linear cross-terms")
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
return target_mu, target_S
def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
if which_parts == 'all':
which_parts = [True] * self.Nparts
if self.input_dim == 1:
if x is None:
x = np.zeros((1, 1))
else:
x = np.asarray(x)
assert x.size == 1, "The size of the fixed variable x is not 1"
x = x.reshape((1, 1))
if plot_limits is None:
xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
elif len(plot_limits) == 2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = self.K(Xnew, x, which_parts)
pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax)
pb.xlabel("x")
pb.ylabel("k(x,%0.1f)" % x)
elif self.input_dim == 2:
if x is None:
x = np.zeros((1, 2))
else:
x = np.asarray(x)
assert x.size == 2, "The size of the fixed variable x is not 2"
x = x.reshape((1, 2))
if plot_limits is None:
xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
elif len(plot_limits) == 2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
resolution = resolution or 51
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution]
xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, which_parts)
Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable
pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1])
pb.xlabel("x1")
pb.ylabel("x2")
pb.title("k(x1,x2 ; %0.1f,%0.1f)" % (x[0, 0], x[0, 1]))
else:
raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"
from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
num_samples = 20
num_samples2 = 10
if kernel is None:
import GPy
kernel = GPy.kern.rbf(1)
if X is None:
X = np.random.randn(num_samples, kernel.input_dim)
if dL_dK is None:
if X2 is None:
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
#self.constrained_indices=[]
#self.constraints=[]
Model.__init__(self)
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<0):
return False
else:
return True
def _get_params(self):
return self.kernel._get_params()
def _get_param_names(self):
return self.kernel._get_param_names()
def _set_params(self, x):
self.kernel._set_params(x)
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."
class Kern_check_dK_dtheta(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to parameters. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
def __init__(self, kernel=None, dL_dK=None, X=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK is None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
class Kern_check_dKdiag_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK is None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
def kern_test(kern, X=None, X2=None, verbose=False):
"""This function runs on kernels to check the correctness of their implementation. It checks that the covariance function is positive definite for a randomly generated data set.
:param kern: the kernel to be tested.
:type kern: GPy.kern.Kernpart
:param X: X input values to test the covariance function.
:type X: ndarray
:param X2: X2 input values to test the covariance function.
:type X2: ndarray
"""
if X is None:
X = np.random.randn(10, kern.input_dim)
if X2 is None:
X2 = np.random.randn(20, kern.input_dim)
result = [Kern_check_model(kern, X=X).is_positive_definite(),
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose),
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose),
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose),
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)]
# Need to check
#Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)]
# but currently I think these aren't implemented.
return np.all(result)
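A usage sketch for the checking utilities above (the kernel and the input shapes are illustrative):

import numpy as np
import GPy
from GPy.kern.kern import kern_test

k = GPy.kern.rbf(3, ARD=True) + GPy.kern.bias(3)
X = np.random.randn(10, 3)
ok = kern_test(k, X=X, verbose=True)  # True when the PD and gradient checks all pass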

65
GPy/kern/parts/Brownian.py Normal file
View file

@ -0,0 +1,65 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
def theta(x):
"""Heavisdie step function"""
return np.where(x>=0.,1.,0.)
class Brownian(Kernpart):
"""
Brownian Motion kernel.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim
assert self.input_dim==1, "Brownian motion in 1D only"
self.num_params = 1
self.name = 'Brownian'
self._set_params(np.array([variance]).flatten())
def _get_params(self):
return self.variance
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):
if X2 is None:
X2 = X
target += self.variance*np.fmin(X,X2.T)
def Kdiag(self,X,target):
target += self.variance*X.flatten()
def dK_dtheta(self,dL_dK,X,X2,target):
if X2 is None:
X2 = X
target += np.sum(np.fmin(X,X2.T)*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.dot(X.flatten(), dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
raise NotImplementedError, "TODO"
#target += self.variance
#target -= self.variance*theta(X-X2.T)
#if X.shape==X2.shape:
#if np.all(X==X2):
#np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0])
def dKdiag_dX(self,dL_dKdiag,X,target):
target += self.variance*dL_dKdiag[:,None]
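The part above implements the Brownian-motion covariance cov(W_s, W_t) = sigma^2 min(s, t). A direct check of the same computation (values made up):

import numpy as np

t = np.array([[0.5], [1.0], [2.0]])
K = 1.0 * np.fmin(t, t.T)  # as in K() above, with variance 1
# K[0, 1] == 0.5 and K[2, 2] == 2.0: the familiar min(s, t) structure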

135
GPy/kern/parts/Matern32.py Normal file
View file

@ -0,0 +1,135 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import integrate
class Matern32(Kernpart):
"""
Matern 3/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(-\sqrt{3} r) \quad \text{where} \quad r = \sqrt{\sum_{i=1}^{\text{input\_dim}} \frac{(x_i - y_i)^2}{\ell_i^2}}
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If False, the kernel is isotropic (i.e. a single lengthscale parameter :math:`\ell`); otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params = 2
self.name = 'Mat32'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
self.name = 'Mat32'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, lengthscale.flatten())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance, self.lengthscale))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.variance = x[0]
self.lengthscale = x[1:]
def _get_param_names(self):
"""return parameter names."""
if self.num_params == 2:
return ['variance', 'lengthscale']
else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist)
invdist = 1. / np.where(dist != 0., dist, np.inf)
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
target[0] += np.sum(dvar * dL_dK)
if self.ARD:
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis]
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
else:
dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist)) * dist2M.sum(-1) * invdist
target[1] += np.sum(dl * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def dK_dX(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def Gram_matrix(self, F, F1, F2, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
F1lower = np.array([f(lower) for f in F1])[:, None]
# print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
# return(G)
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
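For reference, the closed form evaluated by K() above, written out for a single pair of points (a sketch; all values made up):

import numpy as np

variance, lengthscale = 2.0, 0.7
x, y = np.array([0.3, 1.0]), np.array([1.1, 0.2])
r = np.sqrt(np.sum(np.square((x - y) / lengthscale)))
k_xy = variance * (1 + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)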

142
GPy/kern/parts/Matern52.py Normal file
View file

@ -0,0 +1,142 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
import hashlib
from scipy import integrate
class Matern52(Kernpart):
"""
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 \left(1 + \sqrt{5} r + \frac{5}{3} r^2\right) \exp(-\sqrt{5} r) \quad \text{where} \quad r = \sqrt{\sum_{i=1}^{\text{input\_dim}} \frac{(x_i - y_i)^2}{\ell_i^2}}
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If False, the kernel is isotropic (i.e. a single lengthscale parameter :math:`\ell`); otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params = 2
self.name = 'Mat52'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
self.name = 'Mat52'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance,lengthscale.flatten())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.variance = x[0]
self.lengthscale = x[1:]
def _get_param_names(self):
"""return parameter names."""
if self.num_params == 2:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
target[0] += np.sum(dvar*dL_dK)
if self.ARD:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M * invdist[:,:,np.newaxis]
target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1) * invdist
target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def Gram_matrix(self,F,F1,F2,F3,lower,upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))
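
A minimal sketch (not part of the commit; it assumes GPy is importable and input_dim=1) checking that Matern52.K fills a caller-supplied target with the closed-form Matern 5/2 expression:

import numpy as np
from GPy.kern.parts.Matern52 import Matern52

X = np.linspace(0., 1., 4)[:, None]
k = Matern52(input_dim=1, variance=2.0, lengthscale=0.5)
K = np.zeros((4, 4))
k.K(X, None, K)                      # parts accumulate into target in place

r = np.abs(X - X.T) / 0.5            # scaled distances
K_direct = 2.0 * (1. + np.sqrt(5.) * r + 5. / 3. * r ** 2) * np.exp(-np.sqrt(5.) * r)
assert np.allclose(K, K_direct)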

26
GPy/kern/parts/__init__.py Normal file
View file

@ -0,0 +1,26 @@
import bias
import Brownian
import coregionalise
import exponential
import finite_dimensional
import fixed
import gibbs
import independent_outputs
import linear
import Matern32
import Matern52
import mlp
import periodic_exponential
import periodic_Matern32
import periodic_Matern52
import poly
import prod_orthogonal
import prod
import rational_quadratic
import rbfcos
import rbf
import spline
import symmetric
import white
import hierarchical
import rbf_inv

89
GPy/kern/parts/bias.py Normal file
View file

@ -0,0 +1,89 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
import hashlib
class Bias(Kernpart):
def __init__(self,input_dim,variance=1.):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
self.input_dim = input_dim
self.num_params = 1
self.name = 'bias'
self._set_params(np.array([variance]).flatten())
def _get_params(self):
return self.variance
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):
target += self.variance
def Kdiag(self,X,target):
target += self.variance
    def dK_dtheta(self,dL_dK,X,X2,target):
        target += dL_dK.sum()
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum()
def dK_dX(self, dL_dK,X, X2, target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
target += self.variance
def psi1(self, Z, mu, S, target):
self._psi1 = self.variance
target += self._psi1
def psi2(self, Z, mu, S, target):
target += self.variance**2
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
target += dL_dpsi0.sum()
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
target += dL_dpsi1.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
target += 2.*self.variance*dL_dpsi2.sum()
def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
pass
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
pass
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
pass
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
pass
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
pass
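
Because every part adds into a shared target buffer, a sum of kernels is just repeated in-place calls. A small sketch (not part of the commit; assumes GPy is importable) combining Bias with the Matern 5/2 part above:

import numpy as np
from GPy.kern.parts.bias import Bias
from GPy.kern.parts.Matern52 import Matern52

X = np.random.randn(5, 1)
target = np.zeros((5, 5))
Bias(1, variance=0.3).K(X, None, target)   # adds a constant 0.3 everywhere
Matern52(1).K(X, None, target)             # adds the Matern term on top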

160
GPy/kern/parts/coregionalise.py Normal file
View file

@ -0,0 +1,160 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
import pdb
from scipy import weave
class Coregionalise(Kernpart):
"""
Coregionalisation kernel.
Used for computing covariance functions of the form
.. math::
k_2(x, y)=B k(x, y)
where
.. math::
B = WW^\top + diag(kappa)
:param output_dim: the number of output dimensions
:type output_dim: int
:param rank: the rank of the coregionalisation matrix.
:type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
:type W: ndarray
:param kappa: a diagonal term which allows the outputs to behave independently.
:rtype: kernel object
.. Note: see coregionalisation examples in GPy.examples.regression for some usage.
"""
def __init__(self,output_dim,rank=1, W=None, kappa=None):
self.input_dim = 1
self.name = 'coregion'
self.output_dim = output_dim
self.rank = rank
if W is None:
self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.output_dim,self.rank)
self.W = W
if kappa is None:
kappa = 0.5*np.ones(self.output_dim)
else:
assert kappa.shape==(self.output_dim,)
self.kappa = kappa
self.num_params = self.output_dim*(self.rank + 1)
self._set_params(np.hstack([self.W.flatten(),self.kappa]))
def _get_params(self):
return np.hstack([self.W.flatten(),self.kappa])
def _set_params(self,x):
assert x.size == self.num_params
self.kappa = x[-self.output_dim:]
self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank)
self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
def _get_param_names(self):
return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)]
def K(self,index,index2,target):
index = np.asarray(index,dtype=np.int)
#here's the old code (numpy)
#if index2 is None:
#index2 = index
#else:
#index2 = np.asarray(index2,dtype=np.int)
#false_target = target.copy()
#ii,jj = np.meshgrid(index,index2)
#ii,jj = ii.T, jj.T
#false_target += self.B[ii,jj]
if index2 is None:
code="""
for(int i=0;i<N; i++){
target[i+i*N] += B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] += B[index[i]+output_dim*index[j]];
target[i+j*N] += target[j+i*N];
}
}
"""
N,B,output_dim = index.size, self.B, self.output_dim
weave.inline(code,['target','index','N','B','output_dim'])
else:
index2 = np.asarray(index2,dtype=np.int)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]];
}
}
"""
N,num_inducing,B,output_dim = index.size,index2.size, self.B, self.output_dim
weave.inline(code,['target','index','index2','N','num_inducing','B','output_dim'])
def Kdiag(self,index,target):
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
def dK_dtheta(self,dL_dK,index,index2,target):
index = np.asarray(index,dtype=np.int)
dL_dK_small = np.zeros_like(self.B)
if index2 is None:
index2 = index
else:
index2 = np.asarray(index2,dtype=np.int)
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N','num_inducing','output_dim','dL_dK','dL_dK_small','index','index2'])
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
target += np.hstack([dW.flatten(),dkappa])
def dK_dtheta_old(self,dL_dK,index,index2,target):
if index2 is None:
index2 = index
else:
index2 = np.asarray(index2,dtype=np.int)
ii,jj = np.meshgrid(index,index2)
ii,jj = ii.T, jj.T
dL_dK_small = np.zeros_like(self.B)
for i in range(self.output_dim):
for j in range(self.output_dim):
tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
dL_dK_small[i,j] = tmp
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
target += np.hstack([dW.flatten(),dkappa])
def dKdiag_dtheta(self,dL_dKdiag,index,target):
index = np.asarray(index,dtype=np.int).flatten()
dL_dKdiag_small = np.zeros(self.output_dim)
for i in range(self.output_dim):
dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
dW = 2.*self.W*dL_dKdiag_small[:,None]
dkappa = dL_dKdiag_small
target += np.hstack([dW.flatten(),dkappa])
def dK_dX(self,dL_dK,X,X2,target):
pass
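
The weave loops above are just a fast lookup into B. A pure-numpy equivalent (a sketch for illustration, not the code path the class uses) makes the coregionalisation structure explicit:

import numpy as np

output_dim, rank = 3, 1
W = np.random.randn(output_dim, rank)
kappa = 0.5 * np.ones(output_dim)
B = np.dot(W, W.T) + np.diag(kappa)        # B = WW^T + diag(kappa)

index = np.array([0, 0, 1, 2])             # output index of each datum
K = B[index[:, None], index[None, :]]      # equivalent of Coregionalise.K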

127
GPy/kern/parts/exponential.py Normal file
View file

@ -0,0 +1,127 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import integrate
class Exponential(Kernpart):
"""
Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
.. math::
k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input\_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if ARD == False:
self.num_params = 2
self.name = 'exp'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
self.name = 'exp'
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, lengthscale.flatten())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance, self.lengthscale))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.variance = x[0]
self.lengthscale = x[1:]
def _get_param_names(self):
"""return parameter names."""
if self.num_params == 2:
return ['variance', 'lengthscale']
else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
np.add(self.variance * np.exp(-dist), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
invdist = 1. / np.where(dist != 0., dist, np.inf)
dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3
dvar = np.exp(-dist)
target[0] += np.sum(dvar * dL_dK)
if self.ARD == True:
dl = self.variance * dvar[:, :, None] * dist2M * invdist[:, :, None]
target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0)
else:
dl = self.variance * dvar * dist2M.sum(-1) * invdist
target[1] += np.sum(dl * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
# NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(dL_dKdiag)
def dK_dX(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
def Gram_matrix(self, F, F1, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(1. / self.lengthscale * F[i](x) + F1[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
return(self.lengthscale / 2. / self.variance * G + 1. / self.variance * np.dot(Flower, Flower.T))
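
A short sketch of Gram_matrix with a hand-picked two-function basis on [0, 1] (not part of the commit; assumes GPy is importable with a numpy/scipy of this era):

import numpy as np
from GPy.kern.parts.exponential import Exponential

F = np.array([np.sin, np.cos])                      # basis functions
F1 = np.array([np.cos, lambda x: -np.sin(x)])       # their derivatives
k = Exponential(input_dim=1, variance=1., lengthscale=0.5)
G = k.Gram_matrix(F, F1, lower=0., upper=1.)        # 2x2 RKHS Gram matrix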

74
GPy/kern/parts/finite_dimensional.py Normal file
View file

@ -0,0 +1,74 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import pdinv,mdot
class FiniteDimensional(Kernpart):
def __init__(self, input_dim, F, G, variance=1., weights=None):
"""
        Arguments
        ---------
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F
weights : np.ndarray with shape (n,)
"""
self.input_dim = input_dim
self.F = F
self.G = G
self.G_1 ,L,Li,logdet = pdinv(G)
self.n = F.shape[0]
if weights is not None:
assert weights.shape==(self.n,)
else:
weights = np.ones(self.n)
self.num_params = self.n + 1
self.name = 'finite_dim'
self._set_params(np.hstack((variance,weights)))
def _get_params(self):
return np.hstack((self.variance,self.weights))
def _set_params(self,x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weights = x[1:]
def _get_param_names(self):
if self.n==1:
return ['variance','weight']
else:
return ['variance']+['weight_%i'%i for i in range(self.weights.size)]
def K(self,X,X2,target):
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])
FX2 = np.column_stack([f(X2) for f in self.F])
product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
np.add(product,target,target)
    def Kdiag(self,X,target):
        tmp = np.zeros((X.shape[0],X.shape[0]))
        self.K(X,None,tmp)
        np.add(target,np.diag(tmp),target)
def dK_dtheta(self,X,X2,target):
"""Return shape is NxMx(Ntheta)"""
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])
FX2 = np.column_stack([f(X2) for f in self.F])
DER = np.zeros((self.n,self.n,self.n))
for i in range(self.n):
DER[i,i,i] = np.sqrt(self.weights[i])
dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
        np.add(target[:,:,0],dv,target[:,:,0])
np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:])
def dKdiag_dtheta(self,X,target):
np.add(target[:,0],1.,target[:,0])
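
A sketch instantiating the part with a constant-plus-linear basis and the identity as its Gram matrix (illustrative values only; assumes GPy is importable):

import numpy as np
from GPy.kern.parts.finite_dimensional import FiniteDimensional

F = np.array([lambda x: np.ones_like(x[:, 0]), lambda x: x[:, 0]])
G = np.eye(2)                        # Gram matrix of the two basis functions
k = FiniteDimensional(input_dim=1, F=F, G=G)

X = np.linspace(0., 1., 3)[:, None]
target = np.zeros((3, 3))
k.K(X, None, target)                 # variance * FX W^{1/2} G^{-1} W^{1/2} FX^T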

41
GPy/kern/parts/fixed.py Normal file
View file

@ -0,0 +1,41 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class Fixed(Kernpart):
def __init__(self, input_dim, K, variance=1.):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
self.input_dim = input_dim
self.fixed_K = K
self.num_params = 1
self.name = 'fixed'
self._set_params(np.array([variance]).flatten())
def _get_params(self):
return self.variance
def _set_params(self, x):
assert x.shape == (1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self, X, X2, target):
target += self.variance * self.fixed_K
def dK_dtheta(self, partial, X, X2, target):
target += (partial * self.fixed_K).sum()
def dK_dX(self, partial, X, X2, target):
pass
def dKdiag_dX(self, partial, X, target):
pass

135
GPy/kern/parts/gibbs.py Normal file
View file

@ -0,0 +1,135 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Gibbs(Kernpart):
"""
Gibbs and MacKay non-stationary covariance function.
.. math::
        r^2 = (x_i - x_j)^\top (x_i - x_j)
        k(x_i, x_j) = \sigma^2 Z \exp\left(-\frac{r^2}{l(x_i)^2 + l(x_j)^2}\right)
        Z = \left(\frac{2\, l(x_i)\, l(x_j)}{l(x_i)^2 + l(x_j)^2}\right)^{q/2}
where :math:`l(x)` is a function giving the length scale as a function of space and :math:`q` is the dimensionality of the input space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
    The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not mapping:
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not ARD:
self.num_params=1+mapping.num_params
else:
raise NotImplementedError
self.mapping = mapping
self.name='gibbs'
self._set_params(np.hstack((variance, self.mapping._get_params())))
def _get_params(self):
return np.hstack((self.variance, self.mapping._get_params()))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.mapping._set_params(x[1:])
def _get_param_names(self):
return ['variance'] + self.mapping._get_param_names()
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self._dK_computations(dL_dK)
        if X2 is None:
gmapping = self.mapping.df_dtheta(2*self._dL_dl[:, None], X)
else:
gmapping = self.mapping.df_dtheta(self._dL_dl[:, None], X)
gmapping += self.mapping.df_dtheta(self._dL_dl_two[:, None], X2)
target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :]
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
pass
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
pass
def _K_computations(self, X, X2=None):
"""Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
self._lengthscales=self.mapping.f(X)
self._lengthscales2=np.square(self._lengthscales)
        if X2 is None:
self._lengthscales_two = self._lengthscales
self._lengthscales_two2 = self._lengthscales2
Xsquare = np.square(X).sum(1)
self._K_dist2 = -2.*tdot(X) + Xsquare[:, None] + Xsquare[None, :]
else:
self._lengthscales_two = self.mapping.f(X2)
self._lengthscales_two2 = np.square(self._lengthscales_two)
self._K_dist2 = -2.*np.dot(X, X2.T) + np.square(X).sum(1)[:, None] + np.square(X2).sum(1)[None, :]
self._w2 = self._lengthscales2 + self._lengthscales_two2.T
prod_length = self._lengthscales*self._lengthscales_two.T
self._K_exponential = np.exp(-self._K_dist2/self._w2)
self._K_dvar = np.sign(prod_length)*(2*np.abs(prod_length)/self._w2)**(self.input_dim/2.)*np.exp(-self._K_dist2/self._w2)
    def _dK_computations(self, dL_dK):
        """Pre-computations for the gradients of the covariance function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
:param dL_dK: the gradient of the objective with respect to the covariance function.
:type dL_dK: ndarray"""
self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
if self._lengthscales_two is self._lengthscales:
self._dL_dl_two = None
else:
self._dL_dl_two = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales**4 - self._lengthscales_two.T**4 ) + 2*self._lengthscales_two2.T*self._K_dist2)/(self._w2*self._w2*self._lengthscales_two.T)).sum(0)
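
For input_dim=1 the covariance reduces to sqrt(2 l(x) l(x') / (l(x)^2 + l(x')^2)) exp(-r^2 / (l(x)^2 + l(x')^2)). A pure-numpy sketch with a hand-specified (hypothetical) lengthscale function, rather than the MLP mapping the class trains:

import numpy as np

def gibbs_k(x, x2, variance=1., l=lambda x: 0.5 + 0.4 * np.sin(x)):
    lx, lx2 = l(x)[:, None], l(x2)[None, :]
    w2 = lx ** 2 + lx2 ** 2
    r2 = (x[:, None] - x2[None, :]) ** 2
    return variance * np.sqrt(2. * lx * lx2 / w2) * np.exp(-r2 / w2)

x = np.linspace(0., 4., 5)
K = gibbs_k(x, x)                    # lengthscale now varies with input location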

76
GPy/kern/parts/hierarchical.py Normal file
View file

@ -0,0 +1,76 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from independent_outputs import index_to_slices
class Hierarchical(Kernpart):
"""
    A kernel part which can represent a hierarchy of independence: a generalisation of independent_outputs
"""
def __init__(self,parts):
self.levels = len(parts)
self.input_dim = parts[0].input_dim + 1
self.num_params = np.sum([k.num_params for k in parts])
self.name = 'hierarchy'
self.parts = parts
self.param_starts = np.hstack((0,np.cumsum([k.num_params for k in self.parts[:-1]])))
self.param_stops = np.cumsum([k.num_params for k in self.parts])
def _get_params(self):
return np.hstack([k._get_params() for k in self.parts])
def _set_params(self,x):
[k._set_params(x[start:stop]) for k, start, stop in zip(self.parts, self.param_starts, self.param_stops)]
def _get_param_names(self):
return sum([[str(i)+'_'+k.name+'_'+n for n in k._get_param_names()] for i,k in enumerate(self.parts)],[])
def _sort_slices(self,X,X2):
slices = [index_to_slices(x) for x in X[:,-self.levels:].T]
X = X[:,:-self.levels]
if X2 is None:
slices2 = slices
X2 = X
else:
slices2 = [index_to_slices(x) for x in X2[:,-self.levels:].T]
X2 = X2[:,:-self.levels]
return X, X2, slices, slices2
def K(self,X,X2,target):
X, X2, slices, slices2 = self._sort_slices(X,X2)
[[[[k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_,slices2_)] for k, slices_, slices2_ in zip(self.parts,slices,slices2)]
def Kdiag(self,X,target):
raise NotImplementedError
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target):
X, X2, slices, slices2 = self._sort_slices(X,X2)
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
def dK_dX(self,dL_dK,X,X2,target):
raise NotImplementedError
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
#if X2 is None:
#X2,slices2 = X,slices
#else:
#X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
#
def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
#[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
def dKdiag_dtheta(self,dL_dKdiag,X,target):
raise NotImplementedError
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
#[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]

97
GPy/kern/parts/independent_outputs.py Normal file
View file

@ -0,0 +1,97 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
def index_to_slices(index):
"""
    Take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start and stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
    #construct the return structure
ind = np.asarray(index,dtype=np.int64)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
class IndependentOutputs(Kernpart):
"""
    A kernel part which can represent several independent functions.
    This kernel 'switches off' parts of the matrix where the output indexes are different.
    The index of the functions is given by the last column of the input X;
    the rest of the columns of X are passed to the kernel for computation (in blocks).
"""
def __init__(self,k):
self.input_dim = k.input_dim + 1
self.num_params = k.num_params
self.name = 'iops('+ k.name + ')'
self.k = k
def _get_params(self):
return self.k._get_params()
def _set_params(self,x):
self.k._set_params(x)
self.params = x
def _get_param_names(self):
return self.k._get_param_names()
def K(self,X,X2,target):
#Sort out the slices from the input data
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def Kdiag(self,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dK_dX(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dKdiag_dX(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
def dKdiag_dtheta(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
        [[self.k.dKdiag_dtheta(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]
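
A quick demonstration of the slicing machinery (not part of the commit; assumes the package import path added here):

import numpy as np
from GPy.kern.parts.independent_outputs import index_to_slices

index = np.array([0, 0, 1, 1, 0, 2, 2, 2, 1, 1])
print(index_to_slices(index))
# -> [[slice(0, 2, None), slice(4, 5, None)],
#     [slice(2, 4, None), slice(8, 10, None)],
#     [slice(5, 8, None)]]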

76
GPy/kern/parts/kernpart.py Normal file
View file

@ -0,0 +1,76 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
class Kernpart(object):
def __init__(self,input_dim):
"""
The base class for a kernpart: a positive definite function which forms part of a kernel
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
self.input_dim = input_dim
self.num_params = 1
self.name = 'unnamed'
def _get_params(self):
raise NotImplementedError
def _set_params(self,x):
raise NotImplementedError
def _get_param_names(self):
raise NotImplementedError
def K(self,X,X2,target):
raise NotImplementedError
def Kdiag(self,X,target):
raise NotImplementedError
def dK_dtheta(self,dL_dK,X,X2,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
# In the base case compute this by calling dK_dtheta. Need to
# override for stationary covariances (for example) to save
# time.
for i in range(X.shape[0]):
self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
def psi0(self,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi1(self,Z,mu,S,target):
raise NotImplementedError
    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def psi2(self,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target):
raise NotImplementedError
class Kernpart_inner(Kernpart):
def __init__(self,input_dim):
"""
The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs.
:param input_dim: the number of input dimensions to the function
:type input_dim: int
Do not instantiate.
"""
Kernpart.__init__(self, input_dim)
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
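
A minimal concrete subclass (a hypothetical 'constant' part, essentially Bias, shown only to illustrate the in-place target contract; assumes this module is on the path):

import numpy as np
from kernpart import Kernpart

class Constant(Kernpart):
    def __init__(self, input_dim, value=1.):
        Kernpart.__init__(self, input_dim)
        self.name = 'constant'
        self._set_params(np.array([value]).flatten())
    def _get_params(self):
        return self.value
    def _set_params(self, x):
        self.value = x
    def _get_param_names(self):
        return ['value']
    def K(self, X, X2, target):
        target += self.value              # accumulate, never overwrite
    def Kdiag(self, X, target):
        target += self.value
    def dK_dtheta(self, dL_dK, X, X2, target):
        target += dL_dK.sum()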

304
GPy/kern/parts/linear.py Normal file
View file

@ -0,0 +1,304 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...util.misc import fast_array_equal
from scipy import weave
class Linear(Kernpart):
"""
Linear kernel
.. math::
k(x,y) = \sum_{i=1}^{\\text{input\_dim}} \sigma^2_i x_i y_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if ARD == False:
self.num_params = 1
self.name = 'linear'
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else:
self.num_params = self.input_dim
self.name = 'linear'
if variances is not None:
variances = np.asarray(variances)
                assert variances.size == self.input_dim, "bad number of variances"
else:
variances = np.ones(self.input_dim)
self._set_params(variances.flatten())
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
def _get_params(self):
return self.variances
def _set_params(self, x):
assert x.size == (self.num_params)
self.variances = x
self.variances2 = np.square(self.variances)
def _get_param_names(self):
if self.num_params == 1:
return ['variance']
else:
return ['variance_%i' % i for i in range(self.variances.size)]
def K(self, X, X2, target):
if self.ARD:
XX = X * np.sqrt(self.variances)
if X2 is None:
target += tdot(XX)
else:
XX2 = X2 * np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
self._K_computations(X, X2)
target += self.variances * self._dot_product
def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target)
def dK_dtheta(self, dL_dK, X, X2, target):
if self.ARD:
if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
else:
product = X[:, None, :] * X2[None, :, :]
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
else:
self._K_computations(X, X2)
target += np.sum(self._dot_product * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dK_dX(self, dL_dK, X, X2, target):
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self,dL_dKdiag,X,target):
target += 2.*self.variances*dL_dKdiag[:,None]*X
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances
    def psi1(self, Z, mu, S, target):
        """For the linear kernel psi1 is just K(mu, Z); S plays no role."""
        self.K(mu, Z, target)
    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
        """S plays no role in psi1, so this is just dK_dtheta evaluated at (mu, Z)."""
self.dK_dtheta(dL_dpsi1, mu, Z, target)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1"""
self._psi_computations(Z, mu, S)
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.dK_dX(dL_dpsi1.T, Z, mu, target)
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def psi2_new(self,Z,mu,S,target):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
target += tmp[:,:,None]*tmp[:,None,:] + np.sum(S[:,None,None,:]*self.variances**2*Z[None,:,None,:]*Z[None,None,:,:],-1)
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self.dK_dtheta(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
        if self.ARD:
            target += result
else:
target += result.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD:
target += tmp.sum(0).sum(0).sum(0)
else:
target += tmp.sum()
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
Zs = Z*self.variances
Zs_sq = Zs[:,None,:]*Zs[None,:,:]
target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1)
    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
        """Work with arrays of shape (N, num_inducing, num_inducing, input_dim)."""
self._psi_computations(Z, mu, S)
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2.
#muAZZA = np.tensordot(mu,AZZA,(-1,0))
#target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
        #Using weave, we can exploit the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<input_dim;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<input_dim;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
#psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
#dummy_target = np.zeros_like(target)
#dummy_target += psi2_dZ.sum(0).sum(0)
AZA = self.variances*self.ZAinner
code="""
int n,m,mm,q;
#pragma omp parallel for private(n,mm,q)
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
for(n=0;n<N;n++){
target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
if not (fast_array_equal(X, self._Xcache) and fast_array_equal(X2, self._X2cache)):
self._Xcache = X.copy()
if X2 is None:
self._dot_product = tdot(X)
self._X2cache = None
else:
self._X2cache = X2.copy()
self._dot_product = np.dot(X, X2.T)
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Zv_changed = not (fast_array_equal(Z, self._Z) and fast_array_equal(self.variances, self._variances))
muS_changed = not (fast_array_equal(mu, self._mu) and fast_array_equal(S, self._S))
if Zv_changed:
# Z has changed, compute Z specific stuff
# self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim
# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
self.ZA = Z * self.variances
self._Z = Z.copy()
self._variances = self.variances.copy()
if muS_changed:
self.mu2_S = np.square(mu) + S
self.inner = (mu[:, None, :] * mu[:, :, None])
diag_indices = np.diag_indices(mu.shape[1], 2)
self.inner[:, diag_indices[0], diag_indices[1]] += S
self._mu, self._S = mu.copy(), S.copy()
if Zv_changed or muS_changed:
self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
self._psi2 = np.dot(self.ZAinner, self.ZA.T)
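
A sketch checking the ARD branch of K against X diag(v) X2^T (not part of the commit; importing this module needs a scipy that still ships weave):

import numpy as np
from GPy.kern.parts.linear import Linear

X, X2 = np.random.randn(4, 3), np.random.randn(5, 3)
v = np.array([0.5, 1.0, 2.0])
k = Linear(input_dim=3, variances=v, ARD=True)
K = np.zeros((4, 5))
k.K(X, X2, K)
assert np.allclose(K, np.dot(X * v, X2.T))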

155
GPy/kern/parts/mlp.py Normal file
View file

@ -0,0 +1,155 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
four_over_tau = 2./np.pi
class MLP(Kernpart):
"""
multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
.. math::
k(x,y) = \sigma^2 \frac{2}{\pi} \text{asin} \left(\frac{\sigma_w^2 x^\top y+\sigma_b^2}{\sqrt{\sigma_w^2x^\top x + \sigma_b^2 + 1}\sqrt{\sigma_w^2 y^\top y + \sigma_b^2 +1}} \right)
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params=3
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
else:
weight_variance = 100.*np.ones(1)
else:
self.num_params = self.input_dim + 2
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == self.input_dim, "bad number of weight variances"
else:
weight_variance = np.ones(self.input_dim)
raise NotImplementedError
self.name='mlp'
self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
def _get_params(self):
return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weight_variance = x[1:-1]
self.weight_std = np.sqrt(self.weight_variance)
self.bias_variance = x[-1]
def _get_param_names(self):
if self.num_params == 3:
return ['variance', 'weight_variance', 'bias_variance']
else:
            return ['variance'] + ['weight_variance_%i' % i for i in range(self.weight_variance.size)] + ['bias_variance']
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
denom3 = self._K_denom*self._K_denom*self._K_denom
base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
base_cov_grad = base*dL_dK
if X2 is None:
vec = np.diag(self._K_inner_prod)
target[1] += ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
+np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
target[2] += ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec[None, :]+vec[:, None])*self.weight_variance
+2.*self.bias_variance + 2.))*base_cov_grad).sum()
else:
vec1 = (X*X).sum(1)
vec2 = (X2*X2).sum(1)
target[1] += ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
target[2] += ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
target[0] += np.sum(self._K_dvar*dL_dK)
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_asin_arg
numer = self._K_numer
denom = self._K_denom
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
denom3 = denom*denom*denom
target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_asin_arg
denom = self._K_diag_denom
numer = self._K_diag_numer
target += four_over_tau*2.*self.weight_variance*self.variance*X*(1/denom*(1 - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
def _K_computations(self, X, X2):
"""Pre-computations for the covariance matrix (used for computing the covariance and its gradients."""
if self.ARD:
pass
else:
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
self._K_numer = self._K_inner_prod*self.weight_variance+self.bias_variance
vec = np.diag(self._K_numer) + 1.
self._K_denom = np.sqrt(np.outer(vec,vec))
self._K_asin_arg = self._K_numer/self._K_denom
self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
self._K_denom = np.sqrt(np.outer(vec1,vec2))
self._K_asin_arg = self._K_numer/self._K_denom
self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
def _K_diag_computations(self, X):
"""Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients)."""
if self.ARD:
pass
else:
self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_denom = self._K_diag_numer+1.
self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
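
A sketch evaluating the arc-sine formula directly and comparing with K (not part of the commit; non-ARD case only, assumes GPy is importable):

import numpy as np
from GPy.kern.parts.mlp import MLP

X = np.random.randn(4, 2)
k = MLP(input_dim=2, variance=1.5, weight_variance=2.0, bias_variance=0.5)
K = np.zeros((4, 4))
k.K(X, None, K)

num = 2.0 * np.dot(X, X.T) + 0.5                 # sigma_w^2 x'y + sigma_b^2
vec = np.diag(num) + 1.
K_direct = 1.5 * (2. / np.pi) * np.arcsin(num / np.sqrt(np.outer(vec, vec)))
assert np.allclose(K, K_direct)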

248
GPy/kern/parts/periodic_Matern32.py Normal file
View file

@ -0,0 +1,248 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors
class PeriodicMatern32(Kernpart):
"""
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
self.name = 'periodic_Mat32'
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
else:
lengthscale = np.ones(1)
self.lower,self.upper = lower, upper
self.num_params = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
self.b = [1,self.lengthscale**2/3]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
np.add(mdot(FX,self.Gi,FX2.T), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
db_dlen = [0.,2*self.lengthscale/3.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*dL_dK)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*dL_dK)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*dL_dK)
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)
#dK_dlen
da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
db_dlen = [0.,2*self.lengthscale/3.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)

View file

@@ -0,0 +1,266 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors
class PeriodicMatern52(Kernpart):
"""
Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self,input_dim=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
self.name = 'periodic_Mat52'
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
else:
lengthscale = np.ones(1)
self.lower,self.upper = lower, upper
self.num_params = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
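# Phasor addition: each row of alpha/phase sums alpha_i*cos(omega*x + phase_i)
# into a single r*cos(omega*x + psi), recovering r and psi from the summed
# cosine (r1) and sine (r2) components.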
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
# r1 == 0 means the phase is +-pi/2, fixed by the sign of r2 (np.arcsin(r2) is only valid for |r2| <= 1)
psi = np.where(r1 != 0, np.arctan(r2/r1) + (r1<0.)*np.pi, np.sign(r2)*np.pi/2)
return r,omega[:,0:1], psi
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
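# Integral over [lower, upper] of products of two cosine rows,
# r1*cos(omega1*x + phi1) * r2*cos(omega2*x + phi2), via the product-to-sum
# identity; Gint2 supplies the omega1 == omega2 limit where Gint1 degenerates
# to NaN.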
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.]
self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]
self.basis_alpha = np.ones((2*self.n_freq,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T)
return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms)
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
np.add(mdot(FX,self.Gi,FX2.T), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:3],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower,dFlower_dper.T))
dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*dL_dK)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*dL_dK)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*dL_dK)
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1. / self.variance * mdot(FX, self.Gi, FX.T)
#dK_dlen
da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + .5*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + .5*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:3],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower,dFlower_dper.T))
dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
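# A minimal usage sketch for the part above (the argument names and the
# preallocated-target convention are taken from the definitions in this file):
#
#   import numpy as np
#   k = PeriodicMatern52(input_dim=1, variance=1., lengthscale=1.,
#                        period=2*np.pi, n_freq=10)
#   X = np.linspace(0., 4*np.pi, 20)[:, None]
#   K = np.zeros((20, 20))
#   k.K(X, None, K)        # covariance is accumulated into K
#   Kdiag = np.zeros(20)
#   k.Kdiag(X, Kdiag)      # diagonal is accumulated into Kdiag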

View file

@@ -0,0 +1,237 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors
class PeriodicExponential(Kernpart):
"""
Kernel of the periodic subspace (up to a given frequency) of an exponential (Matern 1/2) RKHS. Only defined for input_dim=1.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
self.name = 'periodic_exp'
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
else:
lengthscale = np.ones(1)
self.lower,self.upper = lower, upper
self.num_params = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
# r1 == 0 means the phase is +-pi/2, fixed by the sign of r2 (np.arcsin(r2) is only valid for |r2| <= 1)
psi = np.where(r1 != 0, np.arctan(r2/r1) + (r1<0.)*np.pi, np.sign(r2)*np.pi/2)
return r,omega[:,0:1], psi
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
self.a = [1./self.lengthscale, 1.]
self.b = [1]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
np.add(mdot(FX,self.Gi,FX2.T), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-1./self.lengthscale**2,0.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
target[0] += np.sum(dK_dvar*dL_dK)
target[1] += np.sum(dK_dlen*dL_dK)
target[2] += np.sum(dK_dper*dL_dK)
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX.T)
#dK_dlen
da_dlen = [-1./self.lengthscale**2,0.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
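# A sketch of the gradient interface shared by these parts: dK_dtheta
# accumulates the parameter gradients into a length-num_params target, ordered
# [variance, lengthscale, period] as in _get_param_names above.
#
#   import numpy as np
#   k = PeriodicExponential(input_dim=1, variance=1., lengthscale=1.)
#   X = np.random.randn(10, 1)
#   dL_dK = np.ones((10, 10))
#   grad = np.zeros(k.num_params)
#   k.dK_dtheta(dL_dK, X, None, grad)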

135
GPy/kern/parts/poly.py Normal file
View file

@@ -0,0 +1,135 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
four_over_tau = 2./np.pi
class POLY(Kernpart):
"""
Polynomial kernel. Included for completeness, but generally not recommended:
.. math::
k(x, y) = \sigma^2 (\sigma_w^2 x'y + \sigma_b^2)^d
The kernel parameters are \sigma^2 (variance), \sigma^2_w
(weight_variance), \sigma^2_b (bias_variance) and d (degree). Only
gradients of the first three are provided for kernel optimisation;
it is assumed that the polynomial degree is set by hand.
The kernel is badly behaved whenever \sigma^2_w x'y + \sigma^2_b has a
magnitude greater than one. For completeness an automatic relevance
determination version of this kernel is also provided.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param degree: the degree of the polynomial.
:type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params=3
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
else:
weight_variance = 1.*np.ones(1)
else:
self.num_params = self.input_dim + 2
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == self.input_dim, "bad number of weight variances"
else:
weight_variance = np.ones(self.input_dim)
raise NotImplementedError
self.degree=degree
self.name='poly_deg' + str(self.degree)
self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
def _get_params(self):
return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weight_variance = x[1:-1]
self.weight_std = np.sqrt(self.weight_variance)
self.bias_variance = x[-1]
def _get_param_names(self):
if self.num_params == 3:
return ['variance', 'weight_variance', 'bias_variance']
else:
return ['variance'] + ['weight_variance_%i' % i for i in range(self.weight_variance.size)] + ['bias_variance']
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
base_cov_grad = base*dL_dK
target[0] += np.sum(self._K_dvar*dL_dK)
target[1] += (self._K_inner_prod*base_cov_grad).sum()
target[2] += base_cov_grad.sum()
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg
target += self.weight_variance*self.degree*self.variance*(X2[None, :, :]*(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_poly_arg
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
def _K_computations(self, X, X2):
if self.ARD:
pass
else:
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
self._K_dvar = self._K_poly_arg**self.degree
def _K_diag_computations(self, X):
if self.ARD:
pass
else:
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_dvar = self._K_diag_poly_arg**self.degree
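# A worked check of the covariance above (sketch): with variance=1,
# weight_variance=2, bias_variance=1, degree=2 and scalar inputs x=1, y=3,
# k(x, y) = 1*(2*1*3 + 1)**2 = 49.
#
#   import numpy as np
#   k = POLY(input_dim=1, variance=1., weight_variance=2., bias_variance=1., degree=2)
#   K = np.zeros((1, 1))
#   k.K(np.array([[1.]]), np.array([[3.]]), K)   # K[0, 0] == 49.0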

121
GPy/kern/parts/prod.py Normal file
View file

@@ -0,0 +1,121 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
import hashlib
class Prod(Kernpart):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kernpart
:param tensor: the kernels are either multiplied as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self,k1,k2,tensor=False):
self.num_params = k1.num_params + k2.num_params
self.name = k1.name + '<times>' + k2.name
self.k1 = k1
self.k2 = k2
if tensor:
self.input_dim = k1.input_dim + k2.input_dim
self.slice1 = slice(0,self.k1.input_dim)
self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
self.input_dim = k1.input_dim
self.slice1 = slice(0,self.input_dim)
self.slice2 = slice(0,self.input_dim)
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x):
"""set the value of the parameters."""
self.k1._set_params(x[:self.k1.num_params])
self.k2._set_params(x[self.k1.num_params:])
def _get_param_names(self):
"""return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target):
self._K_computations(X,X2)
target += self._K1 * self._K2
def K1(self,X, X2):
"""Compute the part of the kernel associated with k1."""
self._K_computations(X, X2)
return self._K1
def K2(self, X, X2):
"""Compute the part of the kernel associated with k2."""
self._K_computations(X, X2)
return self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
"""Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],target1)
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.dKdiag_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params = self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,self.slice1],None,self._K1)
self.k2.K(X[:,self.slice2],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
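# Sketch: with tensor=False (the default) both parts see the same columns;
# with tensor=True the input is split into the first k1.input_dim and the
# remaining k2.input_dim columns, e.g. (assuming the parts are imported
# alongside each other):
#
#   import numpy as np
#   k = Prod(RBF(1), RationalQuadratic(1), tensor=True)   # expects 2-column X
#   X = np.random.randn(5, 2)
#   K = np.zeros((5, 5))
#   k.K(X, None, K)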

View file

@@ -0,0 +1,101 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class prod_orthogonal(Kernpart):
"""
Computes the product of 2 kernels defined on disjoint column blocks of the input (the product of the two input spaces)
:param k1, k2: the kernels to multiply
:type k1, k2: Kernpart
:rtype: kernel object
"""
def __init__(self,k1,k2):
self.input_dim = k1.input_dim + k2.input_dim
self.num_params = k1.num_params + k2.num_params
self.name = k1.name + '<times>' + k2.name
self.k1 = k1
self.k2 = k2
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x):
"""set the value of the parameters."""
self.k1._set_params(x[:self.k1.num_params])
self.k2._set_params(x[self.k1.num_params:])
def _get_param_names(self):
"""return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target):
self._K_computations(X,X2)
target += self._K1 * self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros(X.shape[0])
target2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,:self.k1.input_dim],target1)
self.k2.Kdiag(X[:,self.k1.input_dim:],target2)
target += target1 * target2
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,:self.k1.input_dim],K1)
self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:,:self.k1.input_dim])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[:,self.k1.input_dim:])
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
self.k1.dKdiag_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target[:,:self.k1.input_dim])
self.k2.dKdiag_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target[:,self.k1.input_dim:])
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params = self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,:self.k1.input_dim],None,self._K1)
self.k2.K(X[:,self.k1.input_dim:],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,:self.k1.input_dim],X2[:,:self.k1.input_dim],self._K1)
self.k2.K(X[:,self.k1.input_dim:],X2[:,self.k1.input_dim:],self._K2)

View file

@@ -0,0 +1,80 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class RationalQuadratic(Kernpart):
"""
rational quadratic kernel
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2 \ell^2} \\bigg)^{- \\alpha} \ \ \ \ \ \\text{ where } r^2 = (x-y)^2
:param input_dim: the number of input dimensions
:type input_dim: int (input_dim=1 is the only value currently supported)
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscale :math:`\ell`
:type lengthscale: float
:param power: the power :math:`\\alpha`
:type power: float
:rtype: Kernpart object
"""
def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.):
assert input_dim == 1, "For this kernel we assume input_dim=1"
self.input_dim = input_dim
self.num_params = 3
self.name = 'rat_quad'
self.variance = variance
self.lengthscale = lengthscale
self.power = power
def _get_params(self):
return np.hstack((self.variance,self.lengthscale,self.power))
def _set_params(self,x):
self.variance = x[0]
self.lengthscale = x[1]
self.power = x[2]
def _get_param_names(self):
return ['variance','lengthscale','power']
def K(self,X,X2,target):
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
target += self.variance*(1 + dist2/2.)**(-self.power)
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target):
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
dvar = (1 + dist2/2.)**(-self.power)
# d/dl of variance*(1 + dist2/2.)**(-power), with dist2 = ((x-y)/l)**2
dl = self.power * self.variance * dist2 / self.lengthscale * (1 + dist2/2.)**(-self.power-1)
dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)
target[0] += np.sum(dvar*dL_dK)
target[1] += np.sum(dl*dL_dK)
target[2] += np.sum(dp*dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag)
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2.)**(-self.power-1)
target += np.sum(dL_dK*dX,1)[:,np.newaxis]
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
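# Worked example of K above (sketch): for x=0, y=1 with variance=1,
# lengthscale=1 and power=1, dist2 = 1 and k = (1 + 1/2.)**(-1) = 2/3.
#
#   import numpy as np
#   k = RationalQuadratic(1, variance=1., lengthscale=1., power=1.)
#   K = np.zeros((1, 1))
#   k.K(np.array([[0.]]), np.array([[1.]]), K)   # K[0, 0] == 0.666...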

330
GPy/kern/parts/rbf.py Normal file
View file

@@ -0,0 +1,330 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import weave
from ...util.linalg import tdot
from ...util.misc import fast_array_equal
class RBF(Kernpart):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
.. Note: this object implements both the ARD and 'spherical' version of the function
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
self.input_dim = input_dim
self.name = 'rbf'
self.ARD = ARD
if not ARD:
self.num_params = 2
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, lengthscale.flatten())))
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
self.weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native
'extra_link_args' : ['-lgomp']}
def _get_params(self):
return np.hstack((self.variance, self.lengthscale))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.lengthscale = x[1:]
self.lengthscale2 = np.square(self.lengthscale)
# reset cached results
self._X, self._X2, self._params = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def _get_param_names(self):
if self.num_params == 2:
return ['variance', 'lengthscale']
else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
self._K_computations(X, X2)
target += self.variance * self._K_dvar
def Kdiag(self, X, target):
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD:
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
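# The weave loops below accumulate, for each dimension q,
# dL/dl_q = (variance/l_q**3) * sum_ij (X_iq - X2_jq)**2 * exp(-r2_ij/2) * dL_dK_ij,
# i.e. the lengthscale gradient of the RBF; the symmetric (X2 is None) case
# visits only j < i after symmetrising dvardLdK.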
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
# NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(dL_dKdiag)
def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
target += self.variance
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
target[0] += np.sum(dL_dpsi0)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def psi1(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi1
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
target[1] += dpsi1_dlength.sum()
else:
target[1:] += dpsi1_dlength.sum(0).sum(0)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
"""Shape N,num_inducing,num_inducing,Ntheta"""
self._psi_computations(Z, mu, S)
d_var = 2.*self._psi2 / self.variance
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
target[0] += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
target[1] += dpsi2_dlength.sum()
else:
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
params = self._get_params()
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2) and fast_array_equal(self._params , params)):
self._X = X.copy()
self._params = params.copy()
if X2 is None:
self._X2 = None
X = X / self.lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X / self.lengthscale
X2 = X2 / self.lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Z_changed = not fast_array_equal(Z, self._Z)
if Z_changed:
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale2
else:
lengthscale2 = np.ones(input_dim) * self.lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2
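The cached psi statistics above have simple closed forms. As a reference point for the weave version, here is a minimal numpy sketch of the psi1 computation, using made-up shapes and unit parameters (mirroring _psi_computations):
import numpy as np

N, M, Q = 5, 3, 2                      # data points, inducing points, input dims (illustrative)
variance, lengthscale2 = 1.0, np.ones(Q)
Z = np.random.randn(M, Q)              # inducing inputs
mu = np.random.randn(N, Q)             # variational means
S = np.random.rand(N, Q)               # variational variances

denom = S[:, None, :] / lengthscale2 + 1.                                # N,M,Q
dist_sq = np.square(Z[None, :, :] - mu[:, None, :]) / lengthscale2 / denom
psi1 = variance * np.exp(-0.5 * np.sum(dist_sq + np.log(denom), -1))     # N,M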

322
GPy/kern/parts/rbf_inv.py Normal file
View file

@ -0,0 +1,322 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from rbf import RBF
import numpy as np
import hashlib
from scipy import weave
from ...util.linalg import tdot
class RBFInv(RBF):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. It only
differs from RBF in that here the parametrization is wrt the inverse lengthscale:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
.. Note: this object implements both the ARD and 'spherical' version of the function
"""
def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False):
self.input_dim = input_dim
self.name = 'rbf_inv'
self.ARD = ARD
if not ARD:
self.num_params = 2
if inv_lengthscale is not None:
inv_lengthscale = np.asarray(inv_lengthscale)
assert inv_lengthscale.size == 1, "Only one inverse lengthscale needed for a non-ARD kernel"
else:
inv_lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if inv_lengthscale is not None:
inv_lengthscale = np.asarray(inv_lengthscale)
assert inv_lengthscale.size == self.input_dim, "bad number of inverse lengthscales"
else:
inv_lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, inv_lengthscale.flatten())))
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
self.weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
def _get_params(self):
return np.hstack((self.variance, self.inv_lengthscale))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.inv_lengthscale = x[1:]
self.inv_lengthscale2 = np.square(self.inv_lengthscale)
# TODO: We can rewrite everything with inv_lengthscale and never need to do the below
self.lengthscale = 1. / self.inv_lengthscale
self.lengthscale2 = np.square(self.lengthscale)
# reset cached results
self._X, self._X2, self._params = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def _get_param_names(self):
if self.num_params == 2:
return ['variance', 'inv_lengthscale']
else:
return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
def dK_dtheta(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD:
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
len2 = self.lengthscale2
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
# def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
# self._psi_computations(Z, mu, S)
# denom_deriv = S[:, None, :] / (self.lengthscale ** 3 + self.lengthscale * S[:, None, :])
# d_length = self._psi1[:, :, None] * (self.lengthscale * np.square(self._psi1_dist / (self.lengthscale2 + S[:, None, :])) + denom_deriv)
# target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
# dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
# if not self.ARD:
# target[1] += dpsi1_dlength.sum()*(-self.lengthscale2)
# else:
# target[1:] += dpsi1_dlength.sum(0).sum(0)*(-self.lengthscale2)
# #target[1:] = target[1:]*(-self.lengthscale2)
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = 1 + S[:, None, :] * self.inv_lengthscale2
# d_inv_length_old = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / (self.lengthscale * self._psi1_denom) + self.inv_lengthscale) / self.inv_lengthscale2
d_length = -(self._psi1[:, :, None] * ((np.square(self._psi1_dist) * self.inv_lengthscale) / (tmp ** 2) + (S[:, None, :] * self.inv_lengthscale) / (tmp)))
# d_inv_length = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / self._psi1_denom + self.lengthscale)
target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
target[1] += dpsi1_dlength.sum() # *(-self.lengthscale2)
else:
target[1:] += dpsi1_dlength.sum(0).sum(0) # *(-self.lengthscale2)
# target[1:] = target[1:]*(-self.lengthscale2)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
dpsi1_dZ = -self._psi1[:, :, None] * ((self.inv_lengthscale2 * self._psi1_dist) / self._psi1_denom)
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = (self._psi1[:, :, None] * self.inv_lengthscale2) / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
"""Shape N,num_inducing,num_inducing,Ntheta"""
self._psi_computations(Z, mu, S)
d_var = 2.*self._psi2 / self.variance
# d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
d_length = -2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] * self.inv_lengthscale2) / (self.inv_lengthscale * self._psi2_denom)
target[0] += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
target[1] += dpsi2_dlength.sum() # *(-self.lengthscale2)
else:
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0) # *(-self.lengthscale2)
# target[1:] = target[1:]*(-self.lengthscale2)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist * self.inv_lengthscale2 # num_inducing, num_inducing, input_dim
term2 = (self._psi2_mudist * self.inv_lengthscale2) / self._psi2_denom # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = (self.inv_lengthscale2 * self._psi2[:, :, :, None]) / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
if not (np.array_equal(X, self._X) and np.array_equal(X2, self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params = self._get_params().copy()
if X2 is None:
self._X2 = None
X = X * self.inv_lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X * self.inv_lengthscale
X2 = X2 * self.inv_lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
if not np.array_equal(Z, self._Z):
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist * self.inv_lengthscale) # M,M,Q
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] * self.inv_lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = (np.square(self._psi1_dist) * self.inv_lengthscale2) / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] * self.inv_lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
inv_lengthscale2 = self.inv_lengthscale2
else:
inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp*inv_lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2
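Since RBFInv is only a reparametrization of RBF, the two kernels should agree whenever inv_lengthscale == 1/lengthscale. A quick sanity check, sketched under the assumption that both classes expose the kernpart K(X, X2, target) interface used throughout this commit:
import numpy as np
from GPy.kern.parts.rbf import RBF
from GPy.kern.parts.rbf_inv import RBFInv

X = np.random.randn(10, 2)
K1 = np.zeros((10, 10))
K2 = np.zeros((10, 10))
RBF(2, variance=2., lengthscale=0.5).K(X, None, K1)         # accumulates into the target buffer
RBFInv(2, variance=2., inv_lengthscale=2.).K(X, None, K2)   # 1/0.5 == 2.
print(np.allclose(K1, K2))                                   # expected: True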

117
GPy/kern/parts/rbfcos.py Normal file
View file

@ -0,0 +1,117 @@
# Copyright (c) 2012, James Hensman and Andrew Gordon Wilson
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class RBFCos(Kernpart):
def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False):
self.input_dim = input_dim
self.name = 'rbfcos'
if self.input_dim>10:
print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs"
self.ARD = ARD
#set the default frequencies and bandwidths, appropriate num_params
if ARD:
self.num_params = 2*self.input_dim + 1
if frequencies is not None:
frequencies = np.asarray(frequencies)
assert frequencies.size == self.input_dim, "bad number of frequencies"
else:
frequencies = np.ones(self.input_dim)
if bandwidths is not None:
bandwidths = np.asarray(bandwidths)
assert bandwidths.size == self.input_dim, "bad number of bandwidths"
else:
bandwidths = np.ones(self.input_dim)
else:
self.num_params = 3
if frequencies is not None:
frequencies = np.asarray(frequencies)
assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel"
else:
frequencies = np.ones(1)
if bandwidths is not None:
bandwidths = np.asarray(bandwidths)
assert bandwidths.size == 1, "Exactly one bandwidth needed for non-ARD kernel"
else:
bandwidths = np.ones(1)
#initialise cache
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((variance,frequencies.flatten(),bandwidths.flatten())))
def _get_params(self):
return np.hstack((self.variance,self.frequencies, self.bandwidths))
def _set_params(self,x):
assert x.size==(self.num_params)
if self.ARD:
self.variance = x[0]
self.frequencies = x[1:1+self.input_dim]
self.bandwidths = x[1+self.input_dim:]
else:
self.variance, self.frequencies, self.bandwidths = x
def _get_param_names(self):
if self.num_params == 3:
return ['variance','frequency','bandwidth']
else:
return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)]
def K(self,X,X2,target):
self._K_computations(X,X2)
target += self.variance*self._dvar
def Kdiag(self,X,target):
np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target):
self._K_computations(X,X2)
target[0] += np.sum(dL_dK*self._dvar)
if self.ARD:
for q in xrange(self.input_dim):
target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self._dist[:,:,q]*self.frequencies[q])*self._dist[:,:,q])
target[q+1+self.input_dim] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2[:,:,q])
else:
target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self._dist*self.frequencies)*self._dist,-1))
target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2.sum(-1))
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
#TODO!!!
raise NotImplementedError
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2)):
if X2 is None: X2 = X
self._X = X.copy()
self._X2 = X2.copy()
#do the distances: this will be high memory for large input_dim
#NB: we don't take the abs of the dist because cos is symmetric
self._dist = X[:,None,:] - X2[None,:,:]
self._dist2 = np.square(self._dist)
#ensure the next section is computed:
self._params = np.empty(self.num_params)
if not np.all(self._params == self._get_params()):
self._params == self._get_params().copy()
self._rbf_part = np.exp(-2.*np.pi**2*np.sum(self._dist2*self.bandwidths,-1))
self._cos_part = np.prod(np.cos(2.*np.pi*self._dist*self.frequencies),-1)
self._dvar = self._rbf_part*self._cos_part
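The covariance assembled in _K_computations is the product of an RBF envelope and a cosine term; transcribed directly into a standalone numpy function:
import numpy as np

def rbfcos_cov(X, X2, variance=1., frequencies=1., bandwidths=1.):
    # k(x,x') = variance * exp(-2 pi^2 sum_q b_q d_q^2) * prod_q cos(2 pi f_q d_q), d = x - x'
    dist = X[:, None, :] - X2[None, :, :]
    rbf_part = np.exp(-2. * np.pi ** 2 * np.sum(np.square(dist) * bandwidths, -1))
    cos_part = np.prod(np.cos(2. * np.pi * dist * frequencies), -1)
    return variance * rbf_part * cos_part

K = rbfcos_cov(np.random.randn(4, 1), np.random.randn(3, 1))   # shape (4, 3)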

58
GPy/kern/parts/spline.py Normal file
View file

@ -0,0 +1,58 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
import hashlib
def theta(x):
"""Heaviside step function"""
return np.where(x>=0.,1.,0.)
class Spline(Kernpart):
"""
Spline kernel
:param input_dim: the number of input dimensions (fixed to 1 right now TODO)
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
def __init__(self,input_dim,variance=1.,lengthscale=1.):
self.input_dim = input_dim
assert self.input_dim==1
self.num_params = 1
self.name = 'spline'
self._set_params(np.squeeze(variance))
def _get_params(self):
return self.variance
def _set_params(self,x):
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):
assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
t = X
s = X2.T
s_t = s-t # broadcasted subtraction
target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.)
def Kdiag(self,X,target):
target += self.variance*X.flatten()**3/3.
def dK_dtheta(self,dL_dK,X,X2,target):
    # derivative wrt the variance is K/variance, weighted by dL_dK
    t = X
    s = X2.T
    s_t = s-t # broadcasted subtraction
    target += np.sum(dL_dK*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.))
def dKdiag_dtheta(self,dL_dKdiag,X,target):
    target += np.sum(dL_dKdiag*X.flatten()**3/3.)
def dKdiag_dX(self,dL_dKdiag,X,target):
    target += dL_dKdiag[:,None]*self.variance*X**2
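For positive inputs the piecewise expression in K reduces to the closed form variance*(s*t*min(s,t)/2 - min(s,t)**3/6), which makes the symmetry of the kernel explicit. A quick numpy check:
import numpy as np

def theta(x):
    """Heaviside step function"""
    return np.where(x >= 0., 1., 0.)

X = np.random.rand(6, 1)                # +ve domain only
t, s = X, X.T
s_t = s - t
K = 0.5 * (t * s ** 2) - s ** 3 / 6. + s_t ** 3 * theta(s_t) / 6.
m = np.minimum(s, t)
print(np.allclose(K, 0.5 * s * t * m - m ** 3 / 6.))   # True
print(np.allclose(K, K.T))                              # symmetric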

92
GPy/kern/parts/symmetric.py Normal file
View file

@ -0,0 +1,92 @@
# Copyright (c) 2012 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class Symmetric(Kernpart):
"""
Symmetrical kernels
:param k: the kernel to symmetrify
:type k: Kernpart
:param transform: the transform to use in symmetrification (allows symmetry on specified axes)
:type transform: A numpy array (input_dim x input_dim) specifying the transform
:rtype: Kernpart
"""
def __init__(self,k,transform=None):
if transform is None:
transform = np.eye(k.input_dim)*-1.
assert transform.shape == (k.input_dim, k.input_dim)
self.transform = transform
self.input_dim = k.input_dim
self.num_params = k.num_params
self.name = k.name + '_symm'
self.k = k
self._set_params(k._get_params())
def _get_params(self):
"""return the value of the parameters."""
return self.k._get_params()
def _set_params(self,x):
"""set the value of the parameters."""
self.k._set_params(x)
def _get_param_names(self):
"""return parameter names."""
return self.k._get_param_names()
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.K(X,X2,target)
self.k.K(AX,X2,target)
self.k.K(X,AX2,target)
self.k.K(AX,AX2,target)
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.dK_dtheta(dL_dK,X,X2,target)
self.k.dK_dtheta(dL_dK,AX,X2,target)
self.k.dK_dtheta(dL_dK,X,AX2,target)
self.k.dK_dtheta(dL_dK,AX,AX2,target)
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.dK_dX(dL_dK, X, X2, target)
self.k.dK_dX(dL_dK, AX, X2, target)
self.k.dK_dX(dL_dK, X, AX2, target)
self.k.dK_dX(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
foo = np.zeros((X.shape[0],X.shape[0]))
self.K(X,X,foo)
target += np.diag(foo)
def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
raise NotImplementedError
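The four-term sum in K makes the resulting covariance invariant under x -> Ax whenever the transform is an involution (A.dot(A) == I, as the default A = -I is). A self-contained numpy check, with a plain RBF standing in for self.k:
import numpy as np

def rbf(X, X2):
    return np.exp(-0.5 * np.square(X[:, None, :] - X2[None, :, :]).sum(-1))

def k_symm(X, X2, A):
    AX, AX2 = X.dot(A), X2.dot(A)
    return rbf(X, X2) + rbf(AX, X2) + rbf(X, AX2) + rbf(AX, AX2)

A = -np.eye(1)                                        # the default reflection transform
X, X2 = np.random.randn(5, 1), np.random.randn(4, 1)
print(np.allclose(k_symm(X.dot(A), X2, A), k_symm(X, X2, A)))   # True: invariant under x -> Ax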

View file

@ -0,0 +1,10 @@
#include <math.h>
double DiracDelta(double x){
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
return 1.0;
else
return 0.0;
};
double DiracDelta(double x,int foo){
return 0.0;
};

View file

@ -0,0 +1,3 @@
#include <math.h>
double DiracDelta(double x);
double DiracDelta(double x, int foo);

258
GPy/kern/parts/sympykern.py Normal file
View file

@ -0,0 +1,258 @@
import numpy as np
import sympy as sp
from sympy.utilities.codegen import codegen
from sympy.core.cache import clear_cache
from scipy import weave
import re
import os
import sys
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import tempfile
import pdb
from kernpart import Kernpart
class spkern(Kernpart):
"""
A kernel object, where all the hard work is done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x0, z0, x1, z1, ...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x0, z0, x1, z1, etc (the asserts below expect zero-based names)
- to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' covariance function. TODO
"""
def __init__(self,input_dim,k,param=None):
self.name='sympykern'
self._sp_k = k
sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
assert len(self._sp_x)==len(self._sp_z)
self.input_dim = len(self._sp_x)
assert self.input_dim == input_dim
self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
self.num_params = len(self._sp_theta)
#deal with param
if param is None:
param = np.ones(self.num_params)
assert param.size==self.num_params
self._set_params(param)
#Differentiate!
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
#self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]
#self.compute_psi_stats()
self._gen_code()
self.weave_kwargs = {\
'support_code':self._function_code,\
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\
'headers':['"sympy_helpers.h"'],\
'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\
#'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
'extra_compile_args':[],\
'extra_link_args':['-lgomp'],\
'verbose':True}
def __add__(self,other):
return spkern(self._sp_k+other._sp_k)
def compute_psi_stats(self):
#define some normal distributions
mus = [sp.var('mu%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S%i'%i,positive=True) for i in range(self.input_dim)]
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
#do some integration!
#self._sp_psi0 = ??
self._sp_psi1 = self._sp_k
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi1 *= normals[i]
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi1 = self._sp_psi1.simplify()
#and here's psi2 (eek!)
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
for i in range(self.input_dim):
print 'performing integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi2 *= normals[i]
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi2 = self._sp_psi2.simplify()
def _gen_code(self):
#generate c functions from sympy objects
(foo_c,self._function_code),(foo_h,self._function_header) = \
codegen([('k',self._sp_k)] \
+ [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\
#+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\
+ [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\
,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta)
#put the header file where we can find it
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
f.write(self._function_header)
f.close()
#get rid of derivatives of DiracDelta
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
#Here's some code to do the looping for K
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\
+ ["param[%i]"%i for i in range(self.num_params)])
self._K_code =\
"""
int i;
int j;
int N = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
target[i*num_inducing+j] = k(%s);
}
}
%s
"""%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
diag_arglist = re.sub('Z','X',arglist)
diag_arglist = re.sub('j','i',diag_arglist)
#Here's some code to do the looping for Kdiag
self._Kdiag_code =\
"""
int i;
int N = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<N;i++){
target[i] = k(%s);
}
%s
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#here's some code to compute gradients
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
self._dK_dtheta_code =\
"""
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#here's some code to compute gradients for Kdiag TODO: this is yucky.
diag_funclist = re.sub('Z','X',funclist,count=0)
diag_funclist = re.sub('j','i',diag_funclist)
diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
self._dKdiag_dtheta_code =\
"""
int i;
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<N;i++){
%s
}
%s
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#Here's some code to do gradients wrt x
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
self._dK_dX_code = \
"""
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N; i++){
for (j=0; j<num_inducing; j++){
%s
//if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
//if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}
}
}
%s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#now for gradients of Kdiag wrt X
self._dKdiag_dX_code= \
"""
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = 0;
int input_dim = X_array->dimensions[1];
for (i=0;i<N; i++){
j = i;
%s
}
%s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
def K(self,X,Z,target):
param = self._param
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
def Kdiag(self,X,target):
param = self._param
weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
def dK_dtheta(self,partial,X,Z,target):
param = self._param
weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dtheta(self,partial,X,target):
param = self._param
Z = X
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dK_dX(self,partial,X,Z,target):
param = self._param
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dX(self,partial,X,target):
param = self._param
Z = X
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def _set_params(self,param):
#print param.flags['C_CONTIGUOUS']
self._param = param.copy()
def _get_params(self):
return self._param
def _get_param_names(self):
return [x.name for x in self._sp_theta]
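A sketch of constructing a covariance for spkern, following the x0/z0 naming convention the asserts above enforce. The parameter names here are arbitrary; instantiating spkern triggers sympy code generation and a weave compile:
import sympy as sp

x0, z0 = sp.symbols('x0 z0')
variance, lengthscale = sp.symbols('variance lengthscale', positive=True)
k = variance * sp.exp(-(x0 - z0) ** 2 / (2 * lengthscale ** 2))
# kern = spkern(1, k)   # input_dim must match the number of x*/z* symbol pairs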

84
GPy/kern/parts/white.py Normal file
View file

@ -0,0 +1,84 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class White(Kernpart):
"""
White noise kernel.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim
self.num_params = 1
self.name = 'white'
self._set_params(np.array([variance]).flatten())
self._psi1 = 0 # TODO: more elegance here
def _get_params(self):
return self.variance
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):
if X2 is None:
target += np.eye(X.shape[0])*self.variance
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target):
if X2 is None:
target += np.trace(dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
pass
def dKdiag_dX(self,dL_dKdiag,X,target):
pass
def psi0(self,Z,mu,S,target):
pass # target += self.variance
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
pass # target += dL_dpsi0.sum()
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
pass
def psi1(self,Z,mu,S,target):
pass
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
pass
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
pass
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
pass
def psi2(self,Z,mu,S,target):
pass
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
pass
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
pass
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
pass

4
GPy/likelihoods/__init__.py Normal file
View file

@ -0,0 +1,4 @@
from ep import EP
from gaussian import Gaussian
# TODO: from Laplace import Laplace
import likelihood_functions as functions

342
GPy/likelihoods/ep.py Normal file
View file

@ -0,0 +1,342 @@
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
class EP(likelihood):
def __init__(self,data,LikelihoodFunction,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation
Arguments
---------
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
LikelihoodFunction : a likelihood function (see likelihood_functions.py)
"""
self.LikelihoodFunction = LikelihoodFunction
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data
self.N, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
self._transf_data = self.LikelihoodFunction._preprocess_values(data)
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#initial values for the GP variables
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
def restart(self):
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
def predictive_values(self,mu,var,full_cov):
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.LikelihoodFunction.predictive_values(mu,var)
def _get_params(self):
return np.zeros(0)
def _get_param_names(self):
return []
def _set_params(self,p):
pass # TODO: the EP likelihood might want to take some parameters...
def _gradients(self,partial):
return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None]
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = np.trace(self.YYT)
def fit_full(self,K):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
"""
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
def fit_DTC(self, Kmm, Kmn):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = \int N(f|Kfu*Kuu_inv*u, 0) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
#KmnKnm = np.dot(Kmn, Kmn.T)
#KmmiKmn = np.dot(Kmmi,Kmn)
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T
mu = w + P*Gamma
"""
mu = np.zeros(self.N)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
V2,info = dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy())
self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
num_inducing = Kmm.shape[0]
"""
Prior approximation parameters:
q(f|X) = \int N(f|Kfu*Kuu_inv*u, diag(Kff-Qff)) N(u|0,Kuu) du = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T
mu = w + P*Gamma
"""
self.w = np.zeros(self.N)
self.Gamma = np.zeros(num_inducing)
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,num_inducing)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.Gamma)
self.iterations += 1
#Sigma recomputation with Cholesky decomposition
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
R,info = dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.Gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
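The DSYR call in the site loop of fit_full is the Sherman-Morrison rank-one correction for adding Delta_tau to site i's precision. A small numpy check of that identity against direct inversion:
import numpy as np

N, i, Delta_tau = 5, 2, 0.7
A = np.random.randn(N, N)
Sigma = A.dot(A.T) + np.eye(N)                     # a positive-definite "posterior"
s_i = Sigma[:, i].copy()
Sigma_new = Sigma - (Delta_tau / (1. + Delta_tau * Sigma[i, i])) * np.outer(s_i, s_i)
P = np.linalg.inv(Sigma)
P[i, i] += Delta_tau                               # the precision gains Delta_tau at site i
print(np.allclose(Sigma_new, np.linalg.inv(P)))    # True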

98
GPy/likelihoods/gaussian.py Normal file
View file

@ -0,0 +1,98 @@
import numpy as np
from likelihood import likelihood
from ..util.linalg import jitchol
class Gaussian(likelihood):
"""
Gaussian likelihood: the conjugate case, where inference is exact and no approximation is needed
:param data: observed output (Nx1 numpy.ndarray)
:param variance: noise variance
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True
"""
def __init__(self, data, variance=1., normalize=False):
self.is_heteroscedastic = False
self.Nparams = 1
self.Z = 0. # a correction factor which accounts for the approximation made
N, self.output_dim = data.shape
# normalization
if normalize:
self._offset = data.mean(0)[None, :]
self._scale = data.std(0)[None, :]
# Don't scale outputs which have zero variance to zero.
self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
else:
self._offset = np.zeros((1, self.output_dim))
self._scale = np.ones((1, self.output_dim))
self.set_data(data)
self._variance = np.asarray(variance) + 1. # offset ensures the _set_params call below sees a changed value and initializes everything
self._set_params(np.asarray(variance))
def set_data(self, data):
self.data = data
self.N, D = data.shape
assert D == self.output_dim
self.Y = (self.data - self._offset) / self._scale
if D > self.N:
self.YYT = np.dot(self.Y, self.Y.T)
self.trYYT = np.trace(self.YYT)
self.YYT_factor = jitchol(self.YYT)
else:
self.YYT = None
self.trYYT = np.sum(np.square(self.Y))
self.YYT_factor = self.Y
def _get_params(self):
return np.asarray(self._variance)
def _get_param_names(self):
return ["noise_variance"]
def _set_params(self, x):
x = np.float64(x)
if np.all(self._variance != x):
if x == 0.:#special case of zero noise
self.precision = np.inf
self.V = None
else:
self.precision = 1. / x
self.V = (self.precision) * self.Y
self.VVT_factor = self.precision * self.YYT_factor
self.covariance_matrix = np.eye(self.N) * x
self._variance = x
def predictive_values(self, mu, var, full_cov):
"""
Un-normalize the prediction and add the likelihood variance, then return an approximate 95% interval (mean +/- 2 standard deviations)
"""
mean = mu * self._scale + self._offset
if full_cov:
if self.output_dim > 1:
raise NotImplementedError, "TODO"
# Note. for output_dim>1, we need to re-normalise all the outputs independently.
# This will mess up computations of diag(true_var), below.
# note that the upper, lower quantiles should be the same shape as mean
# Augment the output variance with the likelihood variance and rescale.
true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(np.diag(true_var))
_95pc = mean + 2.*np.sqrt(np.diag(true_var))
else:
true_var = (var + self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(true_var)
_95pc = mean + 2.*np.sqrt(true_var)
return mean, true_var, _5pc, _95pc
def fit_full(self):
"""
No approximations needed
"""
pass
def _gradients(self, partial):
return np.sum(partial)
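The normalization set up in __init__ is undone in predictive_values; the round trip in isolation, including the variance rescaling:
import numpy as np

data = 3. + 5. * np.random.randn(100, 2)
offset, scale = data.mean(0)[None, :], data.std(0)[None, :]
Y = (data - offset) / scale                 # what set_data hands to the GP
mu, var = Y[:5], 0.1 * np.ones((5, 2))      # pretend latent predictions
mean = mu * scale + offset                  # un-normalize, as in predictive_values
true_var = (var + 1.0) * scale ** 2         # add a noise variance of 1., rescale
print(np.allclose(mean, data[:5]))          # True: a perfect latent prediction recovers the data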

44
GPy/likelihoods/likelihood.py Normal file
View file

@ -0,0 +1,44 @@
import numpy as np
import copy
class likelihood:
"""
The atom for a likelihood class
This object interfaces the GP and the data. The most basic likelihood
(Gaussian) inherits directly from this, as does the EP algorithm
Some things must be defined for this to work properly:
self.Y : the effective Gaussian target of the GP
self.N, self.D : Y.shape
self.covariance_matrix : the effective (noise) covariance of the GP targets
self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
self.is_heteroscedastic : enables significant computational savings in GP
self.precision : a scalar or vector representation of the effective target precision
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
self.V : self.precision * self.Y
"""
def __init__(self,data):
raise ValueError, "this class is not to be instantiated"
def _get_params(self):
raise NotImplementedError
def _get_param_names(self):
raise NotImplementedError
def _set_params(self, x):
raise NotImplementedError
def fit(self):
raise NotImplementedError
def _gradients(self, partial):
raise NotImplementedError
def predictive_values(self, mu, var):
raise NotImplementedError
def copy(self):
""" Returns a (deep) copy of the current likelihood """
return copy.deepcopy(self)

166
GPy/likelihoods/likelihood_functions.py Normal file
View file

@ -0,0 +1,166 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions
class LikelihoodFunction(object):
"""
Base class for likelihood functions used in the EP moment-matching routines
:param link: a link function (see link_functions.py); when it equals the class's analytical link, exact moment matching is used
..Note:: Y values allowed depend on the LikelihoodFunction used
"""
def __init__(self,link):
if link == self._analytical:
self.moments_match = self._moments_match_analytical
else:
assert isinstance(link,link_functions.LinkFunction)
self.link = link
self.moments_match = self._moments_match_numerical
def _preprocess_values(self,Y):
return Y
def _product(self,gp,obs,mu,sigma):
return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
def _nlog_product(self,gp,obs,mu,sigma):
return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
def _locate(self,obs,mu,sigma):
"""
Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
"""
golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
golden_B = np.array([np.log(obs),mu]).max() #Upper limit
return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
def _moments_match_numerical(self,obs,tau,v):
"""
Simpson's Rule is used to calculate the moments numerically; it needs a grid of points as input.
"""
mu = v/tau
sigma = np.sqrt(1./tau)
opt = self._locate(obs,mu,sigma)
width = 3./np.log(max(obs,2))
A = opt - width #Grid's lower limit
B = opt + width #Grid's Upper limit
K = 10*int(np.log(max(obs,150))) #Number of points in the grid
h = (B-A)/K # length of the intervals
grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
_aux1 = self._product(A,obs,mu,sigma)
_aux2 = self._product(B,obs,mu,sigma)
_aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
_aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
first = zeroth*x
second = first*x
Z_hat = sum(zeroth)*h/3 # Zero-th moment
mu_hat = sum(first)*h/(3*Z_hat) # First moment
m2 = sum(second)*h/(3*Z_hat) # Second moment
sigma2_hat = m2 - mu_hat**2 # Second central moment
return float(Z_hat), float(mu_hat), float(sigma2_hat)
class Binomial(LikelihoodFunction):
"""
Binomial (Bernoulli) likelihood with a probit link
Y is expected to take values in {0,1}; internally they are mapped to {-1,1}
-----
$$
L(x) = \\Phi (Y_i*f_i)
$$
"""
def __init__(self,link=None):
self._analytical = link_functions.Probit
if not link:
link = self._analytical
super(Binomial, self).__init__(link)
def _distribution(self,gp,obs):
pass
def _log_distribution(self,gp,obs):
pass
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1,1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param data_i: observed datum, in {-1,1} (float)
:param tau_i: precision of the cavity distribution (float)
:param v_i: precision-times-mean (natural parameter) of the cavity distribution (float)
"""
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
return Z_hat, mu_hat, sigma2_hat
def predictive_values(self,mu,var):
"""
Compute mean, variance and confidence interval (percentiles 5 and 95) of the prediction
:param mu: mean of the latent variable
:param var: variance of the latent variable
"""
mu = mu.flatten()
var = var.flatten()
mean = stats.norm.cdf(mu/np.sqrt(1+var))
norm_025 = [stats.norm.ppf(.025,m,np.sqrt(v)) for m,v in zip(mu,var)] # ppf takes the standard deviation, not the variance
norm_975 = [stats.norm.ppf(.975,m,np.sqrt(v)) for m,v in zip(mu,var)]
p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
class Poisson(LikelihoodFunction):
"""
Poisson likelihood
Y is expected to take values in {0,1,2,...}
-----
$$
L(x) = \exp(-\lambda) \lambda^{Y_i} / Y_i!
$$
"""
def __init__(self,link=None):
self._analytical = None
if not link:
link = link_functions.Log()
super(Poisson, self).__init__(link)
def _distribution(self,gp,obs):
return stats.poisson.pmf(obs,self.link.inv_transf(gp))
def _log_distribution(self,gp,obs):
return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)
def predictive_values(self,mu,var):
"""
Compute mean, and confidence interval (percentiles 5 and 95) of the prediction
"""
mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
tmp = stats.poisson.ppf(np.array([.025,.975])[:,None],mean.flatten()) # broadcast quantiles against the predicted rates
p_025 = tmp[0]
p_975 = tmp[1]
return mean,np.nan*mean,p_025,p_975 # better variance here TODO
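As a sanity check on Binomial._moments_match_analytical above, the analytic zero-th moment can be compared against brute-force quadrature over the cavity distribution N(v_i/tau_i, 1/tau_i); a minimal sketch with toy numbers (nothing here is part of the GPy API):
import numpy as np
from scipy import stats
y, tau, v = 1., 2., .6                                 # toy label, cavity precision and precision*mean
f = np.linspace(-10., 10., 20001)
cavity = stats.norm.pdf(f, v/tau, np.sqrt(1./tau))
Z_num = np.trapz(stats.norm.cdf(y*f) * cavity, f)
Z_ana = stats.norm.cdf(y*v/np.sqrt(tau**2 + tau))
print Z_num, Z_ana                                     # agree to quadrature precision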

33 GPy/likelihoods/link_functions.py Normal file

@@ -0,0 +1,33 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class LinkFunction(object):
"""
Link function class for doing non-Gaussian likelihoods approximation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
"""
def __init__(self):
pass
class Probit(LinkFunction):
"""
Probit link function: squashes latent function values into (0,1)
"""
def transf(self,mu):
pass
def inv_transf(self,f):
pass
def log_inv_transf(self,f):
pass

7 GPy/mappings/__init__.py Normal file

@@ -0,0 +1,7 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernel import Kernel
from linear import Linear
from mlp import MLP
#from rbf import RBF

60 GPy/mappings/kernel.py Normal file

@@ -0,0 +1,60 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
import GPy
class Kernel(Mapping):
"""
Mapping based on a kernel/covariance function.
.. math::
f(\mathbf{x}^*) = \mathbf{A}^\top\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b}
:param X: input observations containing :math:`\mathbf{X}`
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
:param kernel: a GPy kernel, defaults to GPy.kern.rbf
:type kernel: GPy.kern.kern
"""
def __init__(self, X, output_dim=1, kernel=None):
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
if kernel is None:
kernel = GPy.kern.rbf(self.input_dim)
self.kern = kernel
self.X = X
self.num_data = X.shape[0]
self.num_params = self.output_dim*(self.num_data + 1)
self.A = np.empty((self.num_data, self.output_dim)) # placeholder; filled by randomize() below
self.bias = np.empty(self.output_dim)
self.randomize()
self.name = 'kernel'
def _get_param_names(self):
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.A.flatten(), self.bias))
def _set_params(self, x):
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
self.bias = x[self.num_data*self.output_dim:].copy()
def randomize(self):
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
def f(self, X):
return np.dot(self.kern.K(X, self.X),self.A) + self.bias
def df_dtheta(self, dL_df, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
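A minimal usage sketch for the mapping above (toy data; assumes GPy is importable and that the default rbf kernel is acceptable):
import numpy as np
import GPy
X = np.random.randn(20, 2)
mapping = GPy.mappings.Kernel(X=X, output_dim=1)       # A and bias are randomized at construction
F = mapping.f(np.random.randn(5, 2))                   # (5, 1) array of mapped outputs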

53 GPy/mappings/linear.py Normal file

@@ -0,0 +1,53 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
class Linear(Mapping):
"""
Mapping based on a linear model.
.. math::
f(\mathbf{x}^*) = \mathbf{W}^\top\mathbf{x}^* + \mathbf{b}
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
"""
def __init__(self, input_dim=1, output_dim=1):
self.name = 'linear'
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.num_params = self.output_dim*(self.input_dim + 1)
self.W = np.empty((self.input_dim, self.output_dim)) # placeholder; filled by randomize() below
self.bias = np.empty(self.output_dim)
self.randomize()
def _get_param_names(self):
return sum([['W_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.input_dim)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.W.flatten(), self.bias))
def _set_params(self, x):
self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
self.bias = x[self.input_dim*self.output_dim:].copy()
def randomize(self):
self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)
def f(self, X):
return np.dot(X,self.W) + self.bias
def df_dtheta(self, dL_df, X):
self._df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dW.flatten(), self._df_dbias))
def df_dX(self, dL_df, X):
return (dL_df[:, None, :]*self.W[None, :, :]).sum(2)

127 GPy/mappings/mlp.py Normal file

@@ -0,0 +1,127 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
class MLP(Mapping):
"""
Mapping based on a multi-layer perceptron neural network model.
.. math::
f(\mathbf{x}^*) = \mathbf{W}^0\boldsymbol{\phi}(\mathbf{W}^1\mathbf{x}^*+\mathbf{b}^1) + \mathbf{b}^0
where
.. math::
\phi(\cdot) = \text{tanh}(\cdot)
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as many hidden layers as the length of the list, each with the given number of hidden nodes in it.
:type hidden_dim: int or list of ints.
"""
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.name = 'mlp'
if isinstance(hidden_dim, int):
hidden_dim = [hidden_dim]
self.hidden_dim = hidden_dim
self.activation = [None]*len(self.hidden_dim)
self.W = []
self._dL_dW = []
self.bias = []
self._dL_dbias = []
self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self.bias.append(np.zeros(self.hidden_dim[0]))
self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
self.num_params = self.hidden_dim[0]*(self.input_dim+1)
for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
self.W.append(np.zeros((h0, h1)))
self._dL_dW.append(np.zeros((h0, h1)))
self.bias.append(np.zeros(h1))
self._dL_dbias.append(np.zeros(h1))
self.num_params += h1*(h0+1)
self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self.bias.append(np.zeros(self.output_dim))
self._dL_dbias.append(np.zeros(self.output_dim))
self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
self.randomize()
def _get_param_names(self):
return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])
def _get_params(self):
param = np.array([])
for W, bias in zip(self.W, self.bias):
param = np.hstack((param, W.flatten(), bias))
return param
def _set_params(self, x):
start = 0
for W, bias in zip(self.W, self.bias):
end = W.shape[0]*W.shape[1]+start
W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
start = end
end = W.shape[1]+end
bias[:] = x[start:end].copy()
start = end
def randomize(self):
for W, bias in zip(self.W, self.bias):
W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)
def f(self, X):
self._f_computations(X)
return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
def _f_computations(self, X):
W = self.W[0]
bias = self.bias[0]
self.activation[0] = np.dot(X,W) + bias
for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W)+bias
def df_dtheta(self, dL_df, X):
self._df_computations(dL_df, X)
g = np.array([])
for gW, gbias in zip(self._dL_dW, self._dL_dbias):
g = np.hstack((g, gW.flatten(), gbias))
return g
def _df_computations(self, dL_df, X):
self._f_computations(X)
a0 = self.activation[-1]
W = self.W[-1]
self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dta=(dL_df[:, None, :]*W[None, :, :]).sum(2)
self._dL_dbias[-1] = (dL_df.sum(0))
for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
self._dL_dbias[-2:0:-1],
self.W[-2:0:-1],
self.bias[-2:0:-1],
self.activation[-2::-1],
self.activation[-1:0:-1]):
ta = np.tanh(a1)
dL_da = dL_dta*(1-ta*ta)
dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dbias[:] = (dL_da.sum(0))
dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
ta = np.tanh(self.activation[0])
dL_da = dL_dta*(1-ta*ta)
W = self.W[0]
self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
self._dL_dbias[0] = (dL_da.sum(0))
self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
def df_dX(self, dL_df, X):
self._df_computations(dL_df, X)
return self._dL_dX
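The backpropagation above can be checked numerically with the GradientChecker model added elsewhere in this commit; a sketch for the gradient with respect to the inputs (L = sum of the outputs, so dL_df is a matrix of ones):
import numpy as np
import GPy
mlp = GPy.mappings.MLP(input_dim=2, output_dim=1, hidden_dim=4)
X0 = np.random.randn(10, 2)
ones = np.ones((X0.shape[0], mlp.output_dim))
grad = GPy.models.GradientChecker(mlp.f, lambda X: mlp.df_dX(ones, X), X0, names='X')
grad.checkgrad(verbose=1)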

16 GPy/models/__init__.py Normal file

@@ -0,0 +1,16 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from gp_regression import GPRegression
from gp_classification import GPClassification
from sparse_gp_regression import SparseGPRegression
from svigp_regression import SVIGPRegression
from sparse_gp_classification import SparseGPClassification
from fitc_classification import FITCClassification
from gplvm import GPLVM
from bcgplvm import BCGPLVM
from sparse_gplvm import SparseGPLVM
from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM
from mrd import MRD
from gradient_checker import GradientChecker

334 GPy/models/bayesian_gplvm.py Normal file

@@ -0,0 +1,334 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from ..likelihoods import Gaussian
from .. import kern
import itertools
from matplotlib.colors import colorConverter
from GPy.inference.optimization import SCG
from GPy.util import plot_latent
from GPy.models.gplvm import GPLVM
from GPy.util.plot_latent import most_significant_input_dimensions
from matplotlib import pyplot
class BayesianGPLVM(SparseGP, GPLVM):
"""
Bayesian Gaussian Process Latent Variable Model
:param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray| GPy.likelihood instance
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, **kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y
if X is None:
X = self.initialise_latent(init, input_dim, likelihood.Y)
self.init = init
if X_variance is None:
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if kernel is None:
kernel = kern.rbf(input_dim) # + kern.white(input_dim)
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
self.ensure_default_constraints()
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return SparseGP.getstate(self) + [self.init]
def setstate(self, state):
self._const_jitter = None
self.init = state.pop()
SparseGP.setstate(self, state)
def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
return (X_names + S_names + SparseGP._get_param_names(self))
def _get_print_names(self):
return SparseGP._get_print_names(self)
def _get_params(self):
"""
Horizontally stacks the parameters in order to present them to the optimizer.
The resulting 1-input_dim array has this structure:
===============================================================
| mu | S | Z | theta | beta |
===============================================================
"""
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
return x
def _set_params(self, x, save_old=True, save_count=0):
N, input_dim = self.num_data, self.input_dim
self.X = x[:self.X.size].reshape(N, input_dim).copy()
self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
SparseGP._set_params(self, x[(2 * N * input_dim):])
def dKL_dmuS(self):
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
dKL_dmu = self.X
return dKL_dmu, dKL_dS
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
return dL_dmu, dL_dS
def KL_divergence(self):
var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def log_likelihood(self):
ll = SparseGP.log_likelihood(self)
kl = self.KL_divergence()
return ll - kl
def _log_likelihood_gradients(self):
dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS()
d_dmu = (dL_dmu - dKL_dmu).flatten()
d_dS = (dL_dS - dKL_dS).flatten()
self.dbound_dmuS = np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
def plot_latent(self, plot_inducing=True, *args, **kwargs):
return plot_latent.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
def do_test_latents(self, Y):
"""
Compute the latent representation for a set of new points Y
Notes:
This will only work with a univariate Gaussian likelihood (for now)
"""
assert not self.likelihood.is_heteroscedastic
N_test = Y.shape[0]
input_dim = self.Z.shape[1]
means = np.zeros((N_test, input_dim))
covars = np.zeros((N_test, input_dim))
dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = self.likelihood.precision * Y
dpsi1 = np.dot(self.Cpsi1V, V.T)
start = np.zeros(self.input_dim * 2)
for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
args = (self.kern, self.Z, dpsi0, dpsi1_n, dpsi2)
xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
mu, log_S = xopt.reshape(2, 1, -1)
means[n] = mu[0].copy()
covars[n] = np.exp(log_S[0]).copy()
return means, covars
def dmu_dX(self, Xnew):
"""
Calculate the gradient of the prediction at Xnew w.r.t Xnew.
"""
dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]):
dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX
def dmu_dXnew(self, Xnew):
"""
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
"""
dK_dX = np.zeros((Xnew.shape[0], self.num_inducing))
ones = np.ones((1, 1))
for i in range(self.Z.shape[0]):
dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(dK_dX, self.Cpsi1Vf)
def plot_steepest_gradient_map(self, fignum=None, ax=None, which_indices=None, labels=None, data_labels=None, data_marker='o', data_s=40, resolution=20, aspect='auto', updates=False, ** kwargs):
input_1, input_2 = significant_dims = most_significant_input_dimensions(self, which_indices)
X = np.zeros((resolution ** 2, self.input_dim))
indices = np.r_[:X.shape[0]]
if labels is None:
labels = range(self.output_dim)
def plot_function(x):
X[:, significant_dims] = x
dmu_dX = self.dmu_dXnew(X)
argmax = np.argmax(dmu_dX, 1)
return dmu_dX[indices, argmax], np.array(labels)[argmax]
if ax is None:
fig = pyplot.figure(num=fignum)
ax = fig.add_subplot(111)
if data_labels is None:
data_labels = np.ones(self.num_data)
ulabels = []
for lab in data_labels:
if not lab in ulabels:
ulabels.append(lab)
marker = itertools.cycle(list(data_marker))
from GPy.util import Tango
for i, ul in enumerate(ulabels):
if type(ul) is np.string_:
this_label = ul
elif type(ul) is np.int64:
this_label = 'class %i' % ul
else:
this_label = 'class %i' % i
m = marker.next()
index = np.nonzero(data_labels == ul)[0]
x = self.X[index, input_1]
y = self.X[index, input_2]
ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2)
from matplotlib.cm import get_cmap
from GPy.util.latent_space_visualizations.controllers.imshow_controller import ImAnnotateController
controller = ImAnnotateController(ax,
plot_function,
tuple(self.X.min(0)[:, significant_dims]) + tuple(self.X.max(0)[:, significant_dims]),
resolution=resolution,
aspect=aspect,
cmap=get_cmap('jet'),
**kwargs)
ax.legend()
ax.figure.tight_layout()
if updates:
pyplot.show()
clear = raw_input('Enter to continue')
if clear.lower() in 'yes' or clear == '':
controller.deactivate()
return controller.view
def plot_X_1d(self, fignum=None, ax=None, colors=None):
"""
Plot latent space X in 1D:
-if fig is given, create input_dim subplots in fig and plot in these
-if ax is given plot input_dim 1D latent space plots of X into each `axis`
-if neither fig nor ax is given create a figure with fignum and plot in there
colors:
colors of different latent space dimensions input_dim
"""
import pylab
if ax is None:
fig = pylab.figure(num=fignum, figsize=(8, min(12, (2 * self.X.shape[1]))))
if colors is None:
colors = pylab.gca()._get_lines.color_cycle
pylab.clf()
else:
colors = iter(colors)
plots = []
x = np.arange(self.X.shape[0])
for i in range(self.X.shape[1]):
if ax is None:
a = fig.add_subplot(self.X.shape[1], 1, i + 1)
elif isinstance(ax, (tuple, list)):
a = ax[i]
else:
raise ValueError("Need one ax per latent dimnesion input_dim")
a.plot(self.X, c='k', alpha=.3)
plots.extend(a.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
a.fill_between(x,
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
facecolor=plots[-1].get_color(),
alpha=.3)
a.legend(borderaxespad=0.)
a.set_xlim(x.min(), x.max())
if i < self.X.shape[1] - 1:
a.set_xticklabels('')
pylab.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points
(negative log-likelihood: should be minimised!)
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
This is the same as latent_cost_and_grad but only for the objective
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
return -float(lik)
def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
This is the same as latent_cost_and_grad but only for the grad
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -np.hstack((dmu.flatten(), dlnS.flatten()))
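A minimal fitting sketch for the model above (toy data; optimize() is assumed to come from the Model base class):
import numpy as np
import GPy
Y = np.random.randn(40, 5)
m = GPy.models.BayesianGPLVM(Y, input_dim=2, num_inducing=10)
m.optimize()
m.plot_latent()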

50 GPy/models/bcgplvm.py Normal file

@@ -0,0 +1,50 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from ..core import GP
from ..models import GPLVM
from ..mappings import *
class BCGPLVM(GPLVM):
"""
Back constrained Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
:param mapping: mapping for back constraint
:type mapping: GPy.core.Mapping object
"""
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False, mapping=None):
if mapping is None:
mapping = Kernel(X=Y, output_dim=input_dim)
self.mapping = mapping
GPLVM.__init__(self, Y, input_dim, init, X, kernel, normalize_Y)
self.X = self.mapping.f(self.likelihood.Y)
def _get_param_names(self):
return self.mapping._get_param_names() + GP._get_param_names(self)
def _get_params(self):
return np.hstack((self.mapping._get_params(), GP._get_params(self)))
def _set_params(self, x):
self.mapping._set_params(x[:self.mapping.num_params])
self.X = self.mapping.f(self.likelihood.Y)
GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self):
dL_df = 2.*self.kern.dK_dX(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

47 GPy/models/fitc_classification.py Normal file

@@ -0,0 +1,47 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import FITC
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
class FITCClassification(FITC):
"""
FITC approximation for classification
This is a thin wrapper around the FITC class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
"""
def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy()
else:
assert Z.shape[1]==X.shape[1]
FITC.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
self.ensure_default_constraints()

41 GPy/models/gp_classification.py Normal file

@@ -0,0 +1,41 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GPClassification(GP):
"""
Gaussian Process classification
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self,X,Y=None,likelihood=None,kernel=None,normalize_X=False,normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self.ensure_default_constraints()
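A minimal usage sketch for the classifier above (toy 1-d data; update_likelihood_approximation runs the EP approximation, as also called in mrd.py later in this commit):
import numpy as np
import GPy
X = np.random.randn(40, 1)
Y = np.where(X > 0, 1, 0)                              # labels in {0,1}, remapped to {-1,1} internally
m = GPy.models.GPClassification(X, Y)
m.update_likelihood_approximation()
m.optimize()
probs = m.predict(X)[0]                                # predictive class probabilities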

43 GPy/models/gp_regression.py Normal file

@@ -0,0 +1,43 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GPRegression(GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self.ensure_default_constraints()
def getstate(self):
return GP.getstate(self)
def setstate(self, state):
return GP.setstate(self, state)
pass
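A minimal usage sketch for the regression wrapper above (toy 1-d data; assumes the four-value predict signature used in warped_gp.py later in this commit):
import numpy as np
import GPy
X = np.linspace(0., 10., 50)[:, None]
Y = np.sin(X) + .1*np.random.randn(50, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize()
mu, var, lower, upper = m.predict(X)                   # mean, variance, 2.5 and 97.5 percentiles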

93 GPy/models/gplvm.py Normal file

@@ -0,0 +1,93 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from .. import kern
from ..core import Model
from ..util.linalg import pdinv, PCA
from ..core.priors import Gaussian as Gaussian_prior
from ..core import GP
from ..likelihoods import Gaussian
from .. import util
from GPy.util import plot_latent
class GPLVM(GP):
"""
Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False):
if X is None:
X = self.initialise_latent(init, input_dim, Y)
if kernel is None:
kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y, variance=np.exp(-2.))
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
self.set_prior('.*X', Gaussian_prior(0, 1))
self.ensure_default_constraints()
def initialise_latent(self, init, input_dim, Y):
Xr = np.random.randn(Y.shape[0], input_dim)
if init == 'PCA':
PC = PCA(Y, input_dim)[0]
Xr[:PC.shape[0], :PC.shape[1]] = PC
return Xr
def getstate(self):
return GP.getstate(self)
def setstate(self, state):
GP.setstate(self, state)
def _get_param_names(self):
return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self)
def _get_params(self):
return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self, x):
self.X = x[:self.num_data * self.input_dim].reshape(self.num_data, self.input_dim).copy()
GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self):
dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X)
return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))
def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim):
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
return target
def magnification(self,X):
target=np.zeros(X.shape[0])
J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
J=self.jacobian(X)
for i in range(X.shape[0]):
target[i]=np.sqrt(pb.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
return target
def plot(self):
assert self.likelihood.Y.shape[1] == 2
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet)
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, var, upper, lower = self.predict(Xnew)
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self, *args, **kwargs):
return util.plot_latent.plot_latent(self, *args, **kwargs)
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)

114 GPy/models/gradient_checker.py Normal file

@@ -0,0 +1,114 @@
'''
Created on 17 Jul 2013
@author: maxz
'''
from GPy.core.model import Model
import itertools
import numpy
def get_shape(x):
if isinstance(x, numpy.ndarray):
return x.shape
return ()
def at_least_one_element(x):
if isinstance(x, (list, tuple)):
return x
return [x]
def flatten_if_needed(x):
return numpy.atleast_1d(x).flatten()
class GradientChecker(Model):
def __init__(self, f, df, x0, names=None, *args, **kwargs):
"""
:param f: Function to check gradient for
:param df: Gradient of function to check
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
Examples:
---------
from GPy.models import GradientChecker
N, M, Q = 10, 5, 3
Sinusoid:
X = numpy.random.rand(N, Q)
grad = GradientChecker(numpy.sin,numpy.cos,X,'x')
grad.checkgrad(verbose=1)
Using GPy:
X, Z = numpy.random.randn(N,Q), numpy.random.randn(M,Q)
kern = GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True)
grad = GradientChecker(kern.K,
lambda x: 2*kern.dK_dX(numpy.ones((1,1)), x),
x0 = X.copy(),
names='X')
grad.checkgrad(verbose=1)
grad.randomize()
grad.checkgrad(verbose=1)
"""
Model.__init__(self)
if isinstance(x0, (list, tuple)) and names is None:
self.shapes = [get_shape(xi) for xi in x0]
self.names = ['X{i}'.format(i=i) for i in range(len(x0))]
elif isinstance(x0, (list, tuple)) and names is not None:
self.shapes = [get_shape(xi) for xi in x0]
self.names = names
elif names is None:
self.names = ['X']
self.shapes = [get_shape(x0)]
else:
self.names = names
self.shapes = [get_shape(x0)]
for name, xi in zip(self.names, at_least_one_element(x0)):
self.__setattr__(name, xi)
# self._param_names = []
# for name, shape in zip(self.names, self.shapes):
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
self.args = args
self.kwargs = kwargs
self.f = f
self.df = df
def _get_x(self):
if len(self.names) > 1:
return [self.__getattribute__(name) for name in self.names] + list(self.args)
return [self.__getattribute__(self.names[0])] + list(self.args)
def log_likelihood(self):
return float(numpy.sum(self.f(*self._get_x(), **self.kwargs)))
def _log_likelihood_gradients(self):
return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
def _get_params(self):
return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
def _set_params(self, x):
current_index = 0
for name, shape in zip(self.names, self.shapes):
current_size = numpy.prod(shape)
self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
current_index += current_size
def _get_param_names(self):
_param_names = []
for name, shape in zip(self.names, self.shapes):
_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
return _param_names

367 GPy/models/mrd.py Normal file

@@ -0,0 +1,367 @@
'''
Created on 10 Apr 2013
@author: Max Zwiessele
'''
from GPy.core import Model
from GPy.core import SparseGP
from GPy.util.linalg import PCA
import numpy
import itertools
import pylab
from GPy.kern.kern import kern
from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD(Model):
"""
Do MRD on given Datasets in Ylist.
All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn,
N must be shared across datasets though.
:param likelihood_list: list of observed datasets (:py:class:`~GPy.likelihoods.gaussian.Gaussian` if not supplied directly)
:type likelihood_list: [:py:class:`~GPy.likelihoods.likelihood.likelihood` | :py:class:`ndarray`]
:param names: names for different gplvm models
:type names: [str]
:param input_dim: latent dimensionality
:type input_dim: int
:param initx: initialisation method for the latent space :
* 'concat' - PCA on concatenation of all datasets
* 'single' - Concatenation of PCA on datasets, respectively
* 'random' - Random draw from a normal
:type initx: ['concat'|'single'|'random']
:param initz: initialisation method for inducing inputs
:type initz: 'permute'|'random'
:param X: Initial latent space
:param X_variance: Initial latent space variance
:param Z: initial inducing inputs
:param num_inducing: number of inducing inputs to use
:param kernels: list of kernels or kernel shared for all BGPLVMS
:type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default)
"""
def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None,
kernels=None, initx='PCA',
initz='permute', _debug=False, **kw):
if names is None:
names = ["{}".format(i) for i in range(len(likelihood_or_Y_list))]
self.names = names
# sort out the kernels
if kernels is None:
kernels = [None] * len(likelihood_or_Y_list)
elif isinstance(kernels, kern):
kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
else:
assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!"
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim
self._debug = _debug
self.num_inducing = num_inducing
self._init = True
X = self._init_X(initx, likelihood_or_Y_list)
Z = self._init_Z(initz, X)
self.num_inducing = Z.shape[0] # ensure M==N if M>N
self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
del self._init
self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum()
self.num_data = self.gref.num_data
self.NQ = self.num_data * self.input_dim
self.MQ = self.num_inducing * self.input_dim
Model.__init__(self)
self.ensure_default_constraints()
def getstate(self):
return Model.getstate(self) + [self.names,
self.bgplvms,
self.gref,
self.nparams,
self.input_dim,
self.num_inducing,
self.num_data,
self.NQ,
self.MQ]
def setstate(self, state):
self.MQ = state.pop()
self.NQ = state.pop()
self.num_data = state.pop()
self.num_inducing = state.pop()
self.input_dim = state.pop()
self.nparams = state.pop()
self.gref = state.pop()
self.bgplvms = state.pop()
self.names = state.pop()
Model.setstate(self, state)
@property
def X(self):
return self.gref.X
@X.setter
def X(self, X):
try:
self.propagate_param(X=X)
except AttributeError:
if not self._init:
raise AttributeError("bgplvm list not initialized")
@property
def Z(self):
return self.gref.Z
@Z.setter
def Z(self, Z):
try:
self.propagate_param(Z=Z)
except AttributeError:
if not self._init:
raise AttributeError("bgplvm list not initialized")
@property
def X_variance(self):
return self.gref.X_variance
@X_variance.setter
def X_variance(self, X_var):
try:
self.propagate_param(X_variance=X_var)
except AttributeError:
if not self._init:
raise AttributeError("bgplvm list not initialized")
@property
def likelihood_list(self):
return [g.likelihood.Y for g in self.bgplvms]
@likelihood_list.setter
def likelihood_list(self, likelihood_list):
for g, Y in itertools.izip(self.bgplvms, likelihood_list):
g.likelihood.Y = Y
@property
def auto_scale_factor(self):
"""
set auto_scale_factor for all gplvms
:param b: auto_scale_factor
:type b:
"""
return self.gref.auto_scale_factor
@auto_scale_factor.setter
def auto_scale_factor(self, b):
self.propagate_param(auto_scale_factor=b)
def propagate_param(self, **kwargs):
for key, val in kwargs.iteritems():
for g in self.bgplvms:
g.__setattr__(key, val)
def randomize(self, initx='concat', initz='permute', *args, **kw):
super(MRD, self).randomize(*args, **kw)
self._init_X(initx, self.likelihood_list)
self._init_Z(initz, self.X)
def _get_latent_param_names(self):
n1 = self.gref._get_param_names()
n1var = n1[:self.NQ * 2 + self.MQ]
return n1var
def _get_kernel_names(self):
map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
itertools.izip(ns,
itertools.repeat(name)))
kernel_names = (map_names(SparseGP._get_param_names(g)[self.MQ:], n) for g, n in zip(self.bgplvms, self.names))
return kernel_names
def _get_param_names(self):
# X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
# S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
n1var = self._get_latent_param_names()
kernel_names = self._get_kernel_names()
return list(itertools.chain(n1var, *kernel_names))
def _get_print_names(self):
return list(itertools.chain(*self._get_kernel_names()))
def _get_params(self):
"""
return parameter list containing private and shared parameters as follows:
=================================================================
| mu | S | Z || theta1 | theta2 | .. | thetaN |
=================================================================
"""
X = self.gref.X.ravel()
X_var = self.gref.X_variance.ravel()
Z = self.gref.Z.ravel()
thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
return params
# def _set_var_params(self, g, X, X_var, Z):
# g.X = X.reshape(self.num_data, self.input_dim)
# g.X_variance = X_var.reshape(self.num_data, self.input_dim)
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
#
# def _set_kern_params(self, g, p):
# g.kern._set_params(p[:g.kern.Nparam])
# g.likelihood._set_params(p[g.kern.Nparam:])
def _set_params(self, x):
start = 0; end = self.NQ
X = x[start:end]
start = end; end += start
X_var = x[start:end]
start = end; end += self.MQ
Z = x[start:end]
thetas = x[end:]
# set params for all:
for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
g._set_params(numpy.hstack([X, X_var, Z, thetas[s:e]]))
# self._set_var_params(g, X, X_var, Z)
# self._set_kern_params(g, thetas[s:e].copy())
# g._compute_kernel_matrices()
# if self.auto_scale_factor:
# g.scale_factor = numpy.sqrt(g.psi2.sum(0).mean() * g.likelihood.precision)
# # self.scale_factor = numpy.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision)
# g._computations()
def update_likelihood_approximation(self): # TODO: object oriented vs script base
for bgplvm in self.bgplvms:
bgplvm.update_likelihood_approximation()
def log_likelihood(self):
ll = -self.gref.KL_divergence()
for g in self.bgplvms:
ll += SparseGP.log_likelihood(g)
return ll
def _log_likelihood_gradients(self):
dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
dKLmu, dKLdS = self.gref.dKL_dmuS()
dLdmu -= dKLmu
dLdS -= dKLdS
dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return numpy.hstack((dLdmuS,
dldzt1,
numpy.hstack([numpy.hstack([g.dL_dtheta(),
g.likelihood._gradients(\
partial=g.partial_for_likelihood)]) \
for g in self.bgplvms])))
def _init_X(self, init='PCA', likelihood_list=None):
if likelihood_list is None:
likelihood_list = self.likelihood_list
Ylist = []
for likelihood_or_Y in likelihood_list:
if type(likelihood_or_Y) is numpy.ndarray:
Ylist.append(likelihood_or_Y)
else:
Ylist.append(likelihood_or_Y.Y)
del likelihood_list
if init in "PCA_concat":
X = PCA(numpy.hstack(Ylist), self.input_dim)[0]
elif init in "PCA_single":
X = numpy.zeros((Ylist[0].shape[0], self.input_dim))
for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.input_dim), len(Ylist)), Ylist):
X[:, qs] = PCA(Y, len(qs))[0]
else: # init == 'random':
X = numpy.random.randn(Ylist[0].shape[0], self.input_dim)
self.X = X
return X
def _init_Z(self, init="permute", X=None):
if X is None:
X = self.X
if init in "permute":
Z = numpy.random.permutation(X.copy())[:self.num_inducing]
elif init in "random":
Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z
return Z
def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
if axes is None:
fig = pylab.figure(num=fignum)
sharex_ax = None
sharey_ax = None
for i, g in enumerate(self.bgplvms):
try:
if sharex:
sharex_ax = ax # @UndefinedVariable
sharex = False # dont set twice
if sharey:
sharey_ax = ax # @UndefinedVariable
sharey = False # dont set twice
except:
pass
if axes is None:
ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax)
elif isinstance(axes, (tuple, list)):
ax = axes[i]
else:
raise ValueError("Need one axes per latent dimension input_dim")
plotf(i, g, ax)
if sharey_ax is not None:
pylab.setp(ax.get_yticklabels(), visible=False)
pylab.draw()
if axes is None:
fig.tight_layout()
return fig
else:
return pylab.gcf()
def plot_X_1d(self, *a, **kw):
return self.gref.plot_X_1d(*a, **kw)
def plot_X(self, fignum=None, ax=None):
fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X))
return fig
def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs):
fig = self._handle_plotting(fignum,
ax,
lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs),
sharex=sharex, sharey=sharey)
return fig
def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
"""
:param:`titles` :
titles for axes of datasets
"""
if titles is None:
titles = [r'${}$'.format(name) for name in self.names]
ymax = reduce(max, [numpy.ceil(max(g.input_sensitivity())) for g in self.bgplvms])
def plotf(i, g, ax):
ax.set_ylim([0,ymax])
g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)
fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey)
return fig
def plot_latent(self, fignum=None, ax=None, *args, **kwargs):
fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
return fig
def _debug_plot(self):
self.plot_X_1d()
fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9))
fig.clf()
axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))]
self.plot_X(ax=axes)
axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
self.plot_latent(ax=axes)
axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
self.plot_scales(ax=axes)
pylab.draw()
fig.tight_layout()
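A minimal usage sketch for MRD above (two toy views sharing a latent space; names and num_inducing as documented in the constructor):
import numpy as np
import GPy
Y1 = np.random.randn(30, 4); Y2 = np.random.randn(30, 6)
m = GPy.models.MRD([Y1, Y2], input_dim=3, num_inducing=10, names=['view1', 'view2'])
m.optimize()
m.plot_scales()                                        # per-view ARD weights over the shared latent space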

56 GPy/models/sparse_gp_classification.py Normal file

@@ -0,0 +1,56 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from ..likelihoods import likelihood
class SparseGPClassification(SparseGP):
"""
sparse Gaussian Process model for classification
This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
"""
def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial()
likelihood = likelihoods.EP(Y, distribution)
elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.'
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy()
else:
assert Z.shape[1] == X.shape[1]
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
self.ensure_default_constraints()
def getstate(self):
return SparseGP.getstate(self)
def setstate(self, state):
return SparseGP.setstate(self, state)
pass

55 GPy/models/sparse_gp_regression.py Normal file

@@ -0,0 +1,55 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
class SparseGPRegression(SparseGP):
"""
Gaussian Process model for regression
This is a thin wrapper around the SparseGP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10, X_variance=None):
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1]) # + kern.white(X.shape[1], 1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy()
else:
assert Z.shape[1] == X.shape[1]
# likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
self.ensure_default_constraints()
pass
def getstate(self):
return SparseGP.getstate(self)
def setstate(self, state):
return SparseGP.setstate(self, state)
pass

70 GPy/models/sparse_gplvm.py Normal file

@@ -0,0 +1,70 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from GPy.models.sparse_gp_regression import SparseGPRegression
from GPy.models.gplvm import GPLVM
# from .. import kern
# from ..core import model
# from ..util.linalg import pdinv, PCA
class SparseGPLVM(SparseGPRegression, GPLVM):
"""
Sparse Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10):
X = self.initialise_latent(init, input_dim, Y)
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
self.ensure_default_constraints()
def getstate(self):
return SparseGPRegression.getstate(self)
def setstate(self, state):
return SparseGPRegression.setstate(self, state)
def _get_param_names(self):
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
+ SparseGPRegression._get_param_names(self))
def _get_params(self):
return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self)))
def _set_params(self, x):
self.X = x[:self.X.size].reshape(self.num_data, self.input_dim).copy()
SparseGPRegression._set_params(self, x[self.X.size:])
def log_likelihood(self):
return SparseGPRegression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), SparseGPRegression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
# passing Z without a small amount of jitter will induce the white kernel where we don't want it!
mu, var, upper, lower = SparseGPRegression.predict(self, self.Z + np.random.randn(*self.Z.shape) * 0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(self, *args, **kwargs)
pb.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')

52 GPy/models/svigp_regression.py Normal file

@@ -0,0 +1,52 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SVIGP
from .. import likelihoods
from .. import kern
class SVIGPRegression(SVIGP):
"""
Gaussian Process model for regression
This is a thin wrapper around the SVIGP class, with a set of sensible defaults
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10):
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy()
else:
assert Z.shape[1] == X.shape[1]
# likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y, normalize=False)
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
self.load_batch()
def getstate(self):
return SVIGP.getstate(self)
def setstate(self, state):
return SVIGP.setstate(self, state)

107 GPy/models/warped_gp.py Normal file

@@ -0,0 +1,107 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.warping_functions import *
from ..core import GP
from .. import likelihoods
from GPy.util.warping_functions import TanhWarpingFunction_d
from GPy import kern
class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
if warping_function is None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
self.scale_data = False
if self.scale_data:
Y = self._scale_data(Y)
self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params())
def getstate(self):
return GP.getstate(self)
def setstate(self, state):
return GP.setstate(self, state)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]
Y = self.transform_data()
self.likelihood.set_data(Y)
GP._set_params(self, x[self.warping_function.num_parameters:].copy())
def _get_params(self):
return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
def _get_param_names(self):
warping_names = self.warping_function._get_param_names()
param_names = GP._get_param_names(self)
return warping_names + param_names
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
return Y
def log_likelihood(self):
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
return ll + np.log(jacobian).sum()
def _log_likelihood_gradients(self):
ll_grads = GP._log_likelihood_gradients(self)
alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
warping_grads = self.warping_function_gradients(alpha)
warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
def warping_function_gradients(self, Kiy):
grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
return -dquad_dpsi + djac_dpsi
def plot_warping(self):
self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None):
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
if self.predict_in_warped_space:
mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init)
var = self.warping_function.f_inv(var, self.warping_params)
if self.scale_data:
mean = self._unscale_data(mean)
return mean, var, _025pm, _975pm
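A minimal usage sketch for the warped GP above (toy positive-valued targets; warping_terms as documented in the constructor):
import numpy as np
import GPy
X = np.linspace(0., 10., 50)[:, None]
Y = np.exp(np.sin(X) + .05*np.random.randn(50, 1))     # positive, non-Gaussian targets
m = GPy.models.WarpedGP(X, Y, warping_terms=3)
m.optimize()
m.plot_warping()                                       # inspect the learned tanh warping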

80 GPy/notes.txt Normal file

@@ -0,0 +1,80 @@
Prod.py kernel could also take a list of kernels rather than two arguments for kernels.
transformations.py should have limits on what is fed into exp() particularly for the negative log logistic (done -neil).
Load in a model with mlp kernel, plot it, change a parameter, plot it again. It doesn't update the plot.
Tests for kernels which work directly on the kernel implementation (not through GP).
Should stationary covariances have their own kernpart type? I think so, and likewise inner product kernels. That way the caching so carefully constructed for RBF or linear could be shared.
Where do we declare default kernel parameters. In constructors.py or in the definition file for the kernel?
When printing to stdout, can we check that our approach is also working nicely for the ipython notebook? I like the way our optimization ticks over, but at the moment this doesn't seem to work in the ipython notebook, it would be nice if it did. My problems may be due to using ipython 0.12, I've had a poke around at fixing this and I can't do it for 0.12.
When we print a model should we also include information such as number of inputs and number of outputs?
Let's not use N for giving the number of data in the model. When it pops up as a help tip it's not as clear as num_samples or num_data. Prefer the second, but oddly I've been using first.
Loving the fact that the * has been overloaded on the kernels (oddly never thought to check this before). Although naming can be a bit confusing. Can we think how to deal with the names in a clearer way when we use a kernel like this one:
kern = GPy.kern.rbf(30)*(GPy.kern.mlp(30)+GPy.kern.poly(30, degree=5)) + GPy.kern.bias(30). There seems to be some tying of parameters going on ... should there be? (you can try it as the kernel for the robot wireless model).
Can we comment up some of the list comprehensions in hierarchical.py?
Need to tidy up classification.py,
many examples include help that doesn't apply
(it is suggested that you can try different approximation types)
Shall we overload the ** operator to have tensor products? (I've done this; now we can see if we like it.)
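For reference, the explicit spelling the sugar stands for; coregionalise and prod(..., tensor=True) are exactly the calls kernel_tests.py uses (whether ** should forward extra options is open):
import GPy
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2, 1)
k = k1.prod(k2, tensor=True)  # with the overload this would read k1 ** k2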
People aren't filling the doc strings in as they go; *everyone* needs to get into the habit of this (and of modifying them as they edit, or correcting them when there is a problem).
Need some nice way of explaining how to compile documentation and run the unit tests, could this be in a readme or FAQ somewhere? Maybe it's there already somewhere and I've missed it.
Shouldn't EP be in the inference package (not likelihoods)?
When using bfgs in ipython notebook, text appears in the original console, not in the notebook.
In sparse GPs wouldn't it be clearer to call Z inducing?
In the coregionalisation matrix, setting W to all ones will (surely?) ensure that symmetry isn't broken. Also, by allowing it to scale like that, the output variance increases as the rank is increased (and if the user sets the rank to more than the output dimension they could get very different results).
We are inconsistent about our use of ise and ize e.g. optimize and normalize_X, but coregionalise, we should choose one and stick to it. Suggest -ize.
Exceptions: we need to provide a list of exceptions we throw and specify what is thrown where.
Why is it get_params() but getstate()? It should be get_state(). Why is it get_gradient instead of get_gradients? We need to be consistent!! It doesn't matter which way we choose as long as it's consistent.
In likelihood Nparams should be num_params
In likelihood N should be num_data
The Gaussian target in likelihood should be F. What is V doing here?
Need to check for nan values in likelihoods. These should be treated as missing values. If the likelihood can't handle missing values an error should be thrown.
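A minimal numpy-only sketch of the intended check (validate_targets is a hypothetical name, not GPy API):
import numpy as np
def validate_targets(Y):
    missing = np.isnan(Y)
    if missing.any():
        # treat as missing where supported; otherwise fail loudly
        raise ValueError("%d nan values found in Y" % missing.sum())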
Sometimes you want to print kernpart objects, for diagnosis etc. This isn't possible currently.
Why do likelihoods still have YYT everywhere? Didn't we agree to set the observed data to Y and the latent function to F?
For some reason a stub of _get_param_names(self) wasn't available in the Parameterized base class. Have put it in (is this right?)
Is there a quick FAQ or something on how to build the documentation? I did it once, but can't remember! Have started a FAQ.txt file where we can add this type of information.
Similar for the nosetests ... even ran them last week but can't remember the command!
Now added Gaussian priors to GPLVM latent variables by default. When running the GPy.examples.dimensionality_reduction.stick() example the print out from print model has the same value for the prior+likelihood as for the prior.
For the back constrained GP-LVM need priors to be on the Xs not on the model parameters (because they aren't parameters, they are constraints). Need to work out how to do this, perhaps by creating the full GP-LVM model then constraining around it, rather than overriding inside the GP-LVM model.
This code fails:
kern = GPy.kern.rbf(2)
GPy.kern.Kern_check_dK_dX(kern, X=np.random.randn(10, 2), X2=None).checkgrad(verbose=True)
because X2 is now equal to X, so there is a factor of 2 missing. Does this ever come up? Yes: in the GP-LVM (gplvm.py, line 64), where it is called with a corrective factor of 2, and on line 241 of sparse_gp.py, where it is also called with a corrective factor of 2. The original MATLAB GPLVM didn't allow gradients with respect to X alone; it multiplied by 2 in the base code, but then added the diagonal across those elements. This is missing in the new code.
In white.py, line 41: need to check whether X and X2 refer to the same object too, because somewhere up the pipeline someone may have set X2=X when X2 originally arrived equal to None.
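To see the factor of 2 in isolation (numpy only, with a toy unit-variance rbf, nothing from GPy): for f(X) = sum_ij k(x_i, x_j) the chain rule hits X through both arguments, so a dK_dX routine that holds X2 fixed reports exactly half the true gradient:
import numpy as np
def rbf_K(X, X2):
    return np.exp(-0.5 * ((X[:, None, :] - X2[None, :, :]) ** 2).sum(-1))
def dsumK_dX(X, X2):
    # gradient of K(X, X2).sum() w.r.t. X, treating X2 as fixed
    diff = X[:, None, :] - X2[None, :, :]
    return -(rbf_K(X, X2)[:, :, None] * diff).sum(1)
X = np.random.randn(10, 2)
g_half = dsumK_dX(X, X)  # X2 passed as a fixed copy of X
eps, g_num = 1e-6, np.zeros_like(X)
for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        Xp, Xm = X.copy(), X.copy()
        Xp[i, j] += eps
        Xm[i, j] -= eps
        g_num[i, j] = (rbf_K(Xp, Xp).sum() - rbf_K(Xm, Xm).sum()) / (2 * eps)
print np.allclose(g_num, 2 * g_half, atol=1e-4)  # True: the missing factor of 2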

12
GPy/testing/__init__.py Normal file
View file

@@ -0,0 +1,12 @@
"""
MaxZ
"""
import unittest
import sys
def deepTest(reason):
if reason:
return lambda x:x
return unittest.skip("Not deep scanning, enable deepscan by adding 'deep' argument")
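
# Usage sketch (illustrative, not part of the original file): deepTest skips a
# whole TestCase unless a 'deep' flag is passed on the command line, as the
# psi-statistics tests later in this diff do.
#
# import sys
# import unittest
# from GPy import testing
#
# @testing.deepTest('deep' in sys.argv)
# class DeepOnlyTests(unittest.TestCase):
#     def test_expensive_monte_carlo(self):
#         pass  # only runs when 'deep' is in sys.argv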

View file

@@ -0,0 +1,74 @@
# Copyright (c) 2012, Nicolo Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
from GPy.models.bayesian_gplvm import BayesianGPLVM


class BGPLVMTests(unittest.TestCase):
    def test_bias_kern(self):
        N, num_inducing, input_dim, D = 10, 3, 2, 4
        X = np.random.rand(N, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        Y -= Y.mean(axis=0)
        k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_linear_kern(self):
        N, num_inducing, input_dim, D = 10, 3, 2, 4
        X = np.random.rand(N, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        Y -= Y.mean(axis=0)
        k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_rbf_kern(self):
        N, num_inducing, input_dim, D = 10, 3, 2, 4
        X = np.random.rand(N, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        Y -= Y.mean(axis=0)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_rbf_bias_kern(self):
        N, num_inducing, input_dim, D = 10, 3, 2, 4
        X = np.random.rand(N, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        Y -= Y.mean(axis=0)
        k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())

    #@unittest.skip('psi2 cross terms are NotImplemented for this combination')
    def test_linear_bias_kern(self):
        N, num_inducing, input_dim, D = 30, 5, 4, 30
        X = np.random.rand(N, input_dim)
        k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        Y -= Y.mean(axis=0)
        k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()

111
GPy/testing/cgd_tests.py Normal file
View file

@@ -0,0 +1,111 @@
'''
Created on 26 Apr 2013

@author: maxz
'''
import unittest
import numpy
from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
import pylab
from scipy.optimize.optimize import rosen, rosen_der
from GPy.inference.gradient_descent_update_rules import PolakRibiere


class Test(unittest.TestCase):
    def testMinimizeSquare(self):
        N = 100
        A = numpy.random.rand(N) * numpy.eye(N)
        b = numpy.random.rand(N) * 0
        f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
        df = lambda x: numpy.dot(A, x) - b
        opt = CGD()
        restarts = 10
        for _ in range(restarts):
            try:
                x0 = numpy.random.randn(N) * 10
                res = opt.opt(f, df, x0, messages=0, maxiter=1000, gtol=1e-15)
                assert numpy.allclose(res[0], 0, atol=1e-5)
                break
            except AssertionError:
                # RESTART (a stray ipdb.set_trace() here used to block the retry loop)
                pass
        else:
            raise AssertionError("Test failed for {} restarts".format(restarts))

    def testRosen(self):
        N = 20
        f = rosen
        df = rosen_der
        opt = CGD()
        restarts = 10
        for _ in range(restarts):
            try:
                x0 = (numpy.random.randn(N) * .5) + numpy.ones(N)
                res = opt.opt(f, df, x0, messages=0,
                              maxiter=1e3, gtol=1e-12)
                assert numpy.allclose(res[0], 1, atol=.1)
                break
            except:
                # RESTART
                pass
        else:
            raise AssertionError("Test failed for {} restarts".format(restarts))


if __name__ == "__main__":
#    import sys;sys.argv = ['',
#                           'Test.testMinimizeSquare',
#                           'Test.testRosen',
#                           ]
#    unittest.main()
    N = 2
    A = numpy.random.rand(N) * numpy.eye(N)
    b = numpy.random.rand(N) * 0
    f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
    df = lambda x: numpy.dot(A, x) - b
#    f = rosen
#    df = rosen_der
    x0 = (numpy.random.randn(N) * .5) + numpy.ones(N)
    print x0
    opt = CGD()
    pylab.ion()
    fig = pylab.figure("cgd optimize")
    if fig.axes:
        ax = fig.axes[0]
        ax.cla()
    else:
        ax = fig.add_subplot(111, projection='3d')
    interpolation = 40
#    x, y = numpy.linspace(.5, 1.5, interpolation)[:, None], numpy.linspace(.5, 1.5, interpolation)[:, None]
    x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None]
    X, Y = numpy.meshgrid(x, y)
    fXY = numpy.array([f(numpy.array([x, y])) for x, y in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation)
    ax.plot_wireframe(X, Y, fXY)
    xopts = [x0.copy()]
    optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='', color='r')
    raw_input("enter to start optimize")
    res = [0]

    def callback(*r):
        xopts.append(r[0].copy())
        # time.sleep(.3)
        optplts._verts3d = [numpy.array(xopts)[:, 0], numpy.array(xopts)[:, 1], [f(xs) for xs in xopts]]
        fig.canvas.draw()
        if r[-1] != RUNNING:
            res[0] = r

    res[0] = opt.opt(f, df, x0.copy(), callback, messages=True, maxiter=1000,
                     report_every=7, gtol=1e-12, update_rule=PolakRibiere)

View file

@@ -0,0 +1,99 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
import inspect
import pkgutil
import os
import random
from nose.tools import nottest
import sys


class ExamplesTests(unittest.TestCase):
    def _checkgrad(self, Model):
        self.assertTrue(Model.checkgrad())

    def _model_instance(self, Model):
        self.assertTrue(isinstance(Model, GPy.core.Model))

"""
def model_instance_generator(model):
    def check_model_returned(self):
        self._model_instance(model)
    return check_model_returned

def checkgrads_generator(model):
    def model_checkgrads(self):
        self._checkgrad(model)
    return model_checkgrads
"""

def model_checkgrads(model):
    model.randomize()
    #assert model.checkgrad()
    return model.checkgrad()

def model_instance(model):
    #assert isinstance(model, GPy.core.Model)
    return isinstance(model, GPy.core.Model)

@nottest
def test_models():
    examples_path = os.path.dirname(GPy.examples.__file__)
    # Load modules
    failing_models = {}
    for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]):
        # Load examples
        module_examples = loader.find_module(module_name).load_module(module_name)
        print "MODULE", module_examples
        print "Before"
        print inspect.getmembers(module_examples, predicate=inspect.isfunction)
        functions = [func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False][::-1]
        print "After"
        print functions
        for example in functions:
            if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100']:
                print "SKIPPING"
                continue
            print "Testing example: ", example[0]
            # Generate model
            try:
                model = example[1]()
            except Exception as e:
                failing_models[example[0]] = "Cannot make model: \n{e}".format(e=e)
            else:
                print model
                model_checkgrads.description = 'test_checkgrads_%s' % example[0]
                try:
                    if not model_checkgrads(model):
                        failing_models[model_checkgrads.description] = False
                except Exception as e:
                    failing_models[model_checkgrads.description] = e
                model_instance.description = 'test_instance_%s' % example[0]
                try:
                    if not model_instance(model):
                        failing_models[model_instance.description] = False
                except Exception as e:
                    failing_models[model_instance.description] = e
                #yield model_checkgrads, model
                #yield model_instance, model
        print "Finished checking module {m}".format(m=module_name)
    if len(failing_models.keys()) > 0:
        print "Failing models: "
        print failing_models
        raise Exception(failing_models)

if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
#    unittest.main()
    test_models()

View file

@@ -0,0 +1,44 @@
# Copyright (c) 2012, Nicolo Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy


class GPLVMTests(unittest.TestCase):
    def test_bias_kern(self):
        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
        X = np.random.rand(num_data, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(num_data), K, output_dim).T
        k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = GPy.models.GPLVM(Y, input_dim, kernel=k)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_linear_kern(self):
        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
        X = np.random.rand(num_data, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(num_data), K, output_dim).T
        k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = GPy.models.GPLVM(Y, input_dim, kernel=k)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_rbf_kern(self):
        num_data, num_inducing, input_dim, output_dim = 10, 3, 2, 4
        X = np.random.rand(num_data, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(num_data), K, output_dim).T
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = GPy.models.GPLVM(Y, input_dim, kernel=k)
        m.randomize()
        self.assertTrue(m.checkgrad())


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()

View file

@@ -0,0 +1,74 @@
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy


class KernelTests(unittest.TestCase):
    def test_kerneltie(self):
        K = GPy.kern.rbf(5, ARD=True)
        K.tie_params('.*[01]')
        K.constrain_fixed('2')
        X = np.random.rand(5, 5)
        Y = np.ones((5, 1))
        m = GPy.models.GPRegression(X, Y, K)
        self.assertTrue(m.checkgrad())

    def test_rbfkernel(self):
        verbose = False
        kern = GPy.kern.rbf(5)
        self.assertTrue(GPy.kern.Kern_check_model(kern).is_positive_definite())
        self.assertTrue(GPy.kern.Kern_check_dK_dtheta(kern).checkgrad(verbose=verbose))
        self.assertTrue(GPy.kern.Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose))
        self.assertTrue(GPy.kern.Kern_check_dK_dX(kern).checkgrad(verbose=verbose))

    def test_gibbskernel(self):
        verbose = False
        kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

    def test_mlpkernel(self):
        verbose = False
        kern = GPy.kern.mlp(5)
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

    def test_polykernel(self):
        verbose = False
        kern = GPy.kern.poly(5, degree=4)
        self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))

    def test_fixedkernel(self):
        """
        Fixed effect kernel test
        """
        X = np.random.rand(30, 4)
        K = np.dot(X, X.T)
        kernel = GPy.kern.fixed(4, K)
        Y = np.ones((30, 1))
        m = GPy.models.GPRegression(X, Y, kernel=kernel)
        self.assertTrue(m.checkgrad())

    def test_coregionalisation(self):
        X1 = np.random.rand(50, 1) * 8
        X2 = np.random.rand(30, 1) * 5
        index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
        X = np.hstack((np.vstack((X1, X2)), index))
        Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
        Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
        Y = np.vstack((Y1, Y2))
        k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
        k2 = GPy.kern.coregionalise(2, 1)
        k = k1.prod(k2, tensor=True)
        m = GPy.models.GPRegression(X, Y, kernel=k)
        self.assertTrue(m.checkgrad())


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()

View file

@@ -0,0 +1,34 @@
# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy


class MappingTests(unittest.TestCase):
    def test_kernelmapping(self):
        verbose = False
        mapping = GPy.mappings.Kernel(np.random.rand(10, 3), 2)
        self.assertTrue(GPy.core.mapping.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose))
        self.assertTrue(GPy.core.mapping.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))

    def test_linearmapping(self):
        verbose = False
        mapping = GPy.mappings.Linear(3, 2)
        self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose))
        self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))

    def test_mlpmapping(self):
        verbose = False
        mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2)
        self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose))
        self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()

32
GPy/testing/mrd_tests.py Normal file
View file

@@ -0,0 +1,32 @@
# Copyright (c) 2013, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
'''
Created on 10 Apr 2013

@author: maxz
'''
import unittest
import numpy as np
import GPy


class MRDTests(unittest.TestCase):
    def test_gradients(self):
        num_m = 3
        N, num_inducing, input_dim, D = 20, 8, 6, 20
        X = np.random.rand(N, input_dim)
        k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
        K = k.K(X)
        Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)]
        likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
        m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing)
        self.assertTrue(m.checkgrad())


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()

View file

@@ -0,0 +1,58 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy


class PriorTests(unittest.TestCase):
    def test_lognormal(self):
        xmin, xmax = 1, 2.5 * np.pi
        b, C, SNR = 1, 0, 0.1
        X = np.linspace(xmin, xmax, 500)
        y = b * X + C + 1 * np.sin(X)
        y += 0.05 * np.random.randn(len(X))
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        lognormal = GPy.priors.LogGaussian(1, 2)
        m.set_prior('rbf', lognormal)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_Gamma(self):
        xmin, xmax = 1, 2.5 * np.pi
        b, C, SNR = 1, 0, 0.1
        X = np.linspace(xmin, xmax, 500)
        y = b * X + C + 1 * np.sin(X)
        y += 0.05 * np.random.randn(len(X))
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        Gamma = GPy.priors.Gamma(1, 1)
        m.set_prior('rbf', Gamma)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_incompatibility(self):
        xmin, xmax = 1, 2.5 * np.pi
        b, C, SNR = 1, 0, 0.1
        X = np.linspace(xmin, xmax, 500)
        y = b * X + C + 1 * np.sin(X)
        y += 0.05 * np.random.randn(len(X))
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        gaussian = GPy.priors.Gaussian(1, 1)
        # setting a Gaussian prior on non-negative parameters
        # should raise an AssertionError
        self.assertRaises(AssertionError, m.set_prior, 'rbf', gaussian)


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()

View file

@@ -0,0 +1,141 @@
'''
Created on 26 Apr 2013

@author: maxz
'''
import unittest
import GPy
import numpy as np
from GPy import testing
import sys
import numpy
from GPy.kern.parts.rbf import RBF
from GPy.kern.parts.linear import Linear
from copy import deepcopy

__test__ = lambda: 'deep' in sys.argv

# np.random.seed(0)


def ard(p):
    try:
        if p.ARD:
            return "ARD"
    except:
        pass
    return ""


@testing.deepTest(__test__())
class Test(unittest.TestCase):
    input_dim = 9
    num_inducing = 4
    N = 3
    Nsamples = 5e6

    def setUp(self):
        i_s_dim_list = [2, 4, 3]
        indices = numpy.cumsum(i_s_dim_list).tolist()
        input_slices = [slice(a, b) for a, b in zip([None] + indices, indices)]
        #input_slices[2] = deepcopy(input_slices[1])
        input_slice_kern = GPy.kern.kern(9,
                [
                 RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True),
                 RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True),
                 Linear(i_s_dim_list[2], np.random.rand(i_s_dim_list[2]), ARD=True)
                ],
                input_slices=input_slices
                )
        self.kerns = (
            input_slice_kern,
#            (GPy.kern.rbf(self.input_dim, ARD=True) +
#             GPy.kern.linear(self.input_dim, ARD=True) +
#             GPy.kern.bias(self.input_dim) +
#             GPy.kern.white(self.input_dim)),
#            (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
#             GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
#             GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) +
#             GPy.kern.bias(self.input_dim) +
#             GPy.kern.white(self.input_dim)),
#            GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
#            GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
#            GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
#            GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
#            GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
#            GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
#            GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
            )
        self.q_x_mean = np.random.randn(self.input_dim)
        self.q_x_variance = np.exp(np.random.randn(self.input_dim))
        self.q_x_samples = np.random.randn(self.Nsamples, self.input_dim) * np.sqrt(self.q_x_variance) + self.q_x_mean
        self.Z = np.random.randn(self.num_inducing, self.input_dim)
        self.q_x_mean.shape = (1, self.input_dim)
        self.q_x_variance.shape = (1, self.input_dim)

    def test_psi0(self):
        # psi0 = E_q(x)[k(x, x)], checked against a Monte Carlo average of Kdiag
        for kern in self.kerns:
            psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
            Kdiag = kern.Kdiag(self.q_x_samples)
            self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)
            # print kern.parts[0].name, np.allclose(psi0, np.mean(Kdiag))

    def test_psi1(self):
        for kern in self.kerns:
            Nsamples = np.floor(self.Nsamples / 300.)
            psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
            K_ = np.zeros((Nsamples, self.num_inducing))
            diffs = []
            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
                K = kern.K(q_x_sample_stripe[:Nsamples], self.Z)
                K_ += K
                diffs.append((np.abs(psi1 - (K_ / (i + 1))) ** 2).mean())
            K_ /= self.Nsamples / Nsamples
            msg = "psi1: " + "+".join([p.name + ard(p) for p in kern.parts])
            try:
                import pylab
                pylab.figure(msg)
                pylab.plot(diffs)
                # print msg, ((psi1.squeeze() - K_)**2).mean() < .01
                self.assertTrue(((psi1.squeeze() - K_) ** 2).mean() < .01,
                                msg=msg + ": not matching")
                # sys.stdout.write(".")
            except:
                # import ipdb;ipdb.set_trace()
                # kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
                # sys.stdout.write("E") # msg + ": not matching"
                pass

    def test_psi2(self):
        for kern in self.kerns:
            Nsamples = self.Nsamples / 300.
            psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
            K_ = np.zeros((self.num_inducing, self.num_inducing))
            diffs = []
            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
                K = kern.K(q_x_sample_stripe, self.Z)
                K = (K[:, :, None] * K[:, None, :]).mean(0)
                K_ += K
                diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
            K_ /= self.Nsamples / Nsamples
            msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
            try:
                import pylab
                pylab.figure(msg)
                pylab.plot(diffs)
                # print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
                self.assertTrue(np.allclose(psi2.squeeze(), K_,
                                            rtol=1e-1, atol=.1),
                                msg=msg + ": not matching")
                # sys.stdout.write(".")
            except:
                # import ipdb;ipdb.set_trace()
                # kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
                # sys.stdout.write("E")
                print msg + ": not matching"
                pass


if __name__ == "__main__":
    sys.argv = ['',
                #'Test.test_psi0',
                'Test.test_psi1',
                'Test.test_psi2',
                ]
    unittest.main()

View file

@@ -0,0 +1,152 @@
'''
Created on 22 Apr 2013

@author: maxz
'''
import unittest
import numpy
import GPy
import itertools
from GPy.core import Model


class PsiStatModel(Model):
    def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
        self.which = which
        self.X = X
        self.X_variance = X_variance
        self.Z = Z
        self.N, self.input_dim = X.shape
        self.num_inducing, input_dim = Z.shape
        assert self.input_dim == input_dim, "shape mismatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
        self.kern = kernel
        super(PsiStatModel, self).__init__()
        self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)

    def _get_param_names(self):
        Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
        Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
        return Xnames + Znames + self.kern._get_param_names()

    def _get_params(self):
        return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])

    def _set_params(self, x, save_old=True, save_count=0):
        start, end = 0, self.X.size
        self.X = x[start:end].reshape(self.N, self.input_dim)
        start, end = end, end + self.X_variance.size
        self.X_variance = x[start: end].reshape(self.N, self.input_dim)
        start, end = end, end + self.Z.size
        self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
        self.kern._set_params(x[end:])

    def log_likelihood(self):
        return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()

    def _log_likelihood_gradients(self):
        psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
        try:
            psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
        except AttributeError:
            psiZ = numpy.zeros(self.num_inducing * self.input_dim)
        thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
        return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))


class DPsiStatTest(unittest.TestCase):
    N = 50
    num_inducing = 10
    input_dim = 20  # note: a dead `input_dim = 5` that this line silently overrode has been dropped
    X = numpy.random.randn(N, input_dim)
    X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
    Z = numpy.random.permutation(X)[:num_inducing]
    Y = X.dot(numpy.random.randn(input_dim, input_dim))
#    kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
    kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim),
               GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
               GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)]

    def testPsi0(self):
        for k in self.kernels:
            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
                             num_inducing=self.num_inducing, kernel=k)
            assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))

#    def testPsi1(self):
#        for k in self.kernels:
#            m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
#                             num_inducing=self.num_inducing, kernel=k)
#            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))

    def testPsi2_lin(self):
        k = self.kernels[0]
        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                         num_inducing=self.num_inducing, kernel=k)
        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))

    def testPsi2_lin_bia(self):
        k = self.kernels[3]
        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                         num_inducing=self.num_inducing, kernel=k)
        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))

    def testPsi2_rbf(self):
        k = self.kernels[1]
        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                         num_inducing=self.num_inducing, kernel=k)
        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))

    def testPsi2_rbf_bia(self):
        k = self.kernels[-1]
        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                         num_inducing=self.num_inducing, kernel=k)
        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))

    def testPsi2_bia(self):
        k = self.kernels[2]
        m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
                         num_inducing=self.num_inducing, kernel=k)
        assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))


if __name__ == "__main__":
    import sys
    interactive = 'i' in sys.argv
    if interactive:
#        N, num_inducing, input_dim, input_dim = 30, 5, 4, 30
#        X = numpy.random.rand(N, input_dim)
#        k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
#        K = k.K(X)
#        Y = numpy.random.multivariate_normal(numpy.zeros(N), K, input_dim).T
#        Y -= Y.mean(axis=0)
#        k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
#        m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
#        m.randomize()
#        # self.assertTrue(m.checkgrad())
        numpy.random.seed(0)
        input_dim = 5
        N = 50
        num_inducing = 10
        D = 15
        X = numpy.random.randn(N, input_dim)
        X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
        Z = numpy.random.permutation(X)[:num_inducing]
        Y = X.dot(numpy.random.randn(input_dim, D))
#        kernel = GPy.kern.bias(input_dim)
#
#        kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim),
#                   GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
#                   GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)]
#        for k in kernels:
#            m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
#                             num_inducing=num_inducing, kernel=k)
#            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
#
#        m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
#                          num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim))
#        m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
#                          num_inducing=num_inducing, kernel=kernel)
#        m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
#                          num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim))
        m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
                          num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
#                          + GPy.kern.bias(input_dim))
#        m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
#                          num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim))
    else:
        unittest.main()

View file

@@ -0,0 +1,45 @@
# Copyright (c) 2012, Nicolo Fusi, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
from GPy.models.sparse_gplvm import SparseGPLVM


class sparse_GPLVMTests(unittest.TestCase):
    def test_bias_kern(self):
        N, num_inducing, input_dim, D = 10, 3, 2, 4
        X = np.random.rand(N, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_linear_kern(self):
        N, num_inducing, input_dim, D = 10, 3, 2, 4
        X = np.random.rand(N, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_rbf_kern(self):
        N, num_inducing, input_dim, D = 10, 3, 2, 4
        X = np.random.rand(N, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
        m.randomize()
        self.assertTrue(m.checkgrad())


if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()

Some files were not shown because too many files have changed in this diff.